#!/usr/bin/perl # decode.pl -- Version 1.00.10 98.04.28 15:45 # Filter to convert QP or Base64 text/plain and text/html content # to 8-bit clean, for further filtering by transliteration programs # (eg. elot2ascii.pl) # (c) 1997, 1998, Hellenic Resources Institute, Inc. foreach $Arg (@ARGV) { if($Arg =~ /^-v$/) { $verbose = 1; } } @temp = ; $i = 0; while($i <= $#temp) { # Prepare headers for decoding if(@temp[$i+1] =~ /^\n?$/) { last; } # Stop if headers are over. if(@temp[$i+1] =~ /^\s{1,}/) { # If following header line begins with a blank, we append @temp[$i] =~ s/\s*$//; # Clean trailing spaces before continuation line @temp[$i+1] =~ s/^\s*//; # Clean leading spaces on continuation line @temp[$i] = @temp[$i] . " " . @temp[$i+1]; # Merge continuation line @temp[$i] =~ s/\?=(\s*)=\?/?==?/gi; # Remove redundant spaces between encoded content @temp[$i] =~ s/([^\s\:\,])\s=\?/$1=\?/gi; # " " " " encoded content and plain text @temp[$i] =~ s/\?=[ \t]*([^<])/\?=$1/gi; # " " " " encoded content and plain text splice(@temp, $i+1, 1); } else { @temp[$i] =~ s/([^\?])=$/$1\?=/; # TERRIBLE kludge, for erroneously closed encoded header strings # (closed by '=$' instead of '?=$') -- dep 98-04-28 15:45 $i++; } } $i = 0; while($i <= $#temp) { # Decode headers $_ = @temp[$i]; if(/^\n?$/) { last; } # Stop if headers are over. while(/=\?[^\?]*\?([BQ])\?([^\?]*)\?=/) { $text = &Decode($2, $1, 1); s/=\?([^\?]*)\?([BQ])\?([^\?]*)\?=/$text/i; @temp[$i] = $_; } $i++; } $i = 0; while($i <= $#temp) { if((@temp[$i] =~ /^Content-Type\:\s*\w*\/\w*\;\s*$/i)&& (@temp[$i+1] =~ /^\s{1,}/i)) { @temp[$i] =~ s/\s*$//; @temp[$i+1] =~ s/^\s*//; @temp[$i] = @temp[$i] . " " . @temp[$i+1]; splice(@temp, $i+1, 1); } else { $i++; } } $i = 0; while($i <= $#temp) { # Establish message format: Is it multipart? $_ = @temp[$i]; if(/^Content-Type\:\s*multipart\/(mixed|alternative)\;\s*boundary=\"([^\"]*)\"/i) { push(@boundary, $2); if($verbose) { print STDERR "$i: Content-Type: multipart/$1; boundary=\"@boundary[$#boundary]\"\n"; } $multipart = 1; } if(/^Content-Type\:\s*text\/(plain|enriched|html)\;\s*charset=\"?([^\"]*)\"?/i) { $type = $1; $charset = $2; $multipart = 0; $j = $i + 1; while(($j < $#temp)&&(@temp[$j] !~ /^$/)) { if(@temp[$j] =~ /Content-Transfer-Encoding:/i) { $encoding = @temp[$j]; $encoding =~ s/^Content-Transfer-Encoding:\s*(.*)\s*$/$1/i; @temp[$j] =~ s/^(Content-Transfer-Encoding:)\s*.*$/$1 8bit/i; if($verbose) { print STDERR "$i: Content-Type: $type, encoded using $encoding\n"; } last; } $j++; } } if(/^\n?$/) { last; } # Stop if headers are over. $i++; } if($multipart == 1) { # Process a multipart message. $i = 0; while(@temp[$i++] !~ /^\n?$/) { nop; } # Skip headers. while($i <= $#temp) { if(@temp[$i] =~ /^\-\-@boundary[0]\-\-$/) { last; } # Message is over. if((@temp[$i] =~ /^\-\-@boundary[$#boundary]\-\-$/)&&($#boundary > 0)) { if($verbose) { print STDERR "$i: ** Exiting Sub-Message\n"; } pop(@boundary); } if(@temp[$i] =~ /^\-\-@boundary[$#boundary]$/) { # Beginning of part in message. @temp[$i-1] =~ s/\n?$/\n/; if($verbose) { print STDERR "$i: Found Boundary @boundary[$#boundary]\n"; } $type = @temp[++$i]; # Find the type. $type =~ s/^Content-Type:\s*([^;]*);.*\n/$1/; # If optional parameter is present. $type =~ s/^Content-Type:\s*([^;]*)\n/$1/; # If optional parameter is not present. if($type =~ /text\/(plain|enriched|html)/i) { # Text type, will convert to 8bit while(@temp[++$i] !~ /^Content-Transfer-Encoding:/i) { if(@temp[$i] =~ /^$/i) { splice(@temp, $i, 0, "Content-Transfer-Encoding: 7bit"); last; } } $encoding = @temp[$i]; $encoding =~ s/^Content-Transfer-Encoding:\s*(.*)\s*$/$1/i; @temp[$i] =~ s/^(Content-Transfer-Encoding:)\s*.*$/$1 8bit/i; if($verbose) { printf STDERR "%u: Content-Type: %s, encoded using %s\n", $i-1, $type, $encoding; } while(@temp[$i++] !~ /^\n?$/) { nop; } if($encoding =~ /^quoted-printable$/i) { # Remove soft breaks for QP encoded section $j = $i; while(@temp[$j] !~ /^\-\-@boundary[$#boundary](\-\-)?$/) { if(@temp[$j] =~ /=\n?$/) { @temp[$j] =~ s/=\n?$//; @temp[$j] = @temp[$j] . @temp[$j+1]; splice(@temp, $j+1, 1); } else { $j++; } } } while((@temp[$i] !~ /^\-\-@boundary[$#boundary](\-\-)?$/)&&($i < $#temp)) { # Decode until the end of the section @temp[$i] = &Decode(@temp[$i], $encoding, 0); $i++; } } else { if(@temp[$i] =~ /^Content-Type\:\s*multipart\/alternative\;\s*boundary=\"([^\"]*)\"/i) { if($verbose) { print STDERR "$i: ** Entering Sub-Message\n"; print STDERR "$i: $temp[$i]"; } push(@boundary, $1); } else { $encoding = @temp[$i+1]; $encoding =~ s/^Content-Transfer-Encoding:\s*(.*)\s*$/$1/i; if($verbose) { print STDERR "$i: Content-Type: $type, encoded using $encoding\n"; } while(@temp[$i++] !~ /^\-\-@boundary[$#boundary](\-\-)?$/) { nop; } # I'm in a hurry, skip to the end if($verbose) { print STDERR "$i: Found Boundary @boundary[$#boundary]\n"; } } } } else { $i++; } } } else { # Not multipart, single content text $i = 0; while(@temp[$i++] !~ /^\n?$/) { nop; } # Skip headers if($encoding =~ /^quoted-printable$/i) { # If QP, remove soft breaks $j = $i; while($j <= $#temp) { if(@temp[$j] =~ /=\n?$/) { @temp[$j] =~ s/=\n?$//; @temp[$j] = @temp[$j] . @temp[$j+1]; splice(@temp, $j+1, 1); } else { $j++; } } } while($i <= $#temp) { # Decode the message text @temp[$i] = &Decode(@temp[$i], $encoding, 0); $i++; } } while (@temp[$#temp] =~ /^$/) { # Remove trailing blank lines splice(@temp, $#temp, 1); } print @temp; # OK, output it and go home exit; sub Decode { # Oh, yeah, here's how we decode local($string, $encoding, $headers) = @_; if(($encoding =~ /^q$/i)||($encoding =~ /quoted-printable/i)) { $string =~ s/=([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; if($headers) { $string =~ s/_/ /g; } } else { if(($encoding =~ /^b$/i)||($encoding =~ /base64/i)) { $string =~ tr#A-Za-z0-9+/##cd; # remove non-base64 chars $string =~ tr#A-Za-z0-9+/# -_#; # convert to uuencoded format $len = pack("c", 32 + 0.75*length($string)); # compute length byte $string = unpack("u", $len . $string); # uudecode and print } } $string; }