|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/cgi-bin/ |
Upload File : |
#!/usr/bin/perl
require "./cgi-lib.pl";
require "./codelib.pl";
$debug = 0;
&ReadParse(*values);
#For debugging
print "Content-type: text/plain\n\n" if $debug;
#$corr = "+ll2Jfw-)+ICYAIg-"; #"symbol+AF0gJg- ";
$latintest = "DA>a1/2a?o
AaoA?!IOoIOU1/2aO>AE?,oAa*c?c?OOE1/4?!?Aa?ADA1/4?IOONEO?1/2??*C3?,?DE!?
??OAIaEOEC?oAe3oO>??AaOa?AA???IOAC??AaDAIeEA3E!?IoEAEcOa?!
?oAo?oIiON>OA?AE??IOON?NAa?A?OO*,oEuAE??IOAiEIOU,oEuAa?ADA?OO*!?
AaDAEIE??AEAECE?Ce??IOACAi1/2aAa??O>,oEEOUIa2>EYO???Aa2>OaODE1/4Ie?u,$!?
OUAIOU1/41/4D???EyIOOU3/4<En*C3?oA??*ADAoAAE?!
OU1/2a1/2nAe?1/4ND>1,D3/4o2>?i??Ec1uOcOi?1/4?AO2DD?A>???E?2>AEAU?1/4EIAO!?
??AaDAAeODDAEO>n??";
#$values{"corrupted"} = $latintest;
$emailencoding = "";
$fixedline = detectProblem($values{"corrupted"});
if ($emailencoding eq "") {
@guesses = codeguess($fixedline);
} else {
push @guesses, $emailencoding;
}
if ($guesses[0] eq "gb") {
print "Content-type: text/plain; charset=gb2312\n\n";
} elsif ($guesses[0] eq "big5") {
print "Content-type: text/plain; charset=big5\n\n";
} elsif ($guesses[0] eq "utf8") {
print "Content-type: text/plain; charset=utf-8\n\n";
} else {
print "Content-type: text/plain\n\n";
}
print $fixedline, "\n";
sub detectProblem {
my($line) = @_;
if ($line =~ /Content-Transfer-Encoding: base64/) {
print "base64\n" if $debug;
$fixline = fixbase64($line);
} elsif ($line =~ m/=\?/) {
print "Detected base64 e-mail subject\n" if $debug;
$line =~ m/\?B\?([^\?=]+)/;
$fixline = fixbase64($1);
} elsif ($line =~ m/\$(\@|B)/) {
print "Detected jis\n" if $debug;
$fixline = jis6226to8bit($line);
} elsif ($line =~ m/\~\{/) {
print "hz2gb\n" if $debug;
$fixline = hz2gb($line);
$emailencoding = "gb";
} elsif ($line =~ m/\=[0-9A-F]/) {
print "quotedprintable\n" if $debug;
$fixline = quotedprintableto8bit($line);
} elsif ($line =~ m/\+[A-Za-z0-9\+\/]+(\-|\s)/) {
print "utf7\n" if $debug;
$fixline = utf7fix($line);
} elsif (badLineBreak($line) == 1) {
print "badline break\n" if $debug;
$fixline = fixLineBreak($line);
} elsif ($line =~ m/\&?\#\d+;/) {
print "ncr\n" if $debug;
$fixline = $line;
$fixline =~ s/(\&?\#\d+;)/touni($1)/ge;
$emailencoding = "utf8";
} elsif ($line =~ m/\&?\#x[0-9a-fA-F]{2,4};/) {
print "ncr2\n" if $debug;
$fixline = $line;
$fixline =~ s/(\&?\#x[0-9a-fA-F]{2,4};)/touni($1)/ge;
$emailencoding = "utf8";
} elsif ($line =~ m/(1\/4|1\/2|3\/4)/) {
print "latin1\n" if $debug;
$fixline = fixLatin1AsASCII($line);
} elsif ($line =~ m/\\u[0-9a-fA-F]{4}/i) {
print "java unicode\n" if $debug;
$fixline = $line;
$fixline =~ s/(\\u[0-9a-fA-F]{4})/touni($1)/ige;
$emailencoding = "utf8";
} elsif ($line =~ m/\%[0-9a-fA-F]{2}/) {
print "URL encoded" if $debug;
$fixline = $line;
$fixline =~ s/\%([A-Fa-f0-9]{2})/chr(hex($1))/eg;
} else {
print "Nothing detected" if $debug;
$fixline = $line;
}
return $fixline;
}
sub jis6226to8bit {
my($inline) = @_;
my($outline) = "";
my($linelen) = length($inline);
my($i);
for ($i = 0; $i < $linelen; $i++) {
if (ord(substr($inline, $i, 1)) == 0x1b ||
ord(substr($inline, $i, 1)) == 0x01) {
#print "After esc " . substr($inline, $i+1, 2) . "\n";
if (substr($inline, $i+1, 2) eq "\$\@" or
substr($inline, $i+1, 2) eq "\$B") {
$i += 3;
while (ord(substr($inline, $i, 1)) != 0x1b &&
ord(substr($inline, $i, 1)) != 0x01 &&
$i < $linelen)
{
$outline .= chr(ord(substr($inline, $i, 1)) + 0x80);
$i += 1;
}
$i+=2;
}
} else {
$outline .= substr($inline, $i, 1);
}
}
return $outline;
}
sub quotedprintableto8bit {
my($inline) = @_;
my($outline) = $inline;
$outline =~ s/=[\r\n]+//g;
$outline =~ s/=([0-9A-Fa-f][0-9A-Fa-f])/chr hex $1/ge;
return $outline;
}
sub utf7val {
my($char) = @_;
my($binstr);
$val = ord($char);
if ($char =~ m/[A-Z]/) {
$val -= 0x41;
} elsif ($char =~ m/[a-z]/) {
$val = ($val - 0x61) + 26;
} elsif ($char =~ m/\d/) {
$val = ($val - 0x30) + 52;
} elsif ($char =~ m/\+/) {
$val = 62;
} elsif ($char =~ m/\//) {
$val = 63;
}
return $val;
# print " ", unpack("b6", $val), "\n";
}
sub utf7to8 {
my($utf7str) = @_;
my($i, $utf16str, $byte, $cursix, $binstr, $utf8str);
$utf7str =~ s/\+([a-zA-Z0-9\+\/]+)(\-|\s)?/$1/;
$utf7len = length($utf7str);
$cursix = 0;
for ($i = 0; $i < $utf7len; $i++) {
$byte = utf7val(substr($utf7str, $i, 1));
$packed = pack("i", $byte);
$binstr .= substr(unpack("B8", $packed), 2, 6);
}
if ((length($binstr) % 8) != 0) {
$binstr = substr($binstr, 0, length($binstr) - (length($binstr) % 8));
}
$packed = pack("B" . length($binstr), $binstr);
$utfhex = unpack("H*", $packed);
for ($i = 0; $i < length($utfhex); $i+=4) {
$utf8str .= hex2utf8("0x" . substr($utfhex, $i, 4));
}
return $utf8str;
}
sub utf7fix {
my($inline) = @_;
my($outline) = $inline;
$outline =~ s/(\+[a-zA-Z0-9\+\/]+(\-|\s)?)/utf7to8 $1/ge;
$outline =~ s/(\+\-)/\+/g;
return $outline;
}
sub badLineBreak {
my($text) = @_;
my($linelen, $i, $char, $binchar, $firstbyte);
$firstbyte = 0;
$linelen = length($text);
for ($i = 0; $i < $linelen; $i++) {
$char = substr($text, $i, 1);
$char2 = substr($text, $i+1, 1);
$binchar = vec($text, $i, 8);
if ($binchar > 127) {
if ($firstbyte == 1) {
$firstbyte = 0;
} else {
$firstbyte = 1;
}
} else {
$firstbyte = 0;
}
if ($firstbyte == 1 &&
$char2 =~ m/\n|\r/) {
$firstbyte = 0;
return 1;
}
}
return 0;
}
sub fixLineBreak {
my($text) = @_;
my($linelen, $i, $char, $binchar, $firstbyte, $outline);
$firstbyte = 0;
$linelen = length($text);
for ($i = 0; $i < $linelen; $i++) {
$char = substr($text, $i, 1);
$char2 = substr($text, $i+1, 1);
$binchar = vec($text, $i, 8);
if ($binchar > 127) {
if ($firstbyte == 1) {
$firstbyte = 0;
} else {
$firstbyte = 1;
}
} else {
$firstbyte = 0;
}
if ($firstbyte == 1 &&
$char2 =~ m/\n|\r/) {
$firstbyte = 0;
$outline .= "\n" . $char;
$i+=2;
} else {
$outline .= $char;
}
}
return $outline;
}
sub touni {
my($escape) = shift;
my($hexval) = 0;
if ($escape =~ m/\\u/i) {
$escape =~ s/\\u//ig;
$hexval = sprintf("%X", oct("0x" . $escape));
} elsif ($escape =~ m/\&?\#x[0-9a-fA-F]{2,4};/) {
$escape =~ s/\&?\#x([0-9a-fA-F]{2,4});/$1/;
$hexval = sprintf("%X", oct("0x" . $escape));
} else {
$escape =~ s/\&?\#(\d+);/$1/;
$hexval = sprintf("%X", $escape);
}
if (length($hexval) == 3) {
$hexval = "0" . $hexval;
} elsif (length($hexval) == 2) {
$hexval = "00" . $hexval;
}
$binchar = oct("0x" . $hexval);
if ($binchar <= 127) {
$retval = pack("C", $binchar);
} elsif ($binchar <= 2047) {
$bin1 = ($binchar >> 6) | 0xC0;
$bin2 = ($binchar & 0x3F) | 0x80;
$retval = pack("C2", $bin1, $bin2);
} else {
$bin1 = ($binchar >> 12) | 0xE0;
$bin2 = (($binchar & 0x0FFF) >> 6) | 0x80;
$bin3 = ($binchar & 0x003F) | 0x80;
$retval = pack("C*", $bin1, $bin2, $bin3);
# #print "in 3 char version with $hexchar and $retval bin1 $bin1 bin2 $bin2 bin3 $bin3\n";
}
$retval;
}
sub fixbase64 {
my($basetext) = @_;
my($i, $retval, $byte1, $byte2, $byte3);
my(%alphabits);
$alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
for ($i = 0; $ i < length($alphabet); $i++) {
$alphabits{substr($alphabet, $i, 1)} = $i;
}
$basetext =~ s/^.*Content-Transfer-Encoding: base64//s;
$basetext =~ s/( |\n|\r|\t)//g;
for ($i = 0; $i < length($basetext); $i+=4) {
$inputgroup = substr($basetext, $i, 4);
$byte1 = $alphabits{substr($inputgroup, 0, 1)} * 0x04;
$byte1 |= ((0x30 & $alphabits{substr($inputgroup, 1, 1)}) >> 4);
$byte2 = (0x0F & $alphabits{substr($inputgroup, 1, 1)}) * 0x10;
$byte2 |= ((0x3c & $alphabits{substr($inputgroup, 2, 1)}) >> 2);
$byte3 = (0x03 & $alphabits{substr($inputgroup, 2, 1)}) * 0x40;
$byte3 |= $alphabits{substr($inputgroup, 3, 1)};
$outputgroup = pack("CCC", $byte1, $byte2, $byte3);
$retval .= $outputgroup;
}
$retval;
}
sub fixLatin1AsASCII {
my($atext) = @_;
my($ctext) = $atext;
&loadGBFreq;
@atol = ("!", "�",
"c", "�",
"?", "�", # or L
"*", "�",
"Y", "�",
"?", "�", # or |
"\$", "�",
"?", "�", # or "
"c", "�",
"a", "�",
"<", "�",
"?", "�", # or ~
"?", "�", # or -
"R", "�",
"?", "�", # or -
"?", "�", # or *
"?", "�", # or +
"2", "�",
"3", "�",
"?", "�",
"?", "�", # or m or u
"?", "�", # or P
"*", "�",
",", "�", # or ?
"1", "�",
"o", "�",
">", "�",
"1/4", "�",
"1/2", "�",
"3/4", "�",
"?", "�",
"A", "�",
"A", "�",
"A", "�",
"A", "�",
"A", "�",
"A", "�",
"A", "�",
"C", "�",
"E", "�",
"E", "�",
"E", "�",
"E", "�",
"I", "�",
"I", "�",
"I", "�",
"I", "�",
"D", "�",
"N", "�",
"O", "�",
"O", "�",
"O", "�",
"O", "�",
"O", "�",
"?", "�", # or x
"O", "�",
"U", "�",
"U", "�",
"U", "�",
"U", "�",
"Y", "�",
"?", "�", # or t
"?", "�", # or b or s
"a", "�",
"a", "�",
"a", "�",
"a", "�",
"a", "�",
"a", "�",
"a", "�", # or ae
"c", "�",
"e", "�",
"e", "�",
"e", "�",
"e", "�",
"i", "�",
"i", "�",
"i", "�",
"i", "�",
"/", "�", # or d
"n", "�",
"o", "�",
"o", "�",
"o", "�",
"o", "�",
"?", "�", # or /
"o", "�",
"u", "�",
"u", "�",
"u", "�",
"u", "�",
"y", "�",
"?", "�", # or t
"y", "�",
"�", "�",
"�", "�",
"�", "�",
"�", "�",
"�", "�"
);
my($i);
for ($i = 0; $i < @atol; $i+=2) {
if (!defined($ahash{$atol[$i]})) {
$ahash{$atol[$i]} = $atol[$i+1];
} else {
$ahash{$atol[$i]} .= " " . $atol[$i+1];
}
}
$ctext =~ s/1\/4/�/g;
$ctext =~ s/1\/2/�/g;
$ctext =~ s/3\/4/�/g;
# $ctext =~ s/AE/�/g;
# $ctext =~ s/ae/�/g;
for ($i = 0; $i < length($ctext); $i++) {
$curchar = substr($ctext, $i, 1);
$curchar2 = substr($ctext, $i+1, 1);
if ($curchar =~ m/\s/ or $curchar2 =~ m/\s/) {
$outtext .= $curchar;
next;
}
@bytes1 = split(/\s/, $ahash{$curchar});
@bytes2 = split(/\s/, $ahash{$curchar2});
$max = 0;
$guesschar = "??";
foreach $byte1 (@bytes1) {
foreach $byte2 (@bytes2) {
$testchar = $byte1 . $byte2;
print $testchar, " ", $gbfreq{$testchar}, "\n" if $debug;
if ($gbfreq{$testchar} > $max) {
$max = $gbfreq{$testchar};
$guesschar = $testchar;
}
}
}
print "\n" if $debug;
if ($guesschar ne "??") {
$outtext .= $guesschar;
$i++;
} else {
$outtext .= $curchar;
}
}
return $outtext;
}
sub loadGBFreq {
open(FREQ, "gbpy.freq") or print "Content-type: text/plain\n\nCan't open gbpy.freq";
my($curfreq) = 0;
my($gbchar, $py);
while (<FREQ>) {
next if m/^\#/;
($gbchar, $py) = split;
$gbfreq{$gbchar} = 6472 - $curfreq;
$curfreq++;
}
close(FREQ);
}