KGRKJGETMRETU895U-589TY5MIGM5JGB5SDFESFREWTGR54TY
Server : Apache/2.4.62
System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64
User : www ( 80)
PHP Version : 8.3.8
Disable Function : NONE
Directory :  /domains/mandarintools/cgi-bin/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Current File : /domains/mandarintools/cgi-bin/charlook.pl
#!/usr/bin/perl -- # -*- perl -*-

#$UNICHIN = "./UNICHIN.TXT";
$UNICHIN = "./UNICHIN_IDS.TXT";

require "./cgi-lib.pl";
require "./codelib.pl";

&ReadParse(*values);


# If anything goes wrong, print this
sub errormsg {
    my ($errortxt) = @_;
    print "Content-type: text/html\n\n";
    print "<HTML>\n<HEAD>\n";
    print "<TITLE>Character Look-up Error</TITLE>\n";
    print "</HEAD>\n<BODY>\n";
    print $errortxt, "<P>";
    print " Please try again. \n</BODY>\n</HTML>";
    exit;
}

&init_rads;

# The different fields and their positions in the character data file
$uchar = 0; $radstr = 1; $strokes = 2;
$pinyin = 3; $english = 4; $canton = 5; 
$var = 6; $freq = 7; $big5 = 8; $gb = 9; $ids = 10;

$searchmode = $values{'searchmode'};
$chartype = $values{'chartype'};
$printtype = $values{'printtype'};
$englishquery = $values{'english'};
$pinyinquery = $values{'pinyin'};
$cantonesequery = $values{'cantonese'};
$strokequery = $values{'strokes'};
$ordering = $values{'ordering'};
$lowerb = $values{'lowerb'};
$upperb = $values{'upperb'};
$radicalquery = $values{'radical'};
$whatchar = $values{'whatchar'};
$enctype = $values{'enctype'};
$display = $values{'display'};
$tonemarks = $values{'tonemarks'};

if (!defined($values{'searchmode'})) {
    print "Content-type: text/html\n\n";
    print "<HTML><HEAD>";
    print '<META HTTP-EQUIV="REFRESH" content="0;url=http://www.mandarintools.com/chardict.html">';
    print "</HEAD><BODY>Nothing entered.  Going back to start page";
    exit;

}

# set cutoff frequency
if ($searchmode eq "limited") {
    $cutoff = 3500;		# only frequencies above (below?) 3500 pass
} elsif ($searchmode eq "standard") {
    $cutoff = 6500;		# only frequencies above 6500 pass
} elsif ($searchmode eq "comprehensive") {
    $cutoff = 20001		# everything passes
}


# set return type
if ($chartype eq "trad") {
    if ($printtype eq "gb") {
	$printtype = "big5";  # force it to Big5
    }
} elsif ($chartype eq "simp") {
    if ($printtype eq "big5") {
	$printtype = "gb";    # force it to GB
    }
} elsif ($chartype eq "all") {
  
}

# Send the content and character set type to the browser
print "Content-type: text/html";
if ($printtype eq "gb") { 
    print "; charset=gb2312"; 
} elsif ($printtype eq "big5") {
    print "; charset=big5";
} elsif ($printtype eq "utf8") {
    print "; charset=utf-8";
} 
print "\n\n";

print "<HTML>\n";
print "<HEAD>\n";
print "<TITLE>Character Search Results</TITLE>\n";
print '<STYLE>A:link	{	
	text-decoration:	none;
	color:	#33F;
	background:	#FFFFF5;
	}	
		
A:visited	{	
	text-decoration:	none;
	color:	#93C;
	background:	#FFFFF5;
	}	
		
A:active	{	
	text-decoration:	none;
	font-weight:	bold;
	color:	black;
	background:	#CCF;
	}	
		
A:hover	{	
	text-decoration:	none;
	color:	#FFFFF5;
	background:	#33F;
	}	
</STYLE>';
print "</HEAD>\n";
print "<BODY BGCOLOR=#FFFFFF>\n";

unless ($ENV{'HTTP_REFERER'} =~ /mandarintools/i or
	$ENV{'HTTP_REFERER'} eq "" or
	$ENV{'HTTP_REFERER'} =~ /mail/i) {

print <<GOOGLE;

<center>
<script type="text/javascript"><!--
google_ad_client = "pub-1796608980793545";
google_ad_width = 468;
google_ad_height = 60;
google_ad_format = "468x60_as";
google_ad_channel ="9444390123";
google_ad_type = "text";
google_page_url = document.location;
google_color_border = "FDFFCA";
google_color_bg = "FDFFCA";
google_color_link = "0000CC";
google_color_url = "008000";
google_color_text = "000000";
//--></script>
<script type="text/javascript"
  src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</center>
<br>
GOOGLE

}


if (defined($values{'searchenglish'}) and length($englishquery) == 0) {
    if (length($pinyinquery) > 0) {
	undef($values{'searchenglish'});
	$values{'searchpinyin'} = 1;
    } elsif (length($cantonesequery) > 0) {
	undef($values{'searchenglish'});
	$values{'searchcantonese'} = 1;
    } elsif (length($whatchar) > 0) {
	undef($values{'searchenglish'});
	$values{'searchchar'} = 1;
    }
}




if (defined($values{'searchenglish'})) {
    if (length($englishquery) > 0) {
	open(UNICHIN) or errormsg("Can't open UNICHIN");
	&printhead;
	if ($ordering eq "frequency") {
	    while ($line = <UNICHIN>) {
		$line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		$engquery = $englishquery;
		if ($fields[$english] =~ m/\b$engquery\b/i) {
		    $charfreq{$fields[$uchar]} = $fields[$freq];
		    $chardata{$fields[$uchar]} = $line;
		}
	    }
	    @sortedchars = sort byfrequency keys(%charfreq);

	    if (@sortedchars == 0) { printnoresults(); last; }

	    foreach $char (@sortedchars) {
		@fields = split(/\t/, $chardata{$char});
		&printrow;
	    }
	} elsif ($ordering eq "radstroke") {
	    while ($line = <UNICHIN>) {
		$line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		$engquery = $englishquery;
		if ($fields[$english] =~ m/\b$engquery\b/i) {
		    $results++;
		    &printrow;
		}
	    }
	    if ($results == 0) { printnoresults(); last; }
	}
	print "</TABLE>\n";
	close(UNICHIN);
    }  else {
	print "No English words entered for search.  Please try again.";
    }

} elsif (defined($values{'searchpinyin'})) {
    if (length($pinyinquery) > 0) {
	open(UNICHIN);
	&printhead;
	if ($ordering eq "frequency") {
	    while ($line = <UNICHIN>) {
		$line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		$pyquery = $pinyinquery;
		# If no tone number, add reg exp for any tone
		if ($pyquery =~ m/^[a-z]+$/i) {
		    $pyquery = $pyquery . "[1-5]?";
		}
		if ($fields[$pinyin] =~ m/\b$pyquery\b/i) {
		    #print $freq;
		    $charfreq{$fields[$uchar]} = $fields[$freq];
		    $chardata{$fields[$uchar]} = $line;
		}
	    }
	    @sortedchars = sort byfrequency keys(%charfreq);

	    if (@sortedchars == 0) { printnoresults(); last; }
	    foreach $char (@sortedchars) {
		@fields = split(/\t/, $chardata{$char});
		&printrow;
	    }

	} elsif ($ordering eq "radstroke") {
	    while ($line = <UNICHIN>) {
		$line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		$pyquery = $pinyinquery;
		if ($pyquery =~ m/^[a-z]+$/i) {	       # if no tone, add one on
		    $pyquery = $pyquery . "[1-5]?";
		}
		if ($fields[$pinyin] =~ m/\b$pyquery\b/i) {
		    $results++;
		    &printrow;
		}
	    }
	    if ($results == 0) { printnoresults(); last; }
	}
	print "</TABLE>\n";
	close(UNICHIN);
    } else {
	print "No pinyin entered for search.  Please try again.";
    }
} elsif (defined($values{'searchcantonese'})) {
    if (length($cantonesequery) > 0) {
	open(UNICHIN);
	&printhead;
	if ($ordering eq "frequency") {
	    while ($line = <UNICHIN>) {
		$line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		$cantonquery = $cantonesequery;
		if ($fields[$canton] =~ m/\b$cantonquery\d?\b/i) {
		    $charfreq{$fields[$uchar]} = $fields[$freq];
		    $chardata{$fields[$uchar]} = $line;
		}
	    }
	    @sortedchars = sort byfrequency keys(%charfreq);

	    if (@sortedchars == 0) { printnoresults(); last; }

	    foreach $char (@sortedchars) {
		@fields = split(/\t/, $chardata{$char});
		&printrow;
	    }
	} elsif ($ordering eq "radstroke") {
	    while ($line = <UNICHIN>) {
		$line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		$cantonquery = $cantonesequery;
		if ($fields[$canton] =~ m/\b$cantonquery\d?\b/i) {
		    $results++;
		    &printrow;
		}
	    }
	    if ($results == 0) { printnoresults(); last; }
	}
	print "</TABLE>\n";
	close(UNICHIN);
    } else {
	print "No Cantonese entered for search.  Please try again.";
    }
} elsif (defined($values{'searchradstroke'})) {
    if (length($lowerb) > 0) {
	# Get all the stroke number boundaries in order
	if (length($upperb) == 0 || $upperb < $lowerb) 
	{
	    $upperb = $lowerb;
	} elsif ($upperb - $lowerb > 2) {
	    $upperb = $lowerb + 2;
	}
	$initrads;
	open(UNICHIN);
	&printhead;
	if ($ordering eq "frequency") {
	    while ($line = <UNICHIN>) {
        $line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		for ($i = $lowerb; $i <= $upperb; $i++) {
		    $radquery = $radicalquery;
		    if ($fields[$radstr] =~ m/^$radquery\'?\.$i$/i) {
		      $charfreq{$fields[$uchar]} = $fields[$freq];
		      $chardata{$fields[$uchar]} = $line;
                    }
		}
	    }
	    @sortedchars = sort byfrequency keys(%charfreq);
	    foreach $char (@sortedchars) {
		@fields = split(/\t/, $chardata{$char});
		&printrow;
	    }
	     
	} elsif ($ordering eq "radstroke") {
	    while ($line = <UNICHIN>) {
        $line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		for ($i = $lowerb; $i <= $upperb; $i++) {
                    $radquery = $radicalquery;
		    if ($fields[$radstr] =~ m/^$radquery\'?\.$i$/i) {
		      &printrow;
		    } 
 	        }
	    }
        }
	print "</TABLE>\n";
	close(UNICHIN);
    } else {
	print "No lower bound entered for search.  Please try again.";
    }

} elsif (defined($values{'searchchar'})) {
    $whatchar =~ s/\s//g;
    # Handle Unicode ampersand sequences
    if ($whatchar =~ m/\&\#\d+;/) {
	#print "Before: " . $whatchar . "<BR>";
	(@unichars) = ($whatchar =~ /\&\#(\d+);/g);
	$whatchar = "";
	foreach $unichar (@unichars) {
	    my($hexval) = sprintf("%X", $unichar);

	    if (length($hexval) == 3) {
		$hexval = "0" . $hexval;
	    } elsif (length($hexval) == 2) {
		$hexval = "00" . $hexval;
	    }

	    $whatchar .= &hex2utf8($hexval);
	}
	if ($englishquery eq "debug") {
	    print "After: " . $whatchar . "<BR>";
	}
    }

    # Still need to handle UTF-8 somewhere
    if (length($whatchar) > 0 and $enctype eq "utf8") {
	for ($i = 0; $i < length($whatchar); $i+=3) {
	    $whatchars[$i/3] = substr($whatchar, $i, 3);
	}
	&printhead;
	foreach $thischar (@whatchars) {
	    $encodingfield = $uchar;

            $thischar = bytes2hex(utf82ucs($thischar));
            $thischar = substr($thischar, 2, 4);  # Remove 0x at beginning


	    open(UNICHIN) or print "Unable to open UNICHIN_IDS.TXT: $!\n";;
	    while ($line = <UNICHIN>) {
		$line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		if ($fields[$encodingfield] eq $thischar) {
		    &printrow;
		    last;
		}
	    }
	    close(UNICHIN);	    
	}
	print "</TABLE>\n";

    } elsif (length($whatchar) > 0) {
	for ($i = 0; $i < length($whatchar); $i+=2) {
	    $whatchars[$i/2] = substr($whatchar, $i, 2);
	}
	&printhead;
	foreach $thischar (@whatchars) {
	    if ($enctype eq "big5") {
		$encodingfield = $big5;
	    } elsif ($enctype eq "gb") {
		$encodingfield = $gb;
	    } else {  # unicode
		$encodingfield = $uchar;
	    }

            $thischar = bytes2hex($thischar);
            $thischar = substr($thischar, 2, 4);  # Remove 0x at beginning

	    open(UNICHIN) or print "Unable to open UNICHIN.TXT: $!\n";;
	    while ($line = <UNICHIN>) {
		$line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		if ($fields[$encodingfield] eq $thischar) {
		    &printrow;
		    last;
		}
	    }
	    close(UNICHIN);	    
	}
	print "</TABLE>\n";
    } else {
	print "No character entered for search.  Please try again.";
    }
} elsif (defined($values{'searchcomponent'})) {
    $whatchar =~ s/\s//g;
    # Handle Unicode ampersand sequences
    if ($whatchar =~ m/\&\#\d+;/) {
	#print "Before: " . $whatchar . "<BR>";
	(@unichars) = ($whatchar =~ /(\&\#\d+;)/g);
	$whatchar = "";
	foreach $unichar (@unichars) {
	    $whatchar .= hex2utf8($unichar);
	}
	#print "After: " . $whatchar . "<BR>";
    }

    &printhead;

    open(UNICHIN) or print "Unable to open UNICHIN.TXT: $!\n";;
    while ($line = <UNICHIN>) {
	$line =~ s/[\r\n]*$//;
	@fields = split(/\t/, $line);
	if ($fields[$ids] =~ m/$whatchar/) {
	    &printrow;
	}
    }
    close(UNICHIN);	    

    print "</TABLE>\n";


} elsif (defined($values{'searchstrokes'})) {
    if (length($strokequery) > 0 && $strokequery =~ m/^\d{1,2}$/) {
	open(UNICHIN);
	&printhead;
	if ($ordering eq "frequency") {
	    while ($line = <UNICHIN>) {
		$line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		if ($fields[$strokes] == $strokequery) {
		    $charfreq{$fields[$uchar]} = $fields[$freq];
		    $chardata{$fields[$uchar]} = $line;
		}
	    }
	    @sortedchars = sort byfrequency keys(%charfreq);

	    if (@sortedchars == 0) { printnoresults(); last; }

	    foreach $char (@sortedchars) {
		@fields = split(/\t/, $chardata{$char});
		&printrow;
	    }
	} elsif ($ordering eq "radstroke") {
	    while ($line = <UNICHIN>) {
		$line =~ s/[\r\n]*$//;
		@fields = split(/\t/, $line);
		if ($fields[$strokes] == $strokequery) {
		    $results++;
		    &printrow;
		}
	    }
	    if ($results == 0) { printnoresults(); last; }
	}
	print "</TABLE>\n";
	close(UNICHIN);
    } else {
	print "No stroke count entered for search.  Please try again.";
    }
}

#if ($ENV{'HTTP_REFERER'} !~ /onkellotus/i or
#    ($ENV{'HTTP_REFERER'} ne "" and $results != 1)) {
    print "<P>\n<HR>\n<P>";
#}

if (0) {

    print <<GOOGLE;

<!--
<script type="text/javascript"><!--
    google_ad_client = "pub-1796608980793545";
/* 120x600, created 7/22/08 */
    google_ad_slot = "4333306884";
google_ad_width = 120;
google_ad_height = 600;
//-->
</script>
<script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
-->

GOOGLE

}

print <<EOHTML;

<center>
<script type="text/javascript"><!--
    google_ad_client = "pub-1796608980793545";
/* 728x90, created 3/6/09 */
    google_ad_slot = "9234409569";
 google_ad_width = 728;
 google_ad_height = 90;
//-->
</script>
<script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</center>
EOHTML

    print "<font color=white>Referrer: " . $ENV{'HTTP_REFERER'} . "</font>";

print "</BODY>\n</HTML>";


sub printhead {
    #print "<font color=white>$display</font>";
    print "<TABLE BORDER=\"2\">\n";
    print "<TR>\n";
    print "<EM>";
    
    print "<TD>Character</TD>" if $display =~ m/\bchar\b/;
    print "<TD>Radical/Stroke</TD>" if $display =~ m/\bradstroke\b/;
    print "<TD>Total Strokes</TD>" if $display =~ m/\bstrokes\b/;
    print "<TD>Pinyin</TD>" if $display =~ m/\bpinyin\b/;
    print "<TD>English Def.</TD>" if $display =~ m/\benglish\b/;
    print "<TD>Cantonese</TD>" if $display =~ m/\bcantonese\b/;
    print "<TD>Variants</TD>" if $display =~ m/\bvariants\b/;
    print "<TD>Unicode</TD>" if $display =~ m/\bunicode\b/;
    print "<TD>GB</TD>" if $display =~ m/\bgb\b/;
    print "<TD>Big5</TD>" if $display =~ m/\bbig5\b/;
    print "<TD>Components</TD>" if $display =~ m/\bparts\b/;

    print "</EM></TR>\n";
}

sub printnoresults {
    print "</TABLE>";
    print "<P>";
    print "Your query did not return any results.  The are several possible reasons for this";
    print "<OL><LI> This dictionary only looks at single Chinese characters.  Many if not most";
    print " words in Chinese use two or more characters.  Try the <A HREF=\"http://www.mandarintools.com/worddict.html\">";
    print " Chinese word dictionary</A> at this same site ";
    print "<LI> You may have misspelled the query</LI>\n";
    print "<LI> The dictionary may not have the exact word, but it may have a synonym of it.  Try several variations.</LI>\n";
    print "<LI>There just aren't any characters in the dictionary that match your query\n";
    print "</OL><P>\n";
    #print $values{'searchcantonese'} . " " . $values{'cantonese'};

}
 
sub printrow {
    my($utf8char, $utf8hex);
    return unless $fields[$freq] <= $cutoff || defined($values{'searchchar'});
    # Make sure if printing in Big5 or GB that the character is in those encodings
    if ($printtype eq "gb") {
	return if $fields[$gb] eq "-";
    } elsif ($printtype eq "big5") {
	return if $fields[$big5] eq "-";
    }
    if ($chartype eq "simp" and $fields[$gb] eq "-" and !defined($values{'searchchar'})) { return; }
    if ($chartype eq "trad" and $fields[$big5] eq "-" and !defined($values{'searchchar'})) { return; }
    # Get UTF-8 in hex
    $utf8char = hex2utf8($fields[$uchar]);
    $utf8hex = sprintf("%%%X%%%X%%%X", vec($utf8char, 0, 8), vec($utf8char, 1, 8), vec($utf8char, 2, 8));

    #Start the Row HTML
    print "<TR>\n";
    if ($display =~ m/\bchar\b/) {
	print "<TD><font size=+2>";
	if ($printtype eq "gif") {
	    print "<A HREF=\"http://www.mandarintools.com/cgi-bin/wordlook.pl?word=$utf8hex";
	    if ($chartype eq "simp") {
 		print "\&searchtype=simp";
	    } elsif ($chartype eq "trad") {
 		print "\&searchtype=trad";
	    } elsif ($chartype eq "all") {
 		print "\&searchtype=chinese";
	    }

# 	    print "<A HREF=\"http://www.monashwushu.com/cgi-bin/wordlook.pl?word=";
# 	    if ($fields[$gb] ne "-") {
# 		print "\%" . substr($fields[$gb], 0, 2) . "\%" . substr($fields[$gb], 2, 2);
# 		#print "\&searchtype=gb\&output=gb";
# 		print "\&searchtype=gb";
# 	    } elsif ($fields[$big5] ne "-") {
# 		print "\%" . substr($fields[$big5], 0, 2) . "\%" . substr($fields[$big5], 2, 2);
# 		#print "\&searchtype=big5\&output=big5";
# 		print "\&searchtype=big5";
# 	    } else {
# 		print "\%" . substr($fields[$uchar], 0, 2) . "\%" . substr($fields[$uchar], 2, 2);
# 		print "\&searchtype=ucs2"; #\&output=ucs2";
# 	    }
	    print "\&where=start\">";
	    print "<IMG HEIGHT=30 WIDTH=16 SRC=\"ugif/$fields[$uchar].gif\">";
	    print "</A>";
	} elsif ($printtype eq "utf8") {
	    print "<A HREF=\"http://www.mandarintools.com/cgi-bin/wordlook.pl?word=$utf8hex";
	    if ($chartype eq "simp") {
 		print "\&searchtype=simp";
	    } elsif ($chartype eq "trad") {
 		print "\&searchtype=trad";
	    } elsif ($chartype eq "all") {
 		print "\&searchtype=chinese";
	    }

	    #print "<A HREF=\"http://epsilon3.georgetown.edu/~petersee/cgi-bin/wordlook.cgi?word=";
	    #print "<A HREF=\"http://www.monashwushu.com/cgi-bin/wordlook.pl?word=";
# 	    if ($fields[$gb] ne "-") {
# 		print "\%" . substr($fields[$gb], 0, 2) . "\%" . substr($fields[$gb], 2, 2);
# 		print "\&searchtype=gb"; #\&output=gb";
# 	    } elsif ($fields[$big5] ne "-") {
# 		print "\%" . substr($fields[$big5], 0, 2) . "\%" . substr($fields[$big5], 2, 2);
# 		print "\&searchtype=big5"; #\&output=big5";
# 	    } else {
# 		print "\%" . substr($fields[$uchar], 0, 2) . "\%" . substr($fields[$uchar], 2, 2);
# 		print "\&searchtype=ucs2"; #\&output=ucs2";
# 	    }
	    print "\&where=start\">";
	    print hex2utf8($fields[$uchar]);
	    print "</A>";
	} elsif ($printtype eq "gb") {
	    print "<A HREF=\"http://www.mandarintools.com/cgi-bin/wordlook.pl?word=$utf8hex";
	    #print "<A HREF=\"http://epsilon3.georgetown.edu/~petersee/cgi-bin/wordlook.cgi?word=";
	    #print "<A HREF=\"http://www.monashwushu.com/cgi-bin/wordlook.pl?word=";
	    #print "\%" . substr($fields[$gb], 0, 2) . "\%" . substr($fields[$gb], 2, 2);
	    print "\&searchtype=simp\&where=start\">";
	    print hex2gb($fields[$gb]);
	    print "</A>";
	} else {
	    #print "<A HREF=\"http://www.mandarintools.com/cgi-bin/wordlook.pl?word=";
	    #print "<A HREF=\"http://epsilon3.georgetown.edu/~petersee/cgi-bin/wordlook.cgi?word=";
	    #print "<A HREF=\"http://www.monashwushu.com/cgi-bin/wordlook.pl?word=";
	    print "<A HREF=\"http://www.mandarintools.com/cgi-bin/wordlook.pl?word=$utf8hex";
	    #print "\%" . substr($fields[$big5], 0, 2) . "\%" . substr($fields[$big5], 2, 2);
	    print "\&searchtype=trad\&where=start\">";
	    print hex2big5($fields[$big5]);
	    print "</A>";
	}
	print "</font></TD>";
    }
    
    # Print radical/stroke
    if ($display =~ m/\bradstroke\b/) {
	($rad, $str) = $fields[$radstr] =~ m/^([0-9]+)\'?\.([0-9]+)$/;
	print "<TD>";

	if (-e "../animations/$fields[$uchar]a.gif") {
	    print "<A HREF=\"http://www.mandarintools.com/animations/$fields[$uchar]a.gif\">";
	}

	if ($printtype eq "utf8") {
	    print hex2utf8($rads[$rad-1]) , " + $str";
	} else {
	    print "<IMG HEIGHT=30 WIDTH=16 SRC=\"ugif/$rads[$rad-1].gif\"> + $str";
	}
	if (-e "../animations/$fields[$uchar]a.gif") { print "</A>"; }
	print "</TD>";
    }

    # Print Total strokes
    if ($display =~ m/\bstrokes\b/) {
	print "<TD>$fields[$strokes]</TD>";
    }

    # Print pinyin
    if ($display =~ m/\bpinyin\b/) {
	my(@pys) = split(/\s+/, $fields[$pinyin]);
	print "<TD>";
	foreach $audiblepy (@pys) {
	    $audiblepy = lc($audiblepy);
	    $displaypy = $audiblepy;
	    if ($tonemarks) {
		$displaypy = &addTones($audiblepy);
	    }
	    #print "<A HREF=\"http://www.bell-labs.com/cgi-user/tts/mandarintts\?text=$audiblepy\&rate=slowest\&audio=.wav\">";
	    if (-e "../sounds/$audiblepy.aif") {
		#print "<A HREF=\"https://research.microsoft.com/speech/tts/TTS.dll?TTS\?Text=" . 
		#    "\%" . substr($fields[$gb], 0, 2) . "\%" . substr($fields[$gb], 2, 2) . "\">";
		print "<A HREF=\"http://www.mandarintools.com/sounds/$audiblepy.aif\">";
		print "$displaypy";
		print "</A> "; 
	    } else {
		print "$displaypy ";
	    }
	}
	print "</TD>";
    }

    # Print English definition
    if ($display =~ m/\benglish\b/) {
	print "<TD>$fields[$english]</TD>";
    }

    # Print Cantonese
    if ($display =~ m/\bcantonese\b/) {
	print "<TD>\L$fields[$canton]\E</TD>";
    }

    # Print Variants
    if ($display =~ m/\bvariants\b/) {
	if ($fields[var] =~ s/^S (....)$/\1/) {
	    print "<TD>Simp.: ";
	    if ($printtype eq "utf8") {
		print hex2utf8($fields[$var]);
	    } else {
		print "<IMG HEIGHT=30 WIDTH=16 SRC=\"ugif/$fields[$var].gif\">";
	    }
	    print "</TD>";
	} elsif ($fields[$var] =~ s/^T (....)$/\1/) {
	    print "<TD>Trad.: ";
	    if ($printtype eq "utf8") {
		print hex2utf8($fields[$var]);
	    } else {
		print "<IMG HEIGHT=30 WIDTH=16 SRC=\"ugif/$fields[$var].gif\">";
	    }
	    print "</TD>";
	} else {
	    print "<TD> -- </TD>";	# Take up space
	}
    }

    # Print Unicode Hex
    if ($display =~ m/\bunicode\b/) {
	print "<TD>$fields[$uchar]</TD>";
    }

    # Print GB Hex
    if ($display =~ m/\bgb\b/) {
	print "<TD>$fields[$gb]</TD>";
    }

    # Print Big5 Hex
    if ($display =~ m/\bbig5\b/) {
	print "<TD>$fields[$big5]</TD>";
    }

    # Print Big5 Hex
    if ($display =~ m/\bparts\b/) {
	print "<TD>";
	if ($fields[$ids] eq "-" or length($fields[$ids]) % 3 != 0) {
	    print "-";
	} else {
	    for ($idsIndex = 0; $idsIndex < length($fields[$ids]); $idsIndex+=3) {
		$idschar = substr($fields[$ids], $idsIndex, 3);
		$idshex = utf82hex($idschar);
		next if oct("0x$idshex") >= 0x2ff0 and oct("0x$idshex") <= 0x2fff;
		print "<A HREF=\"http://www.mandarintools.com/cgi-bin/charlook.pl?";
		print "searchmode=$searchmode\&";
		print "printtype=$printtype\&";
		print "chartype=$chartype\&";
		print "ordering=$ordering\&";
		foreach $displayunit (split(/[^a-zA-Z]+/, $display)) {
		    print "display=$displayunit\&";
		}
		print "english=\&cantonese=\&pinyin=\&";
		print "searchcomponent=Search+Component\&";
		print "enctype=$enctype\&";
		print "whatchar=$idschar";
		print "\">";
		print "\&\#x$idshex;";
		print "</A>";
	    }
	}
	print "</TD>";
    }

    print "\n</TR>\n";
}


sub byfrequency {		# procedure used to sort unicode characters by frequency
    $charfreq{$a} <=> $charfreq{$b};
}


sub init_rads {
    open(RAD, "purerads") or print "Unable to open purerads: $!\n";
    while ($line = <RAD>) {
	$line =~ m/^(....)/;
	$urad = $1;
	push @rads, $urad;
    }
    close(RAD);
}


sub touni {
    my($escape) = @_;
    $escape =~ s/\&\#(\d+);/$1/;
    my($hexval) = sprintf("%X", $escape);

    if (length($hexval) == 3) {
	$hexval = "0" . $hexval;
    } elsif (length($hexval) == 2) {
	$hexval = "00" . $hexval;
    }

    return pack("CC", hex(substr($hexval, 0, 2)), hex(substr($hexval, 2, 2)));
}


sub addTones {
    my($withnumbers) = shift;
    my($i);
    $withnumbers =~ s/ng(\d)\b/${1}ng/g;
    $withnumbers =~ s/n(\d)\b/${1}n/g;
    $withnumbers =~ s/ao(\d)\b/a${1}o/g;
    $withnumbers =~ s/ai(\d)\b/a${1}i/g;
    $withnumbers =~ s/ei(\d)\b/e${1}i/g;
    $withnumbers =~ s/ou(\d)\b/o${1}u/g;
    $withnumbers =~ s/r(\d)\b/${1}r/g;

    @tonenums = ("a1", "a2", "a3", "a4", "a5", "e1", "e2", "e3", "e4", "e5",
                 "i1", "i2", "i3", "i4", "i5", "o1", "o2", "o3", "o4", "o5",
                 "uu1", "uu2", "uu3", "uu4", "uu5", "uu",
                 "u1", "u2", "u3", "u4", "u5",
                 "v1", "v2", "v3", "v4", "v5",
                 "u:1", "u:2", "u:3", "u:4", "u:5", "u:",
                 "A1", "A2", "A3", "A4", "A5", "E1", "E2", "E3", "E4", "E5",
                 "I1", "I2", "I3", "I4", "I5", "O1", "O2", "O3", "O4", "O5",
                 "U1", "U2", "U3", "U4", "U5",
                 "U:1", "U:2", "U:3", "U:4", "U:5", "U:"
        );
    @tonemarks = ('&#x0101;', '&aacute;', '&#x01ce;', '&#x00e0;', 'a',
                  '&#x0113;', '&#x00e9;', '&#x011b;', '&#x00e8;', 'e',
                    '&#x012b;', '&#x00ed;', '&#x01d0;', '&#x00ec;', 'i',
                    '&#x014d;', '&#x00f3;', '&#x01d2;', '&#x00f2;', 'o',
                    '&#x01d6;', '&#x01d8;', '&#x01da;', '&#x01dc;', '&#x00fc;', '&#x00fc;',
                    '&#x016b;', '&#x00fa;', '&#x01d4;', '&#x00f9;', 'u',
                    '&#x01d6;', '&#x01d8;', '&#x01da;', '&#x01dc;', '&#x00fc;', '&#x00fc;',
                    '&#x01d6;', '&#x01d8;', '&#x01da;', '&#x01dc;', '&#x00fc;', '&#x00fc;',
                    '&#x0100;', '&#x00c1;', '&#x01cd;', '&#x00c0;', 'A',
                    '&#x0112;', '&#x00c9;', '&#x011a;', '&#x00c8;', 'E',
                    '&#x012a;', '&#x00cd;', '&#x01cf;', '&#x00cc;', 'I',
                    '&#x014c;', '&#x00d3;', '&#x01d1;', '&#x00d2;', 'O',
                    '&#x016a;', '&#x00da;', '&#x01d3;', '&#x00d9;', 'U',
                    '&#x01d5;', '&#x01d7;', '&#x01d9;', '&#x01db;', '&#x00dc;', '&#x00dc;'
        );

    for ($i = 0; $i < scalar(@tonenums); $i++) {
        $withnumbers =~ s/$tonenums[$i]/$tonemarks[$i]/ge;
    }
    $withnumbers =~ s/([aeioung])5/$1/g;

    return $withnumbers;
}

Anon7 - 2021