|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/cgi-bin/ |
Upload File : |
#!/usr/bin/perl -- # -*- perl -*-
#$UNICHIN = "./UNICHIN.TXT";
$UNICHIN = "./UNICHIN_IDS.TXT";
require "./cgi-lib.pl";
require "./codelib.pl";
&ReadParse(*values);
# If anything goes wrong, print this
sub errormsg {
my ($errortxt) = @_;
print "Content-type: text/html\n\n";
print "<HTML>\n<HEAD>\n";
print "<TITLE>Character Look-up Error</TITLE>\n";
print "</HEAD>\n<BODY>\n";
print $errortxt, "<P>";
print " Please try again. \n</BODY>\n</HTML>";
exit;
}
&init_rads;
# The different fields and their positions in the character data file
$uchar = 0; $radstr = 1; $strokes = 2;
$pinyin = 3; $english = 4; $canton = 5;
$var = 6; $freq = 7; $big5 = 8; $gb = 9; $ids = 10;
$searchmode = $values{'searchmode'};
$chartype = $values{'chartype'};
$printtype = $values{'printtype'};
$englishquery = $values{'english'};
$pinyinquery = $values{'pinyin'};
$cantonesequery = $values{'cantonese'};
$strokequery = $values{'strokes'};
$ordering = $values{'ordering'};
$lowerb = $values{'lowerb'};
$upperb = $values{'upperb'};
$radicalquery = $values{'radical'};
$whatchar = $values{'whatchar'};
$enctype = $values{'enctype'};
$display = $values{'display'};
$tonemarks = $values{'tonemarks'};
if (!defined($values{'searchmode'})) {
print "Content-type: text/html\n\n";
print "<HTML><HEAD>";
print '<META HTTP-EQUIV="REFRESH" content="0;url=http://www.mandarintools.com/chardict.html">';
print "</HEAD><BODY>Nothing entered. Going back to start page";
exit;
}
# set cutoff frequency
if ($searchmode eq "limited") {
$cutoff = 3500; # only frequencies above (below?) 3500 pass
} elsif ($searchmode eq "standard") {
$cutoff = 6500; # only frequencies above 6500 pass
} elsif ($searchmode eq "comprehensive") {
$cutoff = 20001 # everything passes
}
# set return type
if ($chartype eq "trad") {
if ($printtype eq "gb") {
$printtype = "big5"; # force it to Big5
}
} elsif ($chartype eq "simp") {
if ($printtype eq "big5") {
$printtype = "gb"; # force it to GB
}
} elsif ($chartype eq "all") {
}
# Send the content and character set type to the browser
print "Content-type: text/html";
if ($printtype eq "gb") {
print "; charset=gb2312";
} elsif ($printtype eq "big5") {
print "; charset=big5";
} elsif ($printtype eq "utf8") {
print "; charset=utf-8";
}
print "\n\n";
print "<HTML>\n";
print "<HEAD>\n";
print "<TITLE>Character Search Results</TITLE>\n";
print '<STYLE>A:link {
text-decoration: none;
color: #33F;
background: #FFFFF5;
}
A:visited {
text-decoration: none;
color: #93C;
background: #FFFFF5;
}
A:active {
text-decoration: none;
font-weight: bold;
color: black;
background: #CCF;
}
A:hover {
text-decoration: none;
color: #FFFFF5;
background: #33F;
}
</STYLE>';
print "</HEAD>\n";
print "<BODY BGCOLOR=#FFFFFF>\n";
unless ($ENV{'HTTP_REFERER'} =~ /mandarintools/i or
$ENV{'HTTP_REFERER'} eq "" or
$ENV{'HTTP_REFERER'} =~ /mail/i) {
print <<GOOGLE;
<center>
<script type="text/javascript"><!--
google_ad_client = "pub-1796608980793545";
google_ad_width = 468;
google_ad_height = 60;
google_ad_format = "468x60_as";
google_ad_channel ="9444390123";
google_ad_type = "text";
google_page_url = document.location;
google_color_border = "FDFFCA";
google_color_bg = "FDFFCA";
google_color_link = "0000CC";
google_color_url = "008000";
google_color_text = "000000";
//--></script>
<script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</center>
<br>
GOOGLE
}
if (defined($values{'searchenglish'}) and length($englishquery) == 0) {
if (length($pinyinquery) > 0) {
undef($values{'searchenglish'});
$values{'searchpinyin'} = 1;
} elsif (length($cantonesequery) > 0) {
undef($values{'searchenglish'});
$values{'searchcantonese'} = 1;
} elsif (length($whatchar) > 0) {
undef($values{'searchenglish'});
$values{'searchchar'} = 1;
}
}
if (defined($values{'searchenglish'})) {
if (length($englishquery) > 0) {
open(UNICHIN) or errormsg("Can't open UNICHIN");
&printhead;
if ($ordering eq "frequency") {
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
$engquery = $englishquery;
if ($fields[$english] =~ m/\b$engquery\b/i) {
$charfreq{$fields[$uchar]} = $fields[$freq];
$chardata{$fields[$uchar]} = $line;
}
}
@sortedchars = sort byfrequency keys(%charfreq);
if (@sortedchars == 0) { printnoresults(); last; }
foreach $char (@sortedchars) {
@fields = split(/\t/, $chardata{$char});
&printrow;
}
} elsif ($ordering eq "radstroke") {
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
$engquery = $englishquery;
if ($fields[$english] =~ m/\b$engquery\b/i) {
$results++;
&printrow;
}
}
if ($results == 0) { printnoresults(); last; }
}
print "</TABLE>\n";
close(UNICHIN);
} else {
print "No English words entered for search. Please try again.";
}
} elsif (defined($values{'searchpinyin'})) {
if (length($pinyinquery) > 0) {
open(UNICHIN);
&printhead;
if ($ordering eq "frequency") {
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
$pyquery = $pinyinquery;
# If no tone number, add reg exp for any tone
if ($pyquery =~ m/^[a-z]+$/i) {
$pyquery = $pyquery . "[1-5]?";
}
if ($fields[$pinyin] =~ m/\b$pyquery\b/i) {
#print $freq;
$charfreq{$fields[$uchar]} = $fields[$freq];
$chardata{$fields[$uchar]} = $line;
}
}
@sortedchars = sort byfrequency keys(%charfreq);
if (@sortedchars == 0) { printnoresults(); last; }
foreach $char (@sortedchars) {
@fields = split(/\t/, $chardata{$char});
&printrow;
}
} elsif ($ordering eq "radstroke") {
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
$pyquery = $pinyinquery;
if ($pyquery =~ m/^[a-z]+$/i) { # if no tone, add one on
$pyquery = $pyquery . "[1-5]?";
}
if ($fields[$pinyin] =~ m/\b$pyquery\b/i) {
$results++;
&printrow;
}
}
if ($results == 0) { printnoresults(); last; }
}
print "</TABLE>\n";
close(UNICHIN);
} else {
print "No pinyin entered for search. Please try again.";
}
} elsif (defined($values{'searchcantonese'})) {
if (length($cantonesequery) > 0) {
open(UNICHIN);
&printhead;
if ($ordering eq "frequency") {
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
$cantonquery = $cantonesequery;
if ($fields[$canton] =~ m/\b$cantonquery\d?\b/i) {
$charfreq{$fields[$uchar]} = $fields[$freq];
$chardata{$fields[$uchar]} = $line;
}
}
@sortedchars = sort byfrequency keys(%charfreq);
if (@sortedchars == 0) { printnoresults(); last; }
foreach $char (@sortedchars) {
@fields = split(/\t/, $chardata{$char});
&printrow;
}
} elsif ($ordering eq "radstroke") {
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
$cantonquery = $cantonesequery;
if ($fields[$canton] =~ m/\b$cantonquery\d?\b/i) {
$results++;
&printrow;
}
}
if ($results == 0) { printnoresults(); last; }
}
print "</TABLE>\n";
close(UNICHIN);
} else {
print "No Cantonese entered for search. Please try again.";
}
} elsif (defined($values{'searchradstroke'})) {
if (length($lowerb) > 0) {
# Get all the stroke number boundaries in order
if (length($upperb) == 0 || $upperb < $lowerb)
{
$upperb = $lowerb;
} elsif ($upperb - $lowerb > 2) {
$upperb = $lowerb + 2;
}
$initrads;
open(UNICHIN);
&printhead;
if ($ordering eq "frequency") {
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
for ($i = $lowerb; $i <= $upperb; $i++) {
$radquery = $radicalquery;
if ($fields[$radstr] =~ m/^$radquery\'?\.$i$/i) {
$charfreq{$fields[$uchar]} = $fields[$freq];
$chardata{$fields[$uchar]} = $line;
}
}
}
@sortedchars = sort byfrequency keys(%charfreq);
foreach $char (@sortedchars) {
@fields = split(/\t/, $chardata{$char});
&printrow;
}
} elsif ($ordering eq "radstroke") {
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
for ($i = $lowerb; $i <= $upperb; $i++) {
$radquery = $radicalquery;
if ($fields[$radstr] =~ m/^$radquery\'?\.$i$/i) {
&printrow;
}
}
}
}
print "</TABLE>\n";
close(UNICHIN);
} else {
print "No lower bound entered for search. Please try again.";
}
} elsif (defined($values{'searchchar'})) {
$whatchar =~ s/\s//g;
# Handle Unicode ampersand sequences
if ($whatchar =~ m/\&\#\d+;/) {
#print "Before: " . $whatchar . "<BR>";
(@unichars) = ($whatchar =~ /\&\#(\d+);/g);
$whatchar = "";
foreach $unichar (@unichars) {
my($hexval) = sprintf("%X", $unichar);
if (length($hexval) == 3) {
$hexval = "0" . $hexval;
} elsif (length($hexval) == 2) {
$hexval = "00" . $hexval;
}
$whatchar .= &hex2utf8($hexval);
}
if ($englishquery eq "debug") {
print "After: " . $whatchar . "<BR>";
}
}
# Still need to handle UTF-8 somewhere
if (length($whatchar) > 0 and $enctype eq "utf8") {
for ($i = 0; $i < length($whatchar); $i+=3) {
$whatchars[$i/3] = substr($whatchar, $i, 3);
}
&printhead;
foreach $thischar (@whatchars) {
$encodingfield = $uchar;
$thischar = bytes2hex(utf82ucs($thischar));
$thischar = substr($thischar, 2, 4); # Remove 0x at beginning
open(UNICHIN) or print "Unable to open UNICHIN_IDS.TXT: $!\n";;
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
if ($fields[$encodingfield] eq $thischar) {
&printrow;
last;
}
}
close(UNICHIN);
}
print "</TABLE>\n";
} elsif (length($whatchar) > 0) {
for ($i = 0; $i < length($whatchar); $i+=2) {
$whatchars[$i/2] = substr($whatchar, $i, 2);
}
&printhead;
foreach $thischar (@whatchars) {
if ($enctype eq "big5") {
$encodingfield = $big5;
} elsif ($enctype eq "gb") {
$encodingfield = $gb;
} else { # unicode
$encodingfield = $uchar;
}
$thischar = bytes2hex($thischar);
$thischar = substr($thischar, 2, 4); # Remove 0x at beginning
open(UNICHIN) or print "Unable to open UNICHIN.TXT: $!\n";;
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
if ($fields[$encodingfield] eq $thischar) {
&printrow;
last;
}
}
close(UNICHIN);
}
print "</TABLE>\n";
} else {
print "No character entered for search. Please try again.";
}
} elsif (defined($values{'searchcomponent'})) {
$whatchar =~ s/\s//g;
# Handle Unicode ampersand sequences
if ($whatchar =~ m/\&\#\d+;/) {
#print "Before: " . $whatchar . "<BR>";
(@unichars) = ($whatchar =~ /(\&\#\d+;)/g);
$whatchar = "";
foreach $unichar (@unichars) {
$whatchar .= hex2utf8($unichar);
}
#print "After: " . $whatchar . "<BR>";
}
&printhead;
open(UNICHIN) or print "Unable to open UNICHIN.TXT: $!\n";;
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
if ($fields[$ids] =~ m/$whatchar/) {
&printrow;
}
}
close(UNICHIN);
print "</TABLE>\n";
} elsif (defined($values{'searchstrokes'})) {
if (length($strokequery) > 0 && $strokequery =~ m/^\d{1,2}$/) {
open(UNICHIN);
&printhead;
if ($ordering eq "frequency") {
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
if ($fields[$strokes] == $strokequery) {
$charfreq{$fields[$uchar]} = $fields[$freq];
$chardata{$fields[$uchar]} = $line;
}
}
@sortedchars = sort byfrequency keys(%charfreq);
if (@sortedchars == 0) { printnoresults(); last; }
foreach $char (@sortedchars) {
@fields = split(/\t/, $chardata{$char});
&printrow;
}
} elsif ($ordering eq "radstroke") {
while ($line = <UNICHIN>) {
$line =~ s/[\r\n]*$//;
@fields = split(/\t/, $line);
if ($fields[$strokes] == $strokequery) {
$results++;
&printrow;
}
}
if ($results == 0) { printnoresults(); last; }
}
print "</TABLE>\n";
close(UNICHIN);
} else {
print "No stroke count entered for search. Please try again.";
}
}
#if ($ENV{'HTTP_REFERER'} !~ /onkellotus/i or
# ($ENV{'HTTP_REFERER'} ne "" and $results != 1)) {
print "<P>\n<HR>\n<P>";
#}
if (0) {
print <<GOOGLE;
<!--
<script type="text/javascript"><!--
google_ad_client = "pub-1796608980793545";
/* 120x600, created 7/22/08 */
google_ad_slot = "4333306884";
google_ad_width = 120;
google_ad_height = 600;
//-->
</script>
<script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
-->
GOOGLE
}
print <<EOHTML;
<center>
<script type="text/javascript"><!--
google_ad_client = "pub-1796608980793545";
/* 728x90, created 3/6/09 */
google_ad_slot = "9234409569";
google_ad_width = 728;
google_ad_height = 90;
//-->
</script>
<script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</center>
EOHTML
print "<font color=white>Referrer: " . $ENV{'HTTP_REFERER'} . "</font>";
print "</BODY>\n</HTML>";
sub printhead {
#print "<font color=white>$display</font>";
print "<TABLE BORDER=\"2\">\n";
print "<TR>\n";
print "<EM>";
print "<TD>Character</TD>" if $display =~ m/\bchar\b/;
print "<TD>Radical/Stroke</TD>" if $display =~ m/\bradstroke\b/;
print "<TD>Total Strokes</TD>" if $display =~ m/\bstrokes\b/;
print "<TD>Pinyin</TD>" if $display =~ m/\bpinyin\b/;
print "<TD>English Def.</TD>" if $display =~ m/\benglish\b/;
print "<TD>Cantonese</TD>" if $display =~ m/\bcantonese\b/;
print "<TD>Variants</TD>" if $display =~ m/\bvariants\b/;
print "<TD>Unicode</TD>" if $display =~ m/\bunicode\b/;
print "<TD>GB</TD>" if $display =~ m/\bgb\b/;
print "<TD>Big5</TD>" if $display =~ m/\bbig5\b/;
print "<TD>Components</TD>" if $display =~ m/\bparts\b/;
print "</EM></TR>\n";
}
sub printnoresults {
print "</TABLE>";
print "<P>";
print "Your query did not return any results. The are several possible reasons for this";
print "<OL><LI> This dictionary only looks at single Chinese characters. Many if not most";
print " words in Chinese use two or more characters. Try the <A HREF=\"http://www.mandarintools.com/worddict.html\">";
print " Chinese word dictionary</A> at this same site ";
print "<LI> You may have misspelled the query</LI>\n";
print "<LI> The dictionary may not have the exact word, but it may have a synonym of it. Try several variations.</LI>\n";
print "<LI>There just aren't any characters in the dictionary that match your query\n";
print "</OL><P>\n";
#print $values{'searchcantonese'} . " " . $values{'cantonese'};
}
sub printrow {
my($utf8char, $utf8hex);
return unless $fields[$freq] <= $cutoff || defined($values{'searchchar'});
# Make sure if printing in Big5 or GB that the character is in those encodings
if ($printtype eq "gb") {
return if $fields[$gb] eq "-";
} elsif ($printtype eq "big5") {
return if $fields[$big5] eq "-";
}
if ($chartype eq "simp" and $fields[$gb] eq "-" and !defined($values{'searchchar'})) { return; }
if ($chartype eq "trad" and $fields[$big5] eq "-" and !defined($values{'searchchar'})) { return; }
# Get UTF-8 in hex
$utf8char = hex2utf8($fields[$uchar]);
$utf8hex = sprintf("%%%X%%%X%%%X", vec($utf8char, 0, 8), vec($utf8char, 1, 8), vec($utf8char, 2, 8));
#Start the Row HTML
print "<TR>\n";
if ($display =~ m/\bchar\b/) {
print "<TD><font size=+2>";
if ($printtype eq "gif") {
print "<A HREF=\"http://www.mandarintools.com/cgi-bin/wordlook.pl?word=$utf8hex";
if ($chartype eq "simp") {
print "\&searchtype=simp";
} elsif ($chartype eq "trad") {
print "\&searchtype=trad";
} elsif ($chartype eq "all") {
print "\&searchtype=chinese";
}
# print "<A HREF=\"http://www.monashwushu.com/cgi-bin/wordlook.pl?word=";
# if ($fields[$gb] ne "-") {
# print "\%" . substr($fields[$gb], 0, 2) . "\%" . substr($fields[$gb], 2, 2);
# #print "\&searchtype=gb\&output=gb";
# print "\&searchtype=gb";
# } elsif ($fields[$big5] ne "-") {
# print "\%" . substr($fields[$big5], 0, 2) . "\%" . substr($fields[$big5], 2, 2);
# #print "\&searchtype=big5\&output=big5";
# print "\&searchtype=big5";
# } else {
# print "\%" . substr($fields[$uchar], 0, 2) . "\%" . substr($fields[$uchar], 2, 2);
# print "\&searchtype=ucs2"; #\&output=ucs2";
# }
print "\&where=start\">";
print "<IMG HEIGHT=30 WIDTH=16 SRC=\"ugif/$fields[$uchar].gif\">";
print "</A>";
} elsif ($printtype eq "utf8") {
print "<A HREF=\"http://www.mandarintools.com/cgi-bin/wordlook.pl?word=$utf8hex";
if ($chartype eq "simp") {
print "\&searchtype=simp";
} elsif ($chartype eq "trad") {
print "\&searchtype=trad";
} elsif ($chartype eq "all") {
print "\&searchtype=chinese";
}
#print "<A HREF=\"http://epsilon3.georgetown.edu/~petersee/cgi-bin/wordlook.cgi?word=";
#print "<A HREF=\"http://www.monashwushu.com/cgi-bin/wordlook.pl?word=";
# if ($fields[$gb] ne "-") {
# print "\%" . substr($fields[$gb], 0, 2) . "\%" . substr($fields[$gb], 2, 2);
# print "\&searchtype=gb"; #\&output=gb";
# } elsif ($fields[$big5] ne "-") {
# print "\%" . substr($fields[$big5], 0, 2) . "\%" . substr($fields[$big5], 2, 2);
# print "\&searchtype=big5"; #\&output=big5";
# } else {
# print "\%" . substr($fields[$uchar], 0, 2) . "\%" . substr($fields[$uchar], 2, 2);
# print "\&searchtype=ucs2"; #\&output=ucs2";
# }
print "\&where=start\">";
print hex2utf8($fields[$uchar]);
print "</A>";
} elsif ($printtype eq "gb") {
print "<A HREF=\"http://www.mandarintools.com/cgi-bin/wordlook.pl?word=$utf8hex";
#print "<A HREF=\"http://epsilon3.georgetown.edu/~petersee/cgi-bin/wordlook.cgi?word=";
#print "<A HREF=\"http://www.monashwushu.com/cgi-bin/wordlook.pl?word=";
#print "\%" . substr($fields[$gb], 0, 2) . "\%" . substr($fields[$gb], 2, 2);
print "\&searchtype=simp\&where=start\">";
print hex2gb($fields[$gb]);
print "</A>";
} else {
#print "<A HREF=\"http://www.mandarintools.com/cgi-bin/wordlook.pl?word=";
#print "<A HREF=\"http://epsilon3.georgetown.edu/~petersee/cgi-bin/wordlook.cgi?word=";
#print "<A HREF=\"http://www.monashwushu.com/cgi-bin/wordlook.pl?word=";
print "<A HREF=\"http://www.mandarintools.com/cgi-bin/wordlook.pl?word=$utf8hex";
#print "\%" . substr($fields[$big5], 0, 2) . "\%" . substr($fields[$big5], 2, 2);
print "\&searchtype=trad\&where=start\">";
print hex2big5($fields[$big5]);
print "</A>";
}
print "</font></TD>";
}
# Print radical/stroke
if ($display =~ m/\bradstroke\b/) {
($rad, $str) = $fields[$radstr] =~ m/^([0-9]+)\'?\.([0-9]+)$/;
print "<TD>";
if (-e "../animations/$fields[$uchar]a.gif") {
print "<A HREF=\"http://www.mandarintools.com/animations/$fields[$uchar]a.gif\">";
}
if ($printtype eq "utf8") {
print hex2utf8($rads[$rad-1]) , " + $str";
} else {
print "<IMG HEIGHT=30 WIDTH=16 SRC=\"ugif/$rads[$rad-1].gif\"> + $str";
}
if (-e "../animations/$fields[$uchar]a.gif") { print "</A>"; }
print "</TD>";
}
# Print Total strokes
if ($display =~ m/\bstrokes\b/) {
print "<TD>$fields[$strokes]</TD>";
}
# Print pinyin
if ($display =~ m/\bpinyin\b/) {
my(@pys) = split(/\s+/, $fields[$pinyin]);
print "<TD>";
foreach $audiblepy (@pys) {
$audiblepy = lc($audiblepy);
$displaypy = $audiblepy;
if ($tonemarks) {
$displaypy = &addTones($audiblepy);
}
#print "<A HREF=\"http://www.bell-labs.com/cgi-user/tts/mandarintts\?text=$audiblepy\&rate=slowest\&audio=.wav\">";
if (-e "../sounds/$audiblepy.aif") {
#print "<A HREF=\"https://research.microsoft.com/speech/tts/TTS.dll?TTS\?Text=" .
# "\%" . substr($fields[$gb], 0, 2) . "\%" . substr($fields[$gb], 2, 2) . "\">";
print "<A HREF=\"http://www.mandarintools.com/sounds/$audiblepy.aif\">";
print "$displaypy";
print "</A> ";
} else {
print "$displaypy ";
}
}
print "</TD>";
}
# Print English definition
if ($display =~ m/\benglish\b/) {
print "<TD>$fields[$english]</TD>";
}
# Print Cantonese
if ($display =~ m/\bcantonese\b/) {
print "<TD>\L$fields[$canton]\E</TD>";
}
# Print Variants
if ($display =~ m/\bvariants\b/) {
if ($fields[var] =~ s/^S (....)$/\1/) {
print "<TD>Simp.: ";
if ($printtype eq "utf8") {
print hex2utf8($fields[$var]);
} else {
print "<IMG HEIGHT=30 WIDTH=16 SRC=\"ugif/$fields[$var].gif\">";
}
print "</TD>";
} elsif ($fields[$var] =~ s/^T (....)$/\1/) {
print "<TD>Trad.: ";
if ($printtype eq "utf8") {
print hex2utf8($fields[$var]);
} else {
print "<IMG HEIGHT=30 WIDTH=16 SRC=\"ugif/$fields[$var].gif\">";
}
print "</TD>";
} else {
print "<TD> -- </TD>"; # Take up space
}
}
# Print Unicode Hex
if ($display =~ m/\bunicode\b/) {
print "<TD>$fields[$uchar]</TD>";
}
# Print GB Hex
if ($display =~ m/\bgb\b/) {
print "<TD>$fields[$gb]</TD>";
}
# Print Big5 Hex
if ($display =~ m/\bbig5\b/) {
print "<TD>$fields[$big5]</TD>";
}
# Print Big5 Hex
if ($display =~ m/\bparts\b/) {
print "<TD>";
if ($fields[$ids] eq "-" or length($fields[$ids]) % 3 != 0) {
print "-";
} else {
for ($idsIndex = 0; $idsIndex < length($fields[$ids]); $idsIndex+=3) {
$idschar = substr($fields[$ids], $idsIndex, 3);
$idshex = utf82hex($idschar);
next if oct("0x$idshex") >= 0x2ff0 and oct("0x$idshex") <= 0x2fff;
print "<A HREF=\"http://www.mandarintools.com/cgi-bin/charlook.pl?";
print "searchmode=$searchmode\&";
print "printtype=$printtype\&";
print "chartype=$chartype\&";
print "ordering=$ordering\&";
foreach $displayunit (split(/[^a-zA-Z]+/, $display)) {
print "display=$displayunit\&";
}
print "english=\&cantonese=\&pinyin=\&";
print "searchcomponent=Search+Component\&";
print "enctype=$enctype\&";
print "whatchar=$idschar";
print "\">";
print "\&\#x$idshex;";
print "</A>";
}
}
print "</TD>";
}
print "\n</TR>\n";
}
sub byfrequency { # procedure used to sort unicode characters by frequency
$charfreq{$a} <=> $charfreq{$b};
}
sub init_rads {
open(RAD, "purerads") or print "Unable to open purerads: $!\n";
while ($line = <RAD>) {
$line =~ m/^(....)/;
$urad = $1;
push @rads, $urad;
}
close(RAD);
}
sub touni {
my($escape) = @_;
$escape =~ s/\&\#(\d+);/$1/;
my($hexval) = sprintf("%X", $escape);
if (length($hexval) == 3) {
$hexval = "0" . $hexval;
} elsif (length($hexval) == 2) {
$hexval = "00" . $hexval;
}
return pack("CC", hex(substr($hexval, 0, 2)), hex(substr($hexval, 2, 2)));
}
sub addTones {
my($withnumbers) = shift;
my($i);
$withnumbers =~ s/ng(\d)\b/${1}ng/g;
$withnumbers =~ s/n(\d)\b/${1}n/g;
$withnumbers =~ s/ao(\d)\b/a${1}o/g;
$withnumbers =~ s/ai(\d)\b/a${1}i/g;
$withnumbers =~ s/ei(\d)\b/e${1}i/g;
$withnumbers =~ s/ou(\d)\b/o${1}u/g;
$withnumbers =~ s/r(\d)\b/${1}r/g;
@tonenums = ("a1", "a2", "a3", "a4", "a5", "e1", "e2", "e3", "e4", "e5",
"i1", "i2", "i3", "i4", "i5", "o1", "o2", "o3", "o4", "o5",
"uu1", "uu2", "uu3", "uu4", "uu5", "uu",
"u1", "u2", "u3", "u4", "u5",
"v1", "v2", "v3", "v4", "v5",
"u:1", "u:2", "u:3", "u:4", "u:5", "u:",
"A1", "A2", "A3", "A4", "A5", "E1", "E2", "E3", "E4", "E5",
"I1", "I2", "I3", "I4", "I5", "O1", "O2", "O3", "O4", "O5",
"U1", "U2", "U3", "U4", "U5",
"U:1", "U:2", "U:3", "U:4", "U:5", "U:"
);
@tonemarks = ('ā', 'á', 'ǎ', 'à', 'a',
'ē', 'é', 'ě', 'è', 'e',
'ī', 'í', 'ǐ', 'ì', 'i',
'ō', 'ó', 'ǒ', 'ò', 'o',
'ǖ', 'ǘ', 'ǚ', 'ǜ', 'ü', 'ü',
'ū', 'ú', 'ǔ', 'ù', 'u',
'ǖ', 'ǘ', 'ǚ', 'ǜ', 'ü', 'ü',
'ǖ', 'ǘ', 'ǚ', 'ǜ', 'ü', 'ü',
'Ā', 'Á', 'Ǎ', 'À', 'A',
'Ē', 'É', 'Ě', 'È', 'E',
'Ī', 'Í', 'Ǐ', 'Ì', 'I',
'Ō', 'Ó', 'Ǒ', 'Ò', 'O',
'Ū', 'Ú', 'Ǔ', 'Ù', 'U',
'Ǖ', 'Ǘ', 'Ǚ', 'Ǜ', 'Ü', 'Ü'
);
for ($i = 0; $i < scalar(@tonenums); $i++) {
$withnumbers =~ s/$tonenums[$i]/$tonemarks[$i]/ge;
}
$withnumbers =~ s/([aeioung])5/$1/g;
return $withnumbers;
}