|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/cgi-bin/ |
Upload File : |
#!/usr/bin/perl
#use lib '/chtools';
#use lib '/var/www/htdocs/eepeter/cgi-bin';
require "cgi-lib.pl";
require "segmenter.pl";
# ---------------------------------------------------------------------------
# Input stage: read the form fields and build @srclines, the list of source
# text pieces (text and the line-break separators captured by split) that the
# segmenter will process.
# ---------------------------------------------------------------------------

# ReadParse (presumably provided by cgi-lib.pl) fills %values with the
# submitted form fields.
&ReadParse(*values);
$atype    = $values{'atype'};    # requested output mode
$ctext    = $values{'ctext'};    # text to annotate, or a URL to fetch
$tagwords = $values{'words'};    # optional user vocabulary, CRLF separated

if ($ctext =~ m#^\s*(http|gopher|ftp)://#) {
    # The field holds a URL: fetch the page with the bundled lynx binary.
    #
    # SECURITY: the URL comes straight from the request, so it must never be
    # interpolated into a shell command line.  Only the first whitespace-
    # delimited token is used, and lynx is started with the list form of a
    # pipe open, which bypasses the shell entirely.
    my ($url) = ($ctext =~ m/^\s*(\S+)/);
    $sourcetext = '';
    if (open(my $web, '-|', './lynx', '-assume_charset=gb2312', '-source', $url)) {
        local $/;                # slurp the whole page in one read
        my $page = <$web>;
        close($web);
        $sourcetext = $page if defined $page;
    }
    $sourcetext = formatHTML($sourcetext);
    $sourcetext =~ s/\r//g;
    # A line break followed by two or more whitespace characters becomes a
    # blank line.
    $sourcetext =~ s/\n\s\s+/\n\n/g;
    @srclines = split(/(\n)/, $sourcetext);
} else {
    # Plain text pasted into the form.  The split limit caps the number of
    # pieces at 80; anything beyond that stays in the last piece.
    $ctext .= "\r\n";
    if ($atype eq 'addmargin') {
        # Margin mode works on paragraphs: split on runs of two or more
        # CRLFs.  The inner group is non-capturing so that split returns the
        # separator once, not once per capture group.
        @srclines = split(/(\r\n(?:\r\n)+)/, $ctext, 80);
    } else {
        @srclines = split(/(\r\n)/, $ctext, 80);
    }
}
# ---------------------------------------------------------------------------
# Start the response and segment the input.
# The HTTP header and the opening of <HEAD> are printed here; the code that
# follows is responsible for closing <HEAD> and opening <BODY>.
# ---------------------------------------------------------------------------
print "Content-type: text/html; charset=gb2312\n\n";
print "<HTML>\n";
print "<HEAD><TITLE>Chinese Annotation Results</TITLE>\n";
$anchor = 0;    # global anchor counter, initialised before any output mode runs

# Register the user's vocabulary with the segmenter (addsegword is presumably
# defined in segmenter.pl).  Each CRLF-separated entry is either a bare word
# or a full "word [pin1 yin1] definition" line; only the word is registered.
# $tagwords itself is left unmodified.
foreach my $entry (split(/\r\n/, $tagwords . "\r\n", 100)) {
    # split with a positive limit keeps trailing empty fields; never hand an
    # empty string to the segmenter.
    next if $entry eq '';
    if ($entry =~ m/\s/) {
        my ($chin) = ($entry =~ m/^(\S+)\s(\[[a-zA-Z0-5: ]+\])\s(.+)$/);
        # Malformed entries do not match and are ignored instead of
        # registering an undefined word.
        &addsegword($chin) if defined $chin;
    } else {
        &addsegword($entry);
    }
}

# Segment every source piece.  segmentline is called in scalar context, one
# result per element of @srclines.
@seglines = ();
foreach my $srcline (@srclines) {
    push @seglines, scalar(segmentline($srcline));
}

# %cwords is not used by this script after this point; presumably it is the
# segmenter's word list and is released here to free memory -- TODO confirm.
undef %cwords;
# ---------------------------------------------------------------------------
# Output stage: render @seglines in the format selected by $atype, close the
# document and exit.  Each line is split on whitespace to obtain its words.
#
# Modes:
#   segment     segmented text only
#   adddict     words linked to a glossary printed below the text
#   js_adddict  as adddict, plus the entry shown in the browser status line
#   topinyin    text replaced by pinyin
#   addpinyin   every character followed by its pinyin
#   addruby     pinyin as <ruby> annotations
#   addmargin   table with the text on the left and notes for the user's
#               vocabulary on the right
#
# NOTE(review): words taken from the submitted text are echoed into the page
# without HTML escaping, as in the original code.  Whether segmentline()
# emits markup that must be preserved is not visible from this file, so no
# escaping was added here; this should be reviewed as a reflected-XSS risk.
# ---------------------------------------------------------------------------
if ($atype eq "segment") {
    # Just segment, no annotation.
    print "</HEAD>\n<BODY>\n";
    foreach my $segline (@seglines) {
        print $segline . "<BR>";
    }
} elsif ($atype eq "adddict") {
    print "</HEAD>\n<BODY>\n";
    my $def = _annot_load_cedict("\t");
    # Full user entries override (or extend) the dictionary.
    foreach my $entry (_annot_parse_tagwords($tagwords, 100)) {
        my ($chin, $py, $eng) = @$entry;
        $def->{$chin} = "$chin\n[$py]\n$eng" if defined $py;
    }
    _annot_print_linked(\@seglines, $def, qr/\r\n/, 0);
} elsif ($atype eq "js_adddict") {
    print <<JS;
<SCRIPT LANGUAGE=JAVASCRIPT>
<!--
// Status line display
function sline(txt) {
window.status=txt;
}
// Clear Status Line
function clearstat() {
window.status="";
}
//-->
</SCRIPT>
</HEAD>
<BODY>
JS
    my $def = _annot_load_cedict(" ");
    foreach my $entry (_annot_parse_tagwords($tagwords, 100)) {
        my ($chin, $py, $eng) = @$entry;
        $def->{$chin} = "$chin [$py] $eng" if defined $py;
    }
    _annot_print_linked(\@seglines, $def, qr/\n/, 1);
} elsif ($atype eq "topinyin") {
    my ($word_py, $char_py) = _annot_pinyin_tables($tagwords);
    print "</HEAD>\n<BODY>\n";
    _annot_print_pinyin(\@seglines, $word_py, $char_py, 'replace');
} elsif ($atype eq "addpinyin") {
    my ($word_py, $char_py) = _annot_pinyin_tables($tagwords);
    print "</HEAD>\n<BODY>\n";
    _annot_print_pinyin(\@seglines, $word_py, $char_py, 'append');
} elsif ($atype eq "addruby") {
    my ($word_py, $char_py) = _annot_pinyin_tables($tagwords);
    # A style rule needs a selector; the bare "{ ... }" block was ignored.
    print "<style>\nruby { ruby-align:center }\n</style>\n";
    print "</HEAD>\n<BODY>\n";
    print "<font size=+1>";
    _annot_print_pinyin(\@seglines, $word_py, $char_py, 'ruby');
    print "</font>";
} elsif ($atype eq "addmargin") {
    print "</HEAD>\n<BODY>\n";
    my $def = _annot_load_cedict("\t");
    my %tagged;    # words the user asked to have annotated
    foreach my $entry (_annot_parse_tagwords($tagwords, 60)) {
        my ($chin, $py, $eng) = @$entry;
        $def->{$chin} = "$chin\n[$py]\n$eng" if defined $py;
        $tagged{$chin} = 1;
    }
    print "<TABLE>\n";
    my %seen;      # tagged words that already got a margin note
    foreach my $segline (@seglines) {
        print "<TR>\n<TD WIDTH=\"80%\">\n";
        my @notes;    # tagged words occurring for the first time in this row
        # Words are printed back to back: the segmenter's spacing is dropped.
        foreach my $word (split(/\s+/, $segline)) {
            if ($tagged{$word} and not $seen{$word}++) {
                # First occurrence
                push @notes, $word;
                print "<STRONG>$word</STRONG>";
            } else {
                print "$word";
            }
        }
        # Print sidenotes: only the words introduced by this row.
        print "</TD>\n<TD WIDTH=\"20%\">\n";
        foreach my $word (@notes) {
            # A tagged word without a dictionary entry gets an empty note.
            my $note = defined $def->{$word} ? $def->{$word} : '';
            print "<SMALL>" . $note . "</SMALL><P>";
        }
        print "</TD></TR>\n";
    }
    print "</TABLE>\n";
} else {
    # Unknown or missing mode: nothing to render, but keep the page
    # well-formed.
    print "</HEAD>\n<BODY>\n";
}
print "</BODY></HTML>";
exit(0);

# Load cedict.gb into a hash keyed by the Chinese word and return a reference
# to it.  Lines are expected to look like "word [pin1 yin1] definition";
# anything else is skipped.
#   $sep defined   -> value is word, "[pinyin]" and definition joined by $sep
#   $sep undefined -> value is the pinyin alone (no brackets, syllables
#                     separated by spaces)
# Dies if the dictionary cannot be opened.
sub _annot_load_cedict {
    my ($sep) = @_;
    my %entry_for;
    open(my $ced, '<', 'cedict.gb') or die "Can't open dictionary";
    while (my $line = <$ced>) {
        chomp $line;
        my ($chin, $py, $eng) = ($line =~ m/^(\S+) \[([a-zA-Z0-5: ]+)\] (.+)$/);
        next unless defined $chin;
        $entry_for{$chin} = defined $sep ? join($sep, $chin, "[$py]", $eng) : $py;
    }
    close($ced);
    return \%entry_for;
}

# Load gbpy.txt into a hash mapping the first whitespace-separated field of
# each line to the second, and return a reference to it.  The render code
# looks the table up by two-byte chunks of a word.  Dies if the file cannot
# be opened.
sub _annot_load_gbpy {
    my %py_of;
    open(my $fh, '<', 'gbpy.txt') or die "Can't open dictionary\n";
    while (my $line = <$fh>) {
        chomp $line;
        my ($gbchar, $py) = split(' ', $line);
        next unless defined $py;
        $py_of{$gbchar} = $py;
    }
    close($fh);
    return \%py_of;
}

# Parse the user's vocabulary field (CRLF separated, at most $limit pieces).
# Returns a list of array refs: [word] for a bare word, [word, pinyin,
# definition] for a full "word [pin1 yin1] definition" entry (pinyin without
# the brackets).  Empty and malformed entries are dropped.
sub _annot_parse_tagwords {
    my ($raw, $limit) = @_;
    $raw = '' unless defined $raw;
    my @parsed;
    foreach my $entry (split(/\r\n/, $raw . "\r\n", $limit)) {
        next if $entry eq '';
        if ($entry =~ m/\s/) {
            my ($chin, $py, $eng) = ($entry =~ m/^(\S+)\s\[([a-zA-Z0-5: ]+)\]\s(.+)$/);
            push @parsed, [$chin, $py, $eng] if defined $chin;
        } else {
            push @parsed, [$entry];
        }
    }
    return @parsed;
}

# Build the two lookup tables used by the pinyin modes: word => pinyin (from
# cedict.gb, overridden by full user entries) and the gbpy.txt table.
sub _annot_pinyin_tables {
    my ($raw_tagwords) = @_;
    my $word_py = _annot_load_cedict();
    foreach my $entry (_annot_parse_tagwords($raw_tagwords, 100)) {
        my ($chin, $py) = @$entry;
        $word_py->{$chin} = $py if defined $py;
    }
    return ($word_py, _annot_load_gbpy());
}

# Split a word into consecutive two-byte chunks (the last chunk may be a
# single byte).
sub _annot_gb_chars {
    my ($word) = @_;
    return unpack('(a2)*', $word);
}

# Make a dictionary entry safe inside  onMouseOver="sline('...')":
# backslash-escape single quotes for the JavaScript string, then entity-
# escape the characters that are special inside a double-quoted HTML
# attribute.  (A backslash-escaped double quote would still end the
# attribute.)  Backslashes are left alone, as in the original code.
sub _annot_js_attr {
    my ($text) = @_;
    $text =~ s/'/\\'/g;
    $text =~ s/&/&amp;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Print the text with every dictionary word linked to a numbered glossary
# entry, then print the glossary.
#   $lines   array ref of segmented lines
#   $def     hash ref word => glossary text
#   $eol     pattern whose first match in each line is replaced by <BR>
#            (adddict uses CRLF, js_adddict uses LF, as the original did)
#   $status  true to add the status-line mouse handlers (js_adddict)
sub _annot_print_linked {
    my ($lines, $def, $eol, $status) = @_;
    my %anchor_of;    # word => glossary number
    my @glossary;     # words in order of first appearance
    foreach my $segline (@$lines) {
        (my $line = $segline) =~ s/$eol/<BR>/;
        if ($line =~ m/^\s+$/) {
            print "<P>\n";
            next;
        }
        # The capturing split keeps the whitespace between words.
        foreach my $word (split(/(\s+)/, $line)) {
            unless (exists($def->{$word})) {
                print "$word";
                next;
            }
            unless (exists($anchor_of{$word})) {
                push @glossary, $word;
                $anchor_of{$word} = scalar(@glossary);
            }
            my $hover = '';
            if ($status) {
                $hover = " onMouseOver=\"sline('" . _annot_js_attr($def->{$word})
                       . "'); return true\" onMouseOut=\"clearstat()\"";
            }
            print "<A HREF=\"\#$anchor_of{$word}\"$hover>$word</A>";
        }
    }
    print "<HR>\n";
    my $number = 0;
    foreach my $word (@glossary) {
        $number++;
        print "\n<A NAME=\"$number\">\n";
        print $def->{$word} . "<P>";
    }
    # Trailing blank lines so that a jump to a late anchor can scroll.
    print "<BR>" x 25;
}

# Print the text with pinyin.
#   $lines    array ref of segmented lines
#   $word_py  hash ref word => space-separated pinyin syllables
#   $char_py  hash ref two-byte character => pinyin
#   $style    'replace' (pinyin instead of the text), 'append' (each
#             character followed by its pinyin) or 'ruby' (<ruby> markup)
# Words found in $word_py use that reading; other words whose first byte is
# above 127 are looked up per two-byte character; everything else is printed
# unchanged.
sub _annot_print_pinyin {
    my ($lines, $word_py, $char_py, $style) = @_;
    foreach my $segline (@$lines) {
        (my $line = $segline) =~ s/\n/<BR>/;
        if ($line =~ m/^\s+$/) {
            print "<P>\n";
            next;
        }
        foreach my $word (split(/(\s+)/, $line)) {
            my $use_word_py = exists($word_py->{$word});
            # Ruby mode annotates single (two-byte) characters from the
            # per-character table rather than from the word dictionary.
            $use_word_py = 0 if $style eq 'ruby' and length($word) == 2;

            my @readings;
            if ($use_word_py) {
                @readings = split(/\s+/, $word_py->{$word});
            } elsif (vec($word, 0, 8) > 127) {
                @readings = map { $char_py->{$_} } _annot_gb_chars($word);
            } else {
                print "$word";
                next;
            }

            if ($style eq 'replace') {
                # Syllables are run together with no spaces.
                print join('', map { defined($_) ? $_ : '' } @readings);
                next;
            }
            my @chars = _annot_gb_chars($word);
            foreach my $i (0 .. $#chars) {
                my $reading = defined $readings[$i] ? $readings[$i] : '';
                if ($style eq 'ruby') {
                    print "<ruby>" . $chars[$i] . "<rt>" . $reading . "</ruby>";
                } else {
                    print $chars[$i] . $reading;
                }
            }
        }
    }
}
# Reduce an HTML page to plain text for annotation.
#
# Takes the page source as a single string and returns it with:
#   - every run of whitespace collapsed to one space,
#   - comments and <script>/<style> elements (including their contents)
#     removed,
#   - <BR> in any spelling (<br>, <br/>, <br />, <BR clear=all>) and opening
#     <P> tags turned into newlines,
#   - all remaining tags stripped.
# Character entities are not decoded.  Undefined input yields an empty string.
sub formatHTML {
    my($htmltext) = @_;
    return '' unless defined $htmltext;
    # After this step the text contains no newlines, so the only newlines in
    # the result are the ones produced from <BR> and <P> below.
    $htmltext =~ s/\s+/ /g;
    # Drop content that is not page text before the generic tag stripper
    # runs; the stripper alone would remove the tags but leave the script or
    # style source behind.
    $htmltext =~ s{<!--.*?-->}{}gs;
    $htmltext =~ s{<(script|style)\b[^>]*>.*?</\1\s*>}{}igs;
    $htmltext =~ s/<BR\b[^>]*>/\n/ig;
    $htmltext =~ s/<P(\s[^>]+)?>/\n/ig;
    $htmltext =~ s/<[^>]+>//g;
    return $htmltext;
}