.

KGRKJGETMRETU895U-589TY5MIGM5JGB5SDFESFREWTGR54TY
Server : Apache/2.4.62
System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64
User : www ( 80)
PHP Version : 8.3.8
Disable Function : NONE
Directory : /domains/mandarintools/cgi-bin/
Upload File :
Current File : /domains/mandarintools/cgi-bin/annotate.pl
#!/usr/bin/perl

#use lib '/chtools';
#use lib '/var/www/htdocs/eepeter/cgi-bin';

require "cgi-lib.pl";
require "segmenter.pl";


# ---------------------------------------------------------------------------
# Input stage.
#
# Form fields read from the request:
#   atype - which output mode to produce (compared against mode names below)
#   ctext - either the text to annotate or a URL to fetch it from
#   words - user-supplied word list, one entry per CRLF-terminated line
#
# Result: @srclines holds the source text split into pieces, with the line
# separators kept as their own elements (the split patterns capture them).
# ---------------------------------------------------------------------------
&ReadParse(*values);
$atype = $values{'atype'};
$ctext = $values{'ctext'};
$tagwords = $values{'words'};

if ($ctext =~ m#^\s*(http|gopher|ftp)://#) {
    # The field holds a URL: fetch the page source with the bundled lynx.
    #
    # SECURITY: $ctext is untrusted request data.  It used to be interpolated
    # into a backtick command line, which let shell metacharacters in the
    # field run arbitrary commands.  The list form of a pipe open hands the
    # arguments straight to exec() without involving a shell.  Only the first
    # whitespace-delimited token is used as the URL; because the test above
    # guarantees it starts with a scheme name it cannot be taken for an option.
    my ($url) = ($ctext =~ m#^\s*(\S+)#);

    $sourcetext = '';
    if (open(my $lynx, '-|', './lynx', '-assume_charset=gb2312', '-source', $url)) {
        local $/;                       # slurp the whole page in one read
        $sourcetext = <$lynx>;
        close($lynx);
    }
    $sourcetext = '' unless defined $sourcetext;   # fetch failed or empty

    $sourcetext = formatHTML($sourcetext);
    $sourcetext =~ s/\r//g;
    $sourcetext =~ s/\n\s\s+/\n\n/g;    # newline + indentation => blank line
    @srclines = split(/(\n)/, $sourcetext);

} else {
    # The field holds the text itself (CRLF line ends, as sent by browsers).
    # The limit of 80 caps the number of pieces; anything beyond that stays
    # unsplit in the last piece.
    $ctext .= "\r\n";
    if ($atype eq 'addmargin') {
        # Margin mode works on paragraphs, i.e. text separated by one or more
        # blank lines.  The inner group is non-capturing so that each
        # separator contributes exactly one element to @srclines; a nested
        # capture would add a second, spurious "\r\n" element per separator.
        @srclines = split(/(\r\n(?:\r\n)+)/, $ctext, 80);
    } else {
        @srclines = split(/(\r\n)/, $ctext, 80);
    }
}

# Start the response: CGI header, then the opening of the HTML document.
# <HEAD> is deliberately left open here so that a later stage can still add
# to it before closing it.
print "Content-type: text/html; charset=gb2312\n\n"
    . "<HTML>\n"
    . "<HEAD><TITLE>Chinese Annotation Results</TITLE>\n";

# Running counter used for numbering anchors in the output stage.
$anchor = 0;

# Register every user-supplied word with the segmenter.  An entry is either
# a bare word, or "word [pinyin] definition"; in the latter case only the
# leading word is registered.
@tagwords = split(/\r\n/, "$tagwords\r\n", 100);
for $tagword (@tagwords) {
    if ($tagword !~ m/\s/) {
        addsegword($tagword);
        next;
    }
    ($chin, $py, $eng) = ($tagword =~ m/^(\S+)\s(\[[a-zA-Z0-5: ]+\])\s(.+)$/);
    addsegword($chin);
}

# Segment each source piece; @seglines stays index-aligned with @srclines.
for ($count = 0; $count < @srclines; $count++) {
    $seglines[$count] = segmentline($srclines[$count]);
}

# %cwords is not needed past this point; empty it.
%cwords = ();


# ---------------------------------------------------------------------------
# Output stage: render @seglines according to the mode selected in $atype,
# then close the document and exit.
#
# Every branch closes <HEAD> and opens <BODY> itself (the preamble leaves
# <HEAD> open).  The helper subs used below are defined after exit(0).
#
# Text that originates from the request is passed through
# annot_escape_markup() before it is printed, so it cannot inject tags.
# ---------------------------------------------------------------------------
if ($atype eq "segment") {
    # Just segment, no annotation.
    print "</HEAD>\n<BODY>\n";
    foreach my $segline (@seglines) {
        my $safe = annot_escape_markup($segline);
        print "$safe<BR>";
    }

} elsif ($atype eq "adddict") {
    # Every dictionary word becomes a link to its definition, which is
    # listed at the bottom of the page.
    print "</HEAD>\n<BODY>\n";
    annot_build_definitions(
        sub { my ($chin, $py, $eng) = @_; return "${chin}\t[${py}]\t${eng}"; },
        100);

    annot_render_lines(sub {
        my ($word) = @_;
        if (exists($cdef{$word})) {
            my $href = annot_anchor_for($word);
            print "<A HREF=\"\#$href\">$word</A>";
        } else {
            print "$word";
        }
    });
    annot_print_definitions();

} elsif ($atype eq "js_adddict") {
    # Same as adddict, plus the definition is shown in the browser status
    # line while the pointer is over the word.
    print <<JS;
<SCRIPT LANGUAGE=JAVASCRIPT>
<!-- 
// Status line display
 function sline(txt) {
  window.status=txt;
 }

//  Clear Status Line
  function clearstat() {
    window.status="";
  }
//-->
</SCRIPT>
</HEAD>
<BODY>
JS
    annot_build_definitions(
        sub { my ($chin, $py, $eng) = @_; return "${chin} [${py}] ${eng}"; },
        100);

    annot_render_lines(sub {
        my ($word) = @_;
        if (exists($cdef{$word})) {
            my $href = annot_anchor_for($word);
            # The definition is embedded in a JavaScript string that itself
            # sits inside an HTML attribute, so it needs both escapings.
            my $tip  = annot_js_attr($cdef{$word});
            print "<A HREF=\"\#$href\" onMouseOver=\"sline(\'$tip\'); return true\" onMouseOut=\"clearstat()\">$word</A>";
        } else {
            print "$word";
        }
    });
    annot_print_definitions();

} elsif ($atype eq "topinyin") {
    # Replace the Chinese text with pinyin.
    print "</HEAD>\n<BODY>\n";
    annot_build_definitions(
        sub { my $py = $_[1]; $py =~ s/\s//g; return $py; },   # syllables run together
        100);
    annot_load_gbpy();

    annot_render_lines(sub {
        my ($word) = @_;
        if (exists($cdef{$word})) {
            print $cdef{$word};
        } elsif (vec($word, 0, 8) > 127) {
            # High first byte: treat as double-byte text that is not a
            # dictionary word and convert it character by character.
            print map { defined $gbpy{$_} ? $gbpy{$_} : '' } annot_gb_chars($word);
        } else {
            print "$word";
        }
    });

} elsif ($atype eq "addpinyin") {
    # Keep the Chinese text and put the pinyin after each character.
    print "</HEAD>\n<BODY>\n";
    annot_build_definitions(sub { return $_[1]; }, 100);
    annot_load_gbpy();

    annot_render_lines(sub {
        my ($word) = @_;
        if (exists($cdef{$word})) {
            my @pys = split(' ', $cdef{$word});
            annot_print_annotated($word, \@pys, '', '', '');
        } elsif (vec($word, 0, 8) > 127) {
            annot_print_annotated($word, undef, '', '', '');
        } else {
            print "$word";
        }
    });

} elsif ($atype eq "addruby") {
    # Keep the Chinese text and attach the pinyin as ruby annotation.
    # The style rule needs a selector to take effect, and belongs in <HEAD>.
    print "<style>\nruby { ruby-align:center }\n</style>\n";
    print "</HEAD>\n<BODY>\n";
    print "<font size=+1>";
    annot_build_definitions(sub { return $_[1]; }, 100);
    annot_load_gbpy();

    annot_render_lines(sub {
        my ($word) = @_;
        # Dictionary readings are used for multi-character words only; a
        # single character (2 bytes) takes the per-character table instead.
        # (This used to read "length{$word}", which measures a stringified
        # hash reference and therefore never equalled 2.)
        if (exists($cdef{$word}) and length($word) != 2) {
            my @pys = split(' ', $cdef{$word});
            annot_print_annotated($word, \@pys, '<ruby>', '<rt>', '</ruby>');
        } elsif (vec($word, 0, 8) > 127) {
            annot_print_annotated($word, undef, '<ruby>', '<rt>', '</ruby>');
        } else {
            print "$word";
        }
    });
    print "</font>\n";

} elsif ($atype eq "addmargin") {
    # Two-column table: the text on the left, and on the right the
    # definitions of the user's words that first occur in that row.
    print "</HEAD>\n<BODY>\n";
    annot_build_definitions(
        sub { my ($chin, $py, $eng) = @_; return "${chin}\t[${py}]\t${eng}"; },
        60);

    print "<TABLE>\n";
    foreach my $segline (@seglines) {
        # Separator-only pieces would otherwise produce empty table rows.
        next unless defined $segline and $segline =~ m/\S/;

        print "<TR>\n<TD WIDTH=\"80\%\">\n";
        my @new_words;      # user words seen for the first time in this row
        # Words are printed back to back, i.e. without the spaces the
        # segmenter put between them.
        foreach my $word (split(/\s+/, annot_escape_markup($segline))) {
            if (exists($tagwords{$word}) and !exists($canchor{$word})) {
                # First occurrence
                $anchor++;
                $canchor{$word} = $anchor;
                push @new_words, $word;
                print "<STRONG>$word</STRONG>";
            } else {
                print "$word";
            }
        }

        # Print sidenotes
        print "</TD>\n<TD WIDTH=\"20\%\">\n";
        foreach my $word (@new_words) {
            next unless defined $cdef{$word};   # listed by the user but unknown
            my $note = annot_escape_markup($cdef{$word});
            print "<SMALL>$note</SMALL><P>";
        }
        print "</TD></TR>\n";
    }
    # Only this mode opens a table, so only this mode closes one.
    print "</TABLE>\n";

} else {
    # Unknown or missing mode: still produce a well-formed, empty page.
    print "</HEAD>\n<BODY>\n";
}

print "</BODY></HTML>";
exit(0);

# ---------------------------------------------------------------------------
# Helpers for the output stage.  They work on the script's globals
# (%cdef, %gbpy, %tagwords, %canchor, @anchwords, $anchor, @seglines).
# ---------------------------------------------------------------------------

# Neutralise markup in text that is about to be printed as element content.
# Only "<" and ">" are replaced: that is enough to stop tag injection, and
# leaving "&" alone keeps entities that are already present in fetched page
# text rendering as they did before.  Safe on GB2312 bytes because both
# characters are ASCII and never occur inside a double-byte character.
# Returns the escaped copy ('' for undef).
sub annot_escape_markup {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Escape text for use inside a single-quoted JavaScript string literal that
# is itself placed in a double-quoted HTML attribute.  JavaScript escaping
# comes first (backslash and both quote characters), then HTML attribute
# escaping, because the browser undoes them in the opposite order.
# Returns the escaped copy ('' for undef).
sub annot_js_attr {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/([\\'"])/\\$1/g;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Fill %cdef from cedict.gb and then from the user's word list ($tagwords).
#   $make_value - code ref called as ($chin, $py, $eng), with $py WITHOUT the
#                 surrounding brackets; its result is stored in %cdef
#   $limit      - maximum number of user entries to split out
# User entries are processed last so they override the dictionary, and they
# go through the same $make_value, so both sources store the same format.
# Every user word is also recorded in %tagwords.
# Dies if the dictionary cannot be opened.
sub annot_build_definitions {
    my ($make_value, $limit) = @_;

    open(my $ced, '<', 'cedict.gb') or die "Can't open dictionary";
    while (my $entry = <$ced>) {
        chomp $entry;
        my ($chin, $py, $eng) = ($entry =~ m/^(\S+) \[([a-zA-Z0-5: ]+)\] (.+)$/);
        # Header/comment/malformed lines do not match; skipping them avoids
        # creating a bogus entry under the empty key.
        next unless defined $chin;
        $cdef{$chin} = $make_value->($chin, $py, $eng);
    }
    close($ced);

    my $userwords = defined $tagwords ? $tagwords : '';
    foreach my $entry (split(/\r\n/, $userwords, $limit)) {
        next if $entry eq '';
        if ($entry =~ m/\s/) {
            # "word [pinyin] definition"
            my ($chin, $py, $eng) = ($entry =~ m/^(\S+)\s\[([a-zA-Z0-5: ]+)\]\s(.+)$/);
            next unless defined $chin;          # malformed entry: ignore
            $cdef{$chin}     = $make_value->($chin, $py, $eng);
            $tagwords{$chin} = $cdef{$chin};
        } else {
            # Bare word: use whatever the dictionary has (possibly nothing).
            $tagwords{$entry} = $cdef{$entry};
        }
    }
    return;
}

# Fill %gbpy (double-byte character => pinyin) from gbpy.txt.  Each line is
# split on whitespace; the first field is the character and the second its
# reading, any further fields are ignored.
# Dies if the file cannot be opened.
sub annot_load_gbpy {
    open(my $table, '<', 'gbpy.txt') or die "Can't open dictionary\n";
    while (my $row = <$table>) {
        chomp $row;
        my ($gbchar, $py) = split(' ', $row);
        next unless defined $gbchar;
        $gbpy{$gbchar} = $py;
    }
    close($table);
    return;
}

# Return the anchor number of $word, assigning the next free number (and
# recording the word in @anchwords) the first time the word is seen.
sub annot_anchor_for {
    my ($word) = @_;
    unless (exists($canchor{$word})) {
        $anchor++;
        $canchor{$word}     = $anchor;
        $anchwords[$anchor] = $word;
    }
    return $canchor{$word};
}

# Walk @seglines and call $render_word->($token) for every token of every
# line.  Tokens are the words and the whitespace runs between them, so the
# callback reproduces the spacing by printing non-words unchanged.
# A line break (LF or CRLF) becomes <BR>; a whitespace-only line becomes <P>.
# Lines are markup-escaped before the <BR> is inserted.
sub annot_render_lines {
    my ($render_word) = @_;
    foreach my $segline (@seglines) {
        my $text = annot_escape_markup($segline);
        $text =~ s/\r?\n/<BR>/;
        if ($text =~ m/^\s+$/) {
            print "<P>\n";
            next;
        }
        foreach my $token (split(/(\s+)/, $text)) {
            $render_word->($token);
        }
    }
    return;
}

# Print the definition list that the word links point at: one named anchor
# per linked word, in order of first appearance.
sub annot_print_definitions {
    print "<HR>\n";
    for my $i (1 .. $anchor) {
        my $definition = annot_escape_markup($cdef{$anchwords[$i]});
        print "\n<A NAME=\"$i\"></A>\n";
        print "$definition<P>";
    }
    # Trailing blank lines so the browser can scroll the last anchors to the
    # top of the window.
    print "<BR>" x 25;
    return;
}

# Split a string of double-byte characters into its 2-byte pieces.
sub annot_gb_chars {
    my ($word) = @_;
    return unpack('(a2)*', $word);
}

# Print $word one double-byte character at a time, each followed by its
# pinyin, as  $open . char . $mid . pinyin . $close.
#   $pys - array ref of readings, one per character; when undef the reading
#          of each character is looked up in %gbpy instead
# A missing reading prints as an empty string.
sub annot_print_annotated {
    my ($word, $pys, $open, $mid, $close) = @_;
    my @chars = annot_gb_chars($word);
    for my $i (0 .. $#chars) {
        my $py = defined $pys ? $pys->[$i] : $gbpy{$chars[$i]};
        $py = '' unless defined $py;
        print "${open}$chars[$i]${mid}${py}${close}";
    }
    return;
}

# Reduce an HTML page to plain text for annotation.
#
#   $htmltext - the page source (undef is treated as empty)
#
# Returns the text with all whitespace runs collapsed to single spaces, a
# newline wherever the page had a <BR> or an opening <P> tag, and every
# other tag removed.  Character entities are left as they are.
sub formatHTML {
    my($htmltext) = @_;
    return '' unless defined $htmltext;

    # Source line breaks carry no meaning in HTML; only tags create breaks.
    $htmltext =~ s/\s+/ /g;

    # Script and style bodies are code, not page text: drop them together
    # with their tags (the generic tag strip below would keep the bodies).
    $htmltext =~ s/<(script|style)\b[^>]*>.*?<\/\1\s*>//gis;

    # Line breaks: <BR>, <BR/>, <BR /> and <BR attr=...>.
    $htmltext =~ s/<BR(?:\s[^>]*|\/)?>/\n/ig;

    # Paragraph starts, with or without attributes.  The \s keeps other
    # tags that merely begin with "P" (e.g. <PRE>) from matching.
    $htmltext =~ s/<P(?:\s[^>]*)?>/\n/ig;

    # Everything else that looks like a tag is removed.
    $htmltext =~ s/<[^>]+>//g;

    return $htmltext;
}
Anon7 - 2021