|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/cgi-bin/ |
Upload File : |
#!/usr/bin/perl -- # -*- perl -*-
#
# CGI handler for dictionary entry submissions.  This first section reads the
# request parameters into the package globals used by the rest of the script:
#   $query       - the CGI request object
#   $nocheck     - 1 when a 'nocheck' parameter was sent, 0 otherwise
#   $submissions - text of the submitted entries (kept as UTF-8 bytes)
#   $email       - value of the 'email' parameter
#   $chartype    - value of the 'chartype' parameter
#   @newentries  - $submissions split into lines
use CGI;

$query = CGI->new;

$nocheck = defined($query->param('nocheck')) ? 1 : 0;

# Turn a decimal numeric character reference (the NNN of "&#NNN;") into the
# UTF-8 *bytes* of that character.  A bare chr() would put a wide character
# into what is otherwise a byte string, which makes Perl upgrade the whole
# string and double-encode the surrounding UTF-8 when it is printed.
# Out-of-range values and surrogates are left as the literal reference.
my $ncr_to_utf8 = sub {
    my ($codepoint) = @_;
    return "&#$codepoint;"
        if $codepoint > 0x10FFFF
        or ($codepoint >= 0xD800 and $codepoint <= 0xDFFF);
    my $char = chr($codepoint);
    utf8::encode($char);
    return $char;
};

$submissions = $query->param('submissions');
$submissions = '' unless defined $submissions;

# Extra %XX decoding pass kept from the original handler.  Only well-formed
# hex pairs are decoded, so ordinary text such as "50% of" is left untouched
# instead of being turned into a NUL byte.
# NOTE(review): CGI.pm's param() normally returns URL-decoded values already;
# confirm whether the submitting form really double-encodes before removing.
$submissions =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
$submissions =~ s/&#(\d{1,7});/$ncr_to_utf8->($1)/eg;

$email = $query->param('email');
$email = '' unless defined $email;
$email =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;

$chartype = $query->param('chartype');
$chartype = '' unless defined $chartype;

# One element per submitted line (LF or CRLF line endings).
@newentries = split(/\r?\n/, $submissions);
# Validation pass (skipped when the request carried 'nocheck').
#
# For every submitted entry this checks, in order:
#   1. the line has the form   <chinese> [<pinyin syllables>] /<definitions>/
#   2. the number of characters equals the number of pinyin syllables
#   3. each syllable is a known reading of its character (per uni8py.txt)
#   4. the headword + pinyin is not already present in cedict_ts.u8
# Problems are accumulated in $illformed and flagged in $hasillformed.  If any
# were found, an HTML page with the messages and a pre-filled form is printed
# and the script exits; otherwise execution falls through to the code below.
#
# Side effects visible to the rest of the script: the elements of @newentries
# are whitespace-normalised in place; %char2py, %entryheads, $hasillformed and
# $illformed are set as package globals.
if ($nocheck == 0) {

    # Escape text for use in HTML element content and quoted attribute
    # values.  Only the five markup characters are rewritten, so the UTF-8
    # bytes of the entries pass through unchanged.
    my $escape_html = sub {
        my ($text) = @_;
        return '' unless defined $text;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        $text =~ s/'/&#39;/g;
        return $text;
    };

    # Load in UTF-8 pinyin table: "<char>\t<reading> <reading> ..." per line,
    # '#' lines are comments.  A later line for the same character replaces
    # an earlier one.
    open(my $py_fh, '<', './uni8py.txt') or die "Can't open uni8py.txt: $!";
    while (my $line = <$py_fh>) {
        chomp $line;
        next if $line =~ m/^\s*\#/;
        my ($char, $py) = split(/\t/, $line);
        next unless defined $char and length $char;    # blank line
        $py = '' unless defined $py;
        $char2py{$char} = { map { $_ => 1 } split(/\s/, $py) };
    }
    close($py_fh);

    # Set some index mnemonics for the captured fields of a submitted entry
    $chinese = 0; $pinyin = 1; $english = 2;

    # Shape of a submitted entry after whitespace normalisation.
    my $entry_re = qr/^(\S+) \[([a-z:]+[0-5](?:\s[a-z:]+[0-5])*)\] \/(.*)\/$/;

    # One UTF-8 encoded character (1 to 4 bytes).  The final '.' makes a
    # malformed byte count as a character of its own, so it shows up as a
    # count mismatch or an unknown reading instead of shifting every
    # following character.
    my $utf8_char_re = qr/[\x00-\x7F]
                         |[\xC0-\xDF][\x80-\xBF]
                         |[\xE0-\xEF][\x80-\xBF]{2}
                         |[\xF0-\xF7][\x80-\xBF]{3}
                         |./xs;

    # First do ill-formed check on new additions
    $hasillformed = 0;
    $illformed    = "";
    foreach my $entry (@newentries) {
        next if $entry =~ m/^\s*\#/ or $entry =~ m/^\s*$/;

        # $entry aliases the element of @newentries, so this normalisation
        # is what later gets written to the contributions file.
        $entry =~ s/\s*[\r\n]*$//;
        $entry =~ s/^\s*//;
        $entry =~ s/\s+/ /g;

        my @fields = ($entry =~ $entry_re);
        unless (@fields) {
            $hasillformed = 1;
            $illformed .= "Entry Format error: $entry \n\n";
            next;
        }

        # Pinyin check: split the headword into characters by their actual
        # UTF-8 length rather than assuming three bytes each.
        my @chars = ($fields[$chinese] =~ /($utf8_char_re)/g);
        my @pys   = split(/\s/, $fields[$pinyin]);
        if (@chars != @pys) {
            $illformed .= "# Character/PY count mismatch (" . scalar(@chars) . " characters/" .
                scalar(@pys) . " pinyin syllables):\n# $entry\n\n";
            $hasillformed = 1;
            next;    # not recorded in %entryheads, so no duplicate check
        }

        for my $j (0 .. $#chars) {
            # Fall back to an empty set so unknown characters are reported
            # without adding them to %char2py.
            my $readings = $char2py{$chars[$j]} || {};
            next if defined $readings->{$pys[$j]};
            $hasillformed = 1;
            $illformed .= "# Possible wrong pinyin in $entry:\n" .
                "# $chars[$j]:$pys[$j]; Alternative pinyin: [" .
                join(" ", sort keys %$readings) . "] \n\n";
        }

        # Remember "<chinese> <pinyin>" for the duplicate check below.
        $entryheads{$fields[0] . ' ' . $fields[1]} = 1;
    }

    # Check if new entries duplicate any currently in dictionary.
    # Dictionary lines carry two headword forms before the pinyin; a match on
    # either form (first form checked first) counts as a duplicate.
    my $dict_re = qr/^(\S+) (\S+) \[([a-z:]+[0-5](?:\s[a-z:]+[0-5])*)\] \/(.*)\/$/;
    open(my $dict_fh, '<', 'cedict_ts.u8') or die "Can't open cedict_ts.u8: $!";
    while (my $line = <$dict_fh>) {
        next if $line =~ m/^\s*\#/;
        next if $line =~ m/^\s*$/;
        $line =~ s/[\r\n]*$//;

        # Lines that do not fit the pattern cannot match a validated entry.
        my ($form_a, $form_b, $dict_py, $defs) = ($line =~ $dict_re) or next;

        my $dup_form;
        if (defined $entryheads{$form_a . ' ' . $dict_py}) {
            $dup_form = $form_a;
        } elsif (defined $entryheads{$form_b . ' ' . $dict_py}) {
            $dup_form = $form_b;
        }
        next unless defined $dup_form;

        $hasillformed = 1;
        $illformed .= "# Entry $dup_form $dict_py duplicates existing CEDICT entry: \n" .
            "$dup_form " . "[" . $dict_py . "] /" . $defs . "/\n\n";
    }
    close($dict_fh);

    # Have user correct ill-formed entries before moving on.  Everything that
    # originates from the request is HTML-escaped before being echoed back.
    # NOTE(review): the form resubmits with GET, which limits how much text
    # fits in the textarea round trip - consider POST.
    if ($hasillformed == 1) {
        print "Content-type: text/html\n\n";
        print "<HTML>\n<HEAD>\n";
        print "\n<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=utf-8\">\n";
        print "<TITLE>Submission Errors</TITLE>\n";
        print "</HEAD>\n<BODY>\n";
        print "Submission has one or more entries with possible errors. " .
            "Please check the error messages below, " .
            "correct the entries and re-check or submit anyway if you think the entries are correct.<P>";
        print "<strong>Possible problem entries:</strong><P>\n";
        print "<pre>" . $escape_html->($illformed) . "</pre>";
        print "<P>";
        print '<FORM METHOD=GET ACTION="http://www.mandarintools.com/cgi-bin/submit.pl">';
        print '<input type=hidden name="name" value="' .
            $escape_html->(scalar $query->param('name')) . '">';
        print '<input type=hidden name="chartype" value="' .
            $escape_html->(scalar $query->param('chartype')) . '">';
        print '<input type=hidden name="email" value="' . $escape_html->($email) . '">';
        print '<input type=hidden name="comments" value="' .
            $escape_html->(scalar $query->param('comments')) . '">';
        print '<textarea NAME="submissions" ROWS=20 COLS=80 WRAP="off">';
        print $escape_html->($submissions);
        print '</textarea><br>';
        print '<input TYPE=SUBMIT name=check VALUE="Check Again">';
        print '<input TYPE=SUBMIT name=nocheck VALUE="Submit Anyway">';
        print '</form>';
        print " </BODY>\n</HTML>";
        exit();
    }
}
# sub bypinyin {
# $apy = "";
# $a =~ m/ \[([^\]]+)\] /;
# $apy = $1;
# $apy =~ s/ //g;
# $achn = "";
# $a =~ m/^(\S+)/;
# $achn = $1;
# $bpy = "";
# $b =~ m/ \[([^\]]+)\] /;
# $bpy = $1;
# $bpy =~ s/ //g;
# $bchn = "";
# $b =~ m/^(\S+)/;
# $bchn = $1;
# #print "a $apy b $bpy\n";
# if (length($apy) == 0 or length($bpy) == 0) {
# print "Empty pinyin\n";
# }
# $apy cmp $bpy or
# $achn cmp $bchn;
# }
# @sortedentries = sort bypinyin @centries;
# Nothing was submitted: answer with a short HTML notice and stop here so
# that no record is appended to the contributions file.
unless (@newentries) {
    print
        "Content-type: text/html\n\n",
        "<HTML>\n<HEAD>\n",
        "\n<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=utf-8\">\n",
        "<TITLE>No Submission Received</TITLE>\n",
        "</HEAD>\n<BODY>\n",
        "No submissions found.",
        " </BODY>\n</HTML>";
    exit;
}
# Append the submission to contributions.txt.
#
# Record layout (one record per request):
#   <blank line>
#   # Name: ...  Email: ...  Date: <local time>      (name/email if present)
#   # CHARTYPE: ...                                   (if present)
#   # Comments: ...                                   (if present)
#   # Illformed: ...                                  (see note below)
#   <one submitted entry per line>
#
# If the file cannot be opened or written, a plain-text error is returned and
# the script stops, so the confirmation page is never shown for a submission
# that was not stored.
my $contrib_fh;
unless (open($contrib_fh, '>>', 'contributions.txt')) {
    print "Content-type: text/plain\n\n Unable to open contributions.txt: $!\n";
    exit;
}

# Header fields are written on a single '#' comment line each; fold any line
# break in them into a space so request data cannot start a new line (and
# thereby pose as an entry) in the file.
my $one_line = sub {
    my ($value) = @_;
    $value = '' unless defined $value;
    $value =~ s/\r\n?|\n/ /g;
    return $value;
};

my $submitter = $one_line->(scalar $query->param('name'));
my $contact   = $one_line->($email);
my $kind      = $one_line->($chartype);
my $comments  = $one_line->(scalar $query->param('comments'));

# Build the whole record first and write it with a single print.
my $record = "\n#";
$record .= " Name: $submitter " if $submitter ne "";
$record .= " Email: $contact "  if $contact ne "";
$record .= " Date: " . localtime(time) . "\n";
$record .= "# CHARTYPE: $kind\n"        if $kind ne "";
$record .= "# Comments: $comments \n"   if $comments ne "";

# NOTE(review): $hasillformed is only set by the validation pass, which is
# skipped when $nocheck is 1 and exits when it finds problems, so with the
# code visible here this branch does not appear to be reachable.
if (defined $hasillformed and $hasillformed == 1 and $nocheck == 1) {
    $record .= "# Illformed: $illformed\n";
}

$record .= "$_\n" for @newentries;

# Buffered write errors may only surface at close, so check both.
my $saved = print {$contrib_fh} $record;
$saved = 0 unless close($contrib_fh);
unless ($saved) {
    print "Content-type: text/plain\n\n Unable to write contributions.txt: $!\n";
    exit;
}
# Confirmation page: thanks the submitter and, via the meta refresh tag,
# sends the browser back to the submit page after five seconds.
my @thanks_page = (
    "Content-type: text/html\n\n",
    "<HTML>\n<HEAD>\n",
    "\n<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=utf-8\">\n",
    "\n<META HTTP-EQUIV=\"refresh\" CONTENT=\"5;url=http://www.mandarintools.com/submit.html\">\n",
    "<TITLE>Submission Received</TITLE>\n",
    "</HEAD>\n<BODY>\n",
    "Thank you for your submission. Please look for it in the next CEDICT release.<P>\n",
    "Returning to submit page...",
    " </BODY>\n</HTML>",
);
print @thanks_page;