|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/download/ |
Upload File : |
#!/usr/bin/perl
# Step 1: collect the headwords that are already present in cedict.gb.
#
# Every non-comment line of the form
#     WORD [pin1 yin1] /definition/.../
# records WORD in %b5entries, once as-is and once with a fixed suffix
# appended.  Any other non-comment line is copied to illformd.txt so it can be
# inspected afterwards.

# Positions of the headword, pinyin and definition within a parsed entry.
# Left as package variables because the merge step further down indexes its
# own @fields array with them.
$chinword = 0; $pinyin = 1; $english = 2;

# Both opens are checked: without the check a failure to create illformd.txt
# would silently discard every rejected line.
open(my $ill_fh, '>', 'illformd.txt') or die "Can't open illformd.txt: $!";
open(my $b5_fh, '<', 'cedict.gb') or die "Can't open cedict.gb: $!";
while (my $line = <$b5_fh>) {
    next if $line =~ m/^#/;    # comment line
    chomp $line;
    # One match both validates the entry shape and yields the headword.
    if ($line =~ m/^(\S+) \[([^\]]+)\] \/(.*)\/$/) {
        my $headword = $1;
        # NOTE(review): the suffix literal on the next line shows up as
        # replacement characters in this copy of the file, so its original
        # bytes could not be checked.  The merge step strips the same literal
        # from words whose pinyin ends in "de5" -- confirm the encoding.
        $b5entries{$headword . "��"} = 1;
        $b5entries{$headword} = 1;
    } else {
        print {$ill_fh} $line, "\n";
    }
}
close($b5_fh);
# Buffered write errors only surface when the handle is closed.
close($ill_fh) or die "Can't close illformd.txt: $!";
# Step 2: read the candidate entries from out2.txt and merge them into
# %centries (headword => "WORD [pinyin] /definition/" line).
#
# For each non-comment line:
#   * a trailing particle is removed from the headword and pinyin (see below);
#   * the entry is dropped if its headword is already listed in %b5entries;
#   * if the headword was seen before with the same pinyin, the new definition
#     is appended to the stored one; otherwise the new entry is stored.
#     NOTE(review): a repeated headword with a *different* pinyin replaces the
#     earlier entry -- confirm that losing the first reading is intended.
open(my $ce_fh, '<', 'out2.txt') or die "Can't open out2.txt: $!\n";
$index = 0;    # number of entries kept; not read anywhere in the visible script
while (my $line = <$ce_fh>) {
    next if $line =~ m/^#/;
    chomp $line;
    next if $line =~ m/^\s*$/;    # blank line: nothing to merge

    my ($word, $py, $def) = ($line =~ m/^(\S+) \[([^\]]+)\] (.*)$/);
    unless (defined $word) {
        # A line that does not parse used to be stored as an empty " [] "
        # entry under the empty headword; report it and move on instead.
        warn "out2.txt line $.: skipping malformed entry: $line\n";
        next;
    }

    # NOTE(review): the two suffix literals below show up as replacement
    # characters in this copy of the file; presumably they are the two-byte
    # encodings of the particles read "de5" and "di4" -- confirm.
    #
    # The pinyin substitution is the last test of each condition, so the
    # particle is only taken off the headword when a preceding syllable
    # remains.  An entry whose whole pinyin is "de5" or "di4" keeps its
    # headword instead of being reduced to an empty string.
    if ($word =~ m/��$/ and $py =~ s/ de5$//) {
        $word =~ s/��$//;
    }
    if ($word =~ m/�a$/ and $def =~ m/ly\/$/ and $py =~ s/ di4$//) {
        $word =~ s/�a$//;
    }

    next if (exists $b5entries{$word});    # skip words already in CEDICT

    my $entry = "$word \[$py\] $def";
    if (exists $centries{$word}) {
        my (undef, $known_py) = ($centries{$word} =~ m/^(\S+) \[([^\]]+)\] (.*)$/);
        if (defined $known_py and $known_py eq $py) {
            # Same reading: drop the closing "/" of the stored definition and
            # append the new one, which brings its own leading "/".
            $centries{$word} =~ s/\/\s*$//;
            $centries{$word} .= $def;
        } else {
            $centries{$word} = $entry;
        }
    } else {
        $centries{$word} = $entry;
    }
    $index++;
}
close($ce_fh);
# Sort comparator: orders two dictionary entries by their pinyin.
#
# Takes no arguments; the entries are read from the sort variables $a and $b.
# From each entry it takes the text of the first " [...]" group, removes the
# spaces inside it and compares the two results as strings.  An entry without
# such a group is compared as the empty string.  "Empty pinyin" is printed
# whenever either side ends up empty.
#
# Returns -1, 0 or 1, as the cmp operator does.
sub bypinyin {
    # Read $1 only after a successful match.  A failed match leaves $1
    # holding the previous capture, so an entry without pinyin would
    # otherwise inherit the pinyin just extracted from the other entry.
    my $apy = ($a =~ m/ \[([^\]]+)\]/) ? $1 : "";
    $apy =~ s/ //g;
    my $bpy = ($b =~ m/ \[([^\]]+)\]/) ? $1 : "";
    $bpy =~ s/ //g;
    #print "a $apy b $bpy\n";
    if (length($apy) == 0 or length($bpy) == 0) {
        print "Empty pinyin\n";
    }
    return $apy cmp $bpy;
}
# Step 3: write the merged entries to standard output, one per line, in the
# order defined by bypinyin.
@sortedentries = sort bypinyin values(%centries);
print $_ . "\n" for @sortedentries;