KGRKJGETMRETU895U-589TY5MIGM5JGB5SDFESFREWTGR54TY
Server : Apache/2.4.62
System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64
User : www ( 80)
PHP Version : 8.3.8
Disable Function : NONE
Directory :  /domains/mandarintools/download/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Current File : /domains/mandarintools/download/orderbypinyin.pl
#!/usr/bin/perl
# Indices of the three captures taken from a dictionary line of the form
# "WORD [pin1 yin1] /gloss/": headword, pinyin, and English text.
$chinword = 0; $pinyin = 1; $english = 2;

# Lines of cedict.gb that do not look like dictionary entries are copied to
# this file so they can be inspected afterwards.
open(ILL, '>', 'illformd.txt') or die "Can't open illformd.txt: $!";

# Record every headword found in cedict.gb as a key of %b5entries, which the
# rest of the script uses to recognise words that are already known.
open(B5, '<', 'cedict.gb') or die "Can't open cedict.gb: $!";
while (my $line = <B5>) {
    next if $line =~ m/^#/;    # comment line
    chomp $line;
    if ($line =~ m/^(\S+) \[[^\]]+\] \/.*\/$/) {
        my $word = $1;
        # Also register the headword with a two-byte suffix appended.
        # NOTE(review): the suffix literal was mis-encoded in the source this
        # was restored from; \xAA\xBA is the Big5 encoding of U+7684 (the
        # particle read "de5"), which is presumably what was meant -- confirm
        # against the encoding of the data files.
        $b5entries{$word . "\xAA\xBA"} = 1;
        $b5entries{$word} = 1;
    } else {
        print ILL $line, "\n";
    }
}
close(B5);
# Buffered write errors only surface when the handle is closed.
close(ILL) or die "Can't close illformd.txt: $!";


# Read candidate entries from out2.txt and collect them in %centries, keyed
# by headword. Entries whose headword is already a key of %b5entries are
# dropped; repeated headwords with the same pinyin have their glosses joined.
open(CE, '<', 'out2.txt') or die "Can't open out2.txt: $!\n";

$index = 0;    # number of input entries kept
while (my $line = <CE>) {
    next if $line =~ m/^#/;    # comment line
    chomp $line;

    my @fields = ($line =~ m/^(\S+) \[([^\]]+)\] (.*)$/);
    if (!@fields) {
        # A line that is not "WORD [pinyin] text" used to be stored under an
        # empty headword as " [] "; skip it instead.
        warn "Skipping malformed line ", $., " of out2.txt: ", $line, "\n"
            if $line =~ m/\S/;
        next;
    }
    my ($word, $py, $eng) = @fields[$chinword, $pinyin, $english];

    # Strip a trailing "de5" syllable together with its two-byte character.
    # The leading space in the pinyin test guarantees there is another
    # syllable left, so a one-syllable entry is never reduced to an empty
    # headword.
    # NOTE(review): the byte literals below were mis-encoded in the source
    # this was restored from. \xAA\xBA (Big5 for U+7684) and \xA6a (Big5 for
    # U+5730) are inferred from the pinyin and the surviving "a" byte --
    # confirm against the encoding of out2.txt.
    if ($py =~ m/ de5$/ and $word =~ m/\xAA\xBA$/) {
        $py   =~ s/ de5$//;
        $word =~ s/\xAA\xBA$//;
    }
    # Same treatment for a trailing "di4" on entries whose text ends in "ly/".
    if ($word =~ m/\xA6a$/ and $py =~ m/ di4$/ and $eng =~ m/ly\/$/) {
        $py   =~ s/ di4$//;
        $word =~ s/\xA6a$//;
    }

    next if exists $b5entries{$word};    # skip words already in CEDICT

    my $rebuilt = "$word \[$py\] $eng";
    if (exists $centries{$word}) {
        my ($seen_py) = ($centries{$word} =~ m/^\S+ \[([^\]]+)\] /);
        if (defined $seen_py and $seen_py eq $py) {
            # Same reading: drop the stored entry's closing "/" and append
            # the new text, which brings its own leading and trailing "/".
            $centries{$word} =~ s/\/\s*$//;
            $centries{$word} .= $eng;
        } else {
            # NOTE(review): a different reading replaces the earlier entry,
            # so only the last reading of a headword survives -- confirm
            # this is intended.
            $centries{$word} = $rebuilt;
        }
    } else {
        $centries{$word} = $rebuilt;
    }
    $index++;
}
close(CE);

# Sort comparator: orders two entry strings ($a and $b, as set by sort) by
# the text found between the first " [" and "]" in each, with spaces removed.
# An entry with no such bracketed text is treated as having an empty key, and
# a single "Empty pinyin" diagnostic is written to STDERR so that it does not
# end up in the sorted output on STDOUT.
# Returns -1, 0 or 1 as given by string comparison (cmp) of the two keys.
sub bypinyin {
    my @keys;
    for my $entry ($a, $b) {
        # Take the capture from the match's own return value: after a failed
        # match $1 would still hold the capture of an earlier match.
        my ($key) = defined($entry) ? ($entry =~ m/ \[([^\]]+)\]/) : ();
        $key = "" unless defined $key;
        $key =~ s/ //g;
        push @keys, $key;
    }

    if (length($keys[0]) == 0 or length($keys[1]) == 0) {
        print STDERR "Empty pinyin\n";
    }
    return $keys[0] cmp $keys[1];
}

# Order the collected entries with the bypinyin comparator and write them to
# standard output, one entry per line.
@sortedentries = sort bypinyin values(%centries);
print "$_\n" for @sortedentries;







Anon7 - 2021