|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/cgi-bin/data/ |
Upload File : |
#!/usr/bin/perl
# Evaluate the machine named-entity tagger against hand-tagged reference text.
#
# Usage:
#   <script>          evaluate every starred entry in test/files.lst
#   <script> NAME     evaluate the single document test/NAME.txt
#
# For each document NAME the script runs ./chinesene.cgi on test/NAME.txt,
# then compares the tags found in test/NAME.ne (machine tagged) with those in
# test/NAME.sgm (hand tagged) and prints recall and precision for the
# document and for the whole run.
# NOTE(review): chinesene.cgi is presumably what writes test/NAME.ne -- confirm.
use strict;
use warnings;

# Set to 1 to print the text of every hand-tagged entity (debugging aid).
use constant SHOW_ENTITIES => 0;

# Return the document names selected in the listing file.
# Lines starting with '#' are ignored; only lines containing '*' are selected,
# and the name is the dot-free prefix in front of ".txt" at the line start.
# Dies if the listing cannot be opened.
sub read_source_list {
    my ($listing) = @_;

    open(my $lst, '<', $listing)
        or die "Can't open listing of hand tagged files.\n";

    my @names;
    while (my $line = <$lst>) {
        next if $line =~ m/^\#/;
        next unless $line =~ m/\*/;

        my ($name) = $line =~ m/^([^.]+)\.txt/;
        if (defined $name) {
            push @names, $name;
        }
        else {
            # A starred line without a usable NAME.txt would otherwise put an
            # undefined file name on the work list.
            warn "Skipping unrecognised listing line: $line";
        }
    }
    close($lst);

    return @names;
}

# Read a whole file into one string; dies with $error if it cannot be opened.
# An empty file yields the empty string.
sub slurp_file {
    my ($path, $error) = @_;

    open(my $fh, '<', $path) or die $error;
    local $/;    # slurp mode, limited to this sub
    my $text = <$fh>;
    close($fh);

    return defined $text ? $text : '';
}

# Run the tagger on one text file, discarding whatever it prints.
# The list form of the pipe open bypasses the shell, so characters in the
# file name (which may come from the command line) are never interpreted.
sub run_tagger {
    my ($txt_path) = @_;

    my $tagger;
    unless (open($tagger, '-|', './chinesene.cgi', $txt_path)) {
        warn "Unable to run ./chinesene.cgi on $txt_path: $!\n";
        return;
    }

    {
        local $/;
        my $ignored = <$tagger>;    # output is captured only to be dropped
    }
    close($tagger)
        or warn "./chinesene.cgi did not finish cleanly for $txt_path\n";

    return;
}

# Extract the tagged spans from marked-up text.
#
# Every '<' is taken as the start of an opening tag, followed by the entity
# text and then a closing tag. Returns one "TYPE\tSTART\tEND\n" record per
# span, where START and END are offsets into the text with all markup removed
# and TYPE is the value of the opening tag's TYPE="..." attribute (empty if
# the tag has none).
#
# Scanning stops with a warning at the first unterminated tag or span.
sub extract_tags {
    my ($text) = @_;

    my @tags;
    my $length = length $text;
    my $cursor = 0;    # offset in the marked-up text
    my $plain  = 0;    # offset in the text with markup removed

    while ($cursor < $length) {
        my $open = index($text, '<', $cursor);
        last if $open < 0;
        $plain += $open - $cursor;

        my $open_end  = index($text, '>', $open);
        my $close     = $open_end < 0 ? -1 : index($text, '<', $open_end + 1);
        my $close_end = $close < 0    ? -1 : index($text, '>', $close);
        if ($close_end < 0) {
            warn "Unterminated markup at offset $open; ignoring the rest\n";
            last;
        }

        # The tag is cut from its position in the marked-up text, so the
        # TYPE attribute always comes from this tag and never from an
        # earlier one.
        my $opening = substr($text, $open, $open_end - $open + 1);
        my ($type) = $opening =~ m/TYPE\=\"(\w+)\"/;
        $type = '' unless defined $type;

        my $start = $plain;
        $plain += $close - ($open_end + 1);
        push @tags, "$type\t$start\t$plain\n";

        $cursor = $close_end + 1;
    }

    return @tags;
}

# Count how many records of @$wanted also occur in @$pool.
sub count_matches {
    my ($wanted, $pool) = @_;

    my %in_pool = map { $_ => 1 } @$pool;
    return scalar grep { $in_pool{$_} } @$wanted;
}

# Return $part as a percentage of $whole; 0 when $whole is zero, so a file
# (or a run) without tags is reported instead of aborting the script.
sub percentage {
    my ($part, $whole) = @_;

    return $whole ? 100 * $part / $whole : 0;
}

# Return the text of each tagged span, with newlines removed, cut out of the
# given plain text. Records that start beyond the end of the text are skipped.
sub entity_texts {
    my ($plain_text, @tags) = @_;

    my @entities;
    foreach my $tag (@tags) {
        chomp(my $record = $tag);
        my (undef, $start, $end) = split(/\t/, $record);
        next if $start > length $plain_text;

        my $entity = substr($plain_text, $start, $end - $start);
        $entity =~ s/\n//g;
        push @entities, $entity;
    }

    return @entities;
}

# Score every selected document and print the per-file and overall figures.
# With no arguments the work list comes from test/files.lst; otherwise only
# the first argument is used.
sub main {
    my @args = @_;

    my @srcfiles = @args ? ($args[0]) : read_source_list('test/files.lst');

    my ($total_std, $total_found, $total_ne, $total_correct) = (0, 0, 0, 0);

    foreach my $srcfile (@srcfiles) {
        print STDERR "Processing $srcfile\n";
        run_tagger("test/$srcfile.txt");

        my @netags = extract_tags(
            slurp_file("test/$srcfile.ne",
                "Unable to open machine tagged file, $srcfile\n")
        );
        my @stdtags = extract_tags(
            slurp_file("test/$srcfile.sgm",
                "Unable to open hand-tagged file, $srcfile\n")
        );

        # Recall: share of hand tags that the machine reproduced exactly.
        my $found = count_matches(\@stdtags, \@netags);
        $total_std   += @stdtags;
        $total_found += $found;
        print "$srcfile:\n";
        print "RECALL : ", percentage($found, scalar @stdtags), "\n";

        # Precision: share of machine tags that are also hand tags.
        my $correct = count_matches(\@netags, \@stdtags);
        $total_ne      += @netags;
        $total_correct += $correct;
        print "PRECISION: ", percentage($correct, scalar @netags), "\n";

        if (SHOW_ENTITIES) {
            my $srctext = slurp_file("test/$srcfile.txt",
                "Can't open original text\n");
            print "$_\n" for entity_texts($srctext, @stdtags);
        }
    }

    print "\nAll Files:\n";
    print "Total Test Corpus Tags: $total_std\n";
    print "Total Correct Corpus Tags: $total_found\n";
    print "Total Machine Corpus Tags: $total_ne\n";
    print "RECALL : ", percentage($total_found, $total_std), "\n";
    print "PRECISION: ", percentage($total_correct, $total_ne), "\n";

    return;
}

# Run only when executed as a program, so the helpers can be loaded on their
# own (for example by a test).
main(@ARGV) unless caller;

1;