KGRKJGETMRETU895U-589TY5MIGM5JGB5SDFESFREWTGR54TY
Server : Apache/2.4.62
System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64
User : www ( 80)
PHP Version : 8.3.8
Disable Function : NONE
Directory :  /domains/mandarintools/cgi-bin/data/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Current File : /domains/mandarintools/cgi-bin/data/nescore.pl
#!/usr/bin/perl

# Build @srcfiles, the list of document base names (no extension) to score.
#
#   * No command-line arguments: read test/files.lst.  Lines starting with
#     "#" are comments.  Only lines containing a "*" are used (presumably the
#     marker for documents that have been hand tagged -- confirm against the
#     listing format), and the name kept is whatever precedes the first "."
#     of a leading "NAME.txt".
#   * Otherwise: score the single base name given as the first argument.
if (!@ARGV) {
    open(my $listing, '<', 'test/files.lst')
        or die "Can't open listing of hand tagged files.\n";
    while (my $entry = <$listing>) {
        next if $entry =~ m/^\#/;
        next unless $entry =~ m/\*/;

        my ($filename) = ($entry =~ m/^([^.]+)\.txt/);
        if (!defined $filename) {
            # A starred line that does not begin with "NAME.txt" used to push
            # an undefined name, which was later processed as "test/.txt".
            warn "Skipping unrecognised listing line: $entry";
            next;
        }
        push @srcfiles, $filename;
    }
    close($listing);
} else {
    $srcfile = $ARGV[0];
    push @srcfiles, $srcfile;
}

# Whole-file reads are done by _slurp_file(), which switches to slurp mode
# with "local $/" for the duration of one read instead of undefining the
# record separator for the entire program.

# Return the complete contents of the file at $path as a single string.
# Dies with $error (used verbatim as the message) if the file cannot be opened.
sub _slurp_file {
    my ($path, $error) = @_;
    open(my $fh, '<', $path) or die $error;
    local $/;
    my $text = <$fh>;
    close($fh);
    return defined $text ? $text : '';
}

# Run ./chinesene.cgi on $txt_path and throw its standard output away.
# The list form of the pipe open starts the program without a shell, so a
# file name containing spaces or shell metacharacters is passed through as a
# single literal argument.  Failures are reported on STDERR but are not fatal.
sub _run_tagger {
    my ($txt_path) = @_;
    my $pid = open(my $out, '-|', './chinesene.cgi', $txt_path);
    if (!$pid) {
        warn "Unable to run ./chinesene.cgi on $txt_path: $!\n";
        return;
    }
    while (defined(my $ignored = <$out>)) { }
    if (!close($out)) {
        warn "./chinesene.cgi exited with status ", ($? >> 8), " for $txt_path\n";
    }
    return;
}

# Scan marked-up $text and return one "TYPE\tSTART\tEND\n" record per
# <open ...>content<close> element.
#
# START and END are offsets into the text with all markup removed (END is
# exclusive), so records from two differently tagged copies of the same
# document are directly comparable.  TYPE is the value of the TYPE="..."
# attribute of the opening tag, or the empty string when there is none.
# Elements are assumed not to nest.  $label names the input in the warning
# issued when the markup is truncated; scanning stops at that point instead of
# running past the end of the string.
sub _extract_tags {
    my ($text, $label) = @_;
    my @records;
    my $length = length $text;
    my $raw    = 0;    # position in the marked-up text
    my $plain  = 0;    # matching position in the markup-free text

    while ($raw < $length) {
        my $open = index($text, '<', $raw);
        last if $open < 0;
        $plain += $open - $raw;

        my $open_end = index($text, '>', $open);
        my $close     = $open_end < 0 ? -1 : index($text, '<', $open_end + 1);
        my $close_end = $close    < 0 ? -1 : index($text, '>', $close);
        if ($close_end < 0) {
            warn "Unterminated markup in $label at offset $open\n";
            last;
        }

        # Cut the opening tag out using raw-text positions.  Slicing from the
        # markup-free offset instead would start too early and could pick up
        # the TYPE of a preceding tag.
        my $markup = substr($text, $open, $open_end - $open + 1);
        my ($type) = ($markup =~ m/TYPE\=\"(\w+)\"/);
        $type = '' unless defined $type;

        my $start = $plain;
        $plain += $close - ($open_end + 1);
        push @records, "$type\t$start\t$plain\n";

        $raw = $close_end + 1;
    }
    return @records;
}

# Score every document: tag it, then compare the machine tags (test/NAME.ne)
# with the hand-made tags (test/NAME.sgm).  A tag counts as matching only if
# its type, start and end are all identical.
foreach my $srcfile (@srcfiles) {
    print STDERR "Processing $srcfile\n";
    # Presumably (re)generates test/$srcfile.ne -- confirm against chinesene.cgi.
    _run_tagger("test/$srcfile.txt");

    my @netags = _extract_tags(
        _slurp_file("test/$srcfile.ne", "Unable to open machine tagged file, $srcfile\n"),
        "test/$srcfile.ne");
    my @stdtags = _extract_tags(
        _slurp_file("test/$srcfile.sgm", "Unable to open hand-tagged file, $srcfile\n"),
        "test/$srcfile.sgm");

    # Hash lookups replace the nested loops; the counts are the same.
    my %is_machine_tag  = map { $_ => 1 } @netags;
    my %is_standard_tag = map { $_ => 1 } @stdtags;

# Compare the machine tagged text with the hand-tagged text
# Calculate RECALL: share of hand-made tags that the machine also produced
    my $std_count = scalar @stdtags;
    my $foundtags = grep { $is_machine_tag{$_} } @stdtags;

    $totalstdtags   += $std_count;
    $totalfoundtags += $foundtags;
    print "$srcfile:\n";
    # A document without tags has no defined score; do not divide by zero.
    print "RECALL   :  ", ($std_count ? 100 * $foundtags / $std_count : "n/a"), "\n";

# Calculate PRECISION: share of machine tags that are also hand-made tags
    my $ne_count  = scalar @netags;
    my $correctne = grep { $is_standard_tag{$_} } @netags;

    $totalnetags    += $ne_count;
    $totalcorrectne += $correctne;
    print "PRECISION:  ", ($ne_count ? 100 * $correctne / $ne_count : "n/a"), "\n";

# Can use the code below to print out the actual text of each entity
    my $srctext = _slurp_file("test/$srcfile.txt", "Can't open original text\n");

    foreach my $tag (@stdtags) {
        my ($type, $start, $end) = split(/\t/, $tag);
        my $entity = substr($srctext, $start, $end - $start);
        $entity =~ s/\n//g;
#    print $entity , "\n";
    }

}

# Corpus-wide summary over every document processed above.
# The running totals are unset when no document was scored, so they are
# normalised to 0 here, and each ratio is reported as "n/a" when its
# denominator is zero instead of aborting with a division-by-zero error.
my $summary_std     = $totalstdtags   || 0;
my $summary_found   = $totalfoundtags || 0;
my $summary_machine = $totalnetags    || 0;
my $summary_correct = $totalcorrectne || 0;

print "\nAll Files:\n";
print "Total Test Corpus Tags:    $summary_std\n";
print "Total Correct Corpus Tags: $summary_found\n";
print "Total Machine Corpus Tags: $summary_machine\n";
print "RECALL   :  ", ($summary_std     ? 100 * $summary_found   / $summary_std     : "n/a"), "\n";
print "PRECISION:  ", ($summary_machine ? 100 * $summary_correct / $summary_machine : "n/a"), "\n";



Anon7 - 2021