|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/ |
Upload File : |
#!/usr/bin/perl
#
# Interactive keyword search over a pre-built word index.
#
# Data files (opened relative to the current directory):
#   erik_wordlist.txt  - one entry per line, whitespace separated:
#                        word, offset, length, amount.  offset/length locate
#                        the word's posting list inside erik_wordindex.txt.
#                        The fourth field is not used by this script.
#   erik_wordindex.txt - posting lists: comma-separated document numbers.
#   erik_docindex.txt  - one document per line, tab separated:
#                        title, filename, offset, size.  Line N (0-based)
#                        describes document number N.
#
# Session: the user types a query; every document whose posting lists cover
# ALL query words is listed with a 1-based number; the user may then pick a
# number to print that document's text, or 0 to query again.  "quit" or "q"
# at the query prompt ends the session.
use strict;
use warnings;

my $WORDLIST_FILE  = "erik_wordlist.txt";
my $WORDINDEX_FILE = "erik_wordindex.txt";
my $DOCINDEX_FILE  = "erik_docindex.txt";

# The prompts do not end in a newline; flush so they show up immediately
# even when STDOUT is not a terminal.
$| = 1;

# Load the word list.
# Returns a hash ref: word => [ offset, length ].
# Lines without a numeric offset and length are skipped instead of creating
# bogus entries.  Dies if the file cannot be opened.
sub load_wordlist {
    my ($path) = @_;
    open(my $fh, '<', $path) or die "Can't open word listing '$path': $!";
    my %entry_for;
    while (my $line = <$fh>) {
        my ($word, $offset, $length) = split ' ', $line;
        next unless defined($length)
            && $offset =~ /\A\d+\z/
            && $length =~ /\A\d+\z/;
        $entry_for{$word} = [ $offset, $length ];
    }
    close($fh);
    return \%entry_for;
}

# Load the document index once, so that matches can be looked up by line
# number instead of re-reading the file from the top for every match.
# Returns an array ref of chomped lines; element N is document number N.
# Dies if the file cannot be opened.
sub load_docindex {
    my ($path) = @_;
    open(my $fh, '<', $path) or die "Can't open document index '$path': $!";
    my @lines = <$fh>;
    close($fh);
    chomp @lines;
    return \@lines;
}

# Fetch the posting list for one word-list entry.
#   $index_fh - read handle on the word index file
#   $entry    - [ offset, length ] as produced by load_wordlist
# Returns the distinct document numbers in the list (each at most once, in
# order of first appearance), or an empty list when nothing can be read.
sub postings_for {
    my ($index_fh, $entry) = @_;
    my ($offset, $length) = @$entry;

    # Only $length - 1 bytes are read -- presumably the last byte is a
    # trailing delimiter written by the indexer (TODO confirm).  A length
    # below 2 therefore holds no data, and would make read() die on a
    # negative length when it is 0.
    return if $length < 2;

    seek($index_fh, $offset, 0) or return;
    my $postings = '';
    return unless read($index_fh, $postings, $length - 1);

    my (%seen, @ids);
    for my $field (split /,/, $postings) {
        # Ignore empty or non-numeric fields; they would otherwise be
        # treated as document 0.
        next unless $field =~ /\A\s*(\d+)\s*\z/;
        my $id = $1 + 0;
        push @ids, $id unless $seen{$id}++;    # weed out repetition
    }
    return @ids;
}

# Print the text of one document.
#   $docinfo - a document index line: title, filename, offset, size (tabs)
# Failures are reported with warn() so the interactive session continues.
sub show_document {
    my ($docinfo) = @_;
    my (undef, $filename, $offset, $size) = split /\t/, $docinfo;

    unless (defined($size)
        && $offset =~ /\A\s*\d+\s*\z/
        && $size   =~ /\A\s*\d+\s*\z/)
    {
        warn "Malformed document index entry: $docinfo\n";
        return;
    }

    # Three-argument open: the file name comes from a data file and must
    # never be interpreted as an open mode or a pipe.
    my $fh;
    unless (open($fh, '<', $filename)) {
        warn "Can't open document file '$filename': $!\n";
        return;
    }

    my $doctxt = '';
    if (seek($fh, $offset, 0) && defined read($fh, $doctxt, $size)) {
        print $doctxt;
    }
    else {
        warn "Can't read document text from '$filename': $!\n";
    }
    close($fh);
    return;
}

my $wordlist = load_wordlist($WORDLIST_FILE);

# Open wordindex file for random access
open(my $index_fh, '<', $WORDINDEX_FILE)
    or die "Can't open word index file '$WORDINDEX_FILE': $!";
binmode($index_fh);

my $docindex = load_docindex($DOCINDEX_FILE);

print "Enter query: ";
QUERY:
while (my $query = <STDIN>) {
    last QUERY if $query =~ m/^quit$/i or $query =~ m/^q$/i;

    my @qwords = split ' ', lc $query;

    # Document number => how many query words list that document.
    my %hits;
    for my $qword (@qwords) {
        my $entry = $wordlist->{$qword} or next;
        $hits{$_}++ for postings_for($index_fh, $entry);
    }

    # Print documents that matched each key word.  Words missing from the
    # word list still count toward the total, so such a query matches
    # nothing.  @selections is rebuilt for every query so that numbers from
    # an earlier, longer result list can never be selected by mistake.
    my @selections;
    for my $id (sort { $a <=> $b } keys %hits) {
        next if $hits{$id} < @qwords;
        my $docinfo = $docindex->[$id];
        next unless defined $docinfo;    # posting points past the doc index
        push @selections, $docinfo;
        my ($doctitle) = split /\t/, $docinfo;
        $doctitle = '' unless defined $doctitle;
        print scalar(@selections), ".: $doctitle\n";
    }

    print "\nView document \# (0 to query again):";
    my $selection = <STDIN>;
    last QUERY unless defined $selection;    # end of input

    # Only a leading non-negative integer counts as a choice; anything else
    # (including negative numbers, which used to index from the end of the
    # result list) falls through to a new query.
    my ($choice) = $selection =~ /\A\s*(\d+)/;
    if (defined($choice) && $choice > 0) {
        if ($choice > @selections) {
            print "No such document: $choice\n";
        }
        else {
            show_document($selections[$choice - 1]);
        }
    }

    print "\n\nEnter query: ";
}
close($index_fh);