perl cgi script for Concordance

Printer-friendly version | Send by email
The following script does essentially two things: it searches the 33586 lines of the Erdman text as one long string, and it correlates the line number associated with each hit with the source information (extracted from those 33586 lines into another file), printing that source with each hit together with the 37 characters before and after it. Kludgy as it is, this format enables search capabilities that exceed those of the hierarchy-bound, heavily marked-up Blake Archive version ('search' at bottom).
#!/usr/local/bin/perl -w

# copyright 1998 Nelson Hilton (nhilton@english.uga.edu)
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details:
# http://www.gnu.org/licenses/gpl.txt



# KWIC ("keyword in context") concordance search, run as a CGI script.
#
# Input : CGI field 'search', used as a case-insensitive Perl regular
#         expression (as in the original script; the 'case' field was read
#         but never used, so it is no longer read here).
# Output: an HTML page listing every hit with up to 37 characters of context
#         on each side plus the source identifier of the line the hit starts
#         in, followed by a footer with the running search number and timing.
# Side effect: appends one line per request to the search record file.
#
# NOTE(review): the published listing this was restored from had every
# "<...>" token stripped (filehandle reads and all HTML tags).  The reads are
# restored from the surrounding open/close calls; the HTML tags below are a
# reconstruction (blank lines taken as <p>, single line breaks as <br>) and
# should be confirmed against the live script.

use strict;
use warnings;

use Benchmark;
use Fcntl qw(:flock :seek);

require "cgi-lib.pl";    # provides ReadParse()

our %in;                 # form fields, filled in by ReadParse()

my $TEXT_FILE     = "eeJul98numonlytext";       # 3mb text file
my $ID_FILE       = "eeJul98idonlytext";        # one source identifier per line
my $RECORD_FILE   = '/XXX/KWICsearchrecord';    # log of all searches
my $CONTEXT_CHARS = 37;                         # context shown on each side

main();

# Entry point: parse the request, print the page, log the search.
sub main {
    my $start = Benchmark->new;    # Benchmark set

    ReadParse();

    my $searchterm = defined $in{'search'}       ? $in{'search'}       : '';
    my $remoteaddr = defined $ENV{'REMOTE_ADDR'} ? $ENV{'REMOTE_ADDR'} : '';

    print "Content-type: text/html\n\n";
    print "<html>\n";
    print "<head>\n";
    print "<title>eE KWICsearch</title>";    # original title text is unknown
    print "</head>\n";
    print "<body>\n";

    if ($searchterm eq '') {
        # An empty pattern would match between every pair of characters.
        print "<p>Please enter a search term.\n";
    }
    else {
        # Compile once, and trap syntax errors so a malformed pattern gives
        # the visitor a message instead of a server error.
        my $re = eval { qr/$searchterm/i };
        if (!defined $re) {
            print "<p>Sorry, the search pattern could not be understood: ",
                escape_html($searchterm), "\n";
        }
        elsif (print_hits($re) == 0) {
            # The term comes from the request, so escape it before echoing.
            print "Sorry, no matches were found for ",
                escape_html($searchterm);
        }
    }

    my $searchno = record_search($searchterm, $remoteaddr);

    print "<br>\n";
    print "<p>eE KWICsearch no. $searchno --";
    my $end = Benchmark->new;
    print timestr(timediff($end, $start)), "<p>\n";
    print "Please use following link to report expeditiously<br>\n";
    print "bugs, typos, html lapses, &amp;c. to<br>\n";
    print '<a href="mailto:nhilton@english.uga.edu">'
        . 'mailto:nhilton@english.uga.edu</a><br>' . "\n";
    print "Please refer to \"KWICsearch $searchno\". Thanks!";
    print "</body></html>\n";
    return;
}

# Search the text for $re and print one numbered line per hit.
# Returns the number of hits printed.  Dies if a data file cannot be opened.
sub print_hits {
    my ($re) = @_;

    my $text    = load_text($TEXT_FILE);
    my $sources = load_lines($ID_FILE);
    my $marks   = find_id_marks(\$text);

    my $hits   = 0;
    my $cursor = -1;    # index of the last id mark ending at or before the hit

    print "<br>\n";

    # Walking the matches by offset (instead of split) keeps working when the
    # pattern contains its own capture groups, includes a hit at the very end
    # of the text, and lets the context come from the real neighbouring text.
    while ($text =~ /$re/g) {
        my ($from, $to) = ($-[0], $+[0]);
        next if $to == $from;    # ignore zero-length matches

        # Hits arrive in ascending order, so the cursor only moves forward.
        $cursor++
            while $cursor < $#{$marks}
            && $marks->[ $cursor + 1 ][1] <= $from;

        # The governing id is the last 5-digit number before the hit.  Only
        # when the hit precedes every id is the following id minus one used,
        # which is what the original fell back on.
        my $id;
        if ($cursor >= 0) {
            $id = $marks->[$cursor][2];
        }
        elsif (@{$marks}) {
            $id = $marks->[0][2] - 1;
        }

        # The id is used directly as an index into the identifier list, as in
        # the original; out-of-range ids yield an empty source.
        my $source = '';
        if (defined $id && $id >= 0 && $id <= $#{$sources}) {
            $source = $sources->[$id];
        }

        my $lead_from = $from > $CONTEXT_CHARS ? $from - $CONTEXT_CHARS : 0;
        my $before =
            tidy_context(substr($text, $lead_from, $from - $lead_from), 1);
        my $hit   = substr($text, $from, $to - $from);
        my $after = tidy_context(substr($text, $to, $CONTEXT_CHARS), 0);

        $hits++;
        print "<p>$hits. $before <b>$hit</b> $after &nbsp; $source\n";
    }

    print "<br>\n";
    return $hits;
}

# Prepare one context snippet for display.
#   $part       - raw text taken from before or after a hit
#   $is_leading - true for the text before the hit
# Returns the cleaned snippet.
sub tidy_context {
    my ($part, $is_leading) = @_;

    # Assumed to be <p>: the tag name was lost from the published listing,
    # whose comment reads "clean up for stray , id digits" -- TODO confirm.
    $part =~ s/<p>//gi;

    # Every complete 5-digit id becomes a "/" line-break marker.  This runs
    # before the partial-id cleanup so a full id at the start of the window
    # is not cut down to a single stray digit.
    $part =~ s/\d{5}/\//g;

    if ($is_leading) {
        $part =~ s/\A\d{1,4}//;    # tail of an id cut by the window edge
    }
    else {
        $part =~ s/<[^>]*\z//;     # tag cut by the window edge
    }
    return $part;
}

# Record the offsets of every 5-digit run in the text.
#   $text_ref - reference to the text (avoids copying the 3mb string)
# Returns an array ref of [start offset, end offset, digits] in text order.
sub find_id_marks {
    my ($text_ref) = @_;

    my @marks;
    while (${$text_ref} =~ /(\d{5})/g) {
        push @marks, [ $-[0], $+[0], $1 ];
    }
    return \@marks;
}

# Read a file and return its lines joined by single spaces, so that a
# pattern can match across line ends.  Dies if the file cannot be opened.
sub load_text {
    my ($path) = @_;
    return join ' ', @{ load_lines($path) };
}

# Read a file and return an array ref of its lines without line endings.
# Dies if the file cannot be opened.
sub load_lines {
    my ($path) = @_;

    open my $fh, '<', $path or die "can't open $path: $!";
    my @lines = <$fh>;
    close $fh;

    chomp @lines;    # chomp, not chop: keeps the last char of an unterminated line
    return \@lines;
}

# Append this search to the record file.
#   $term        - the search term as submitted
#   $remote_addr - the client's address
# Returns the number of records already in the file, which the page reports
# as the search number; returns 0 if the file cannot be opened (logging is
# best effort and must not break the results page).
sub record_search {
    my ($term, $remote_addr) = @_;

    my $searchno = 0;

    my $fh;
    if (!open $fh, '+>>', $RECORD_FILE) {
        warn "can't open $RECORD_FILE: $!";
        return $searchno;
    }

    # Requests can overlap; hold the lock while counting and appending.
    flock $fh, LOCK_EX or warn "can't lock $RECORD_FILE: $!";

    # Append mode may leave the read position at the end of the file.
    seek $fh, 0, SEEK_SET;
    while (defined(my $record = <$fh>)) {
        $searchno++;
    }
    seek $fh, 0, SEEK_END;

    my $time = localtime(time);

    # Keep each record on one line even if the term contains line breaks.
    (my $logged_term = $term) =~ s/[\r\n]+/ /g;

    print {$fh} "$searchno $logged_term $remote_addr $time\n";
    close $fh or warn "can't close $RECORD_FILE: $!";

    return $searchno;
}

# Return $text with the characters special to HTML replaced by entities.
sub escape_html {
    my ($text) = @_;

    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

Published @ RC

January 2005