#!/usr/local/bin/perl
use strict;
use warnings;

# Filter a stream of "<DOC id ...> ... </DOC>" records read from standard
# input, printing only the documents whose ID is listed in the candidate
# file named by the first command-line argument.
#
# Usage:  <script> CANDIDATE_ID_FILE < documents > selected_documents
#
# Candidate file: one ID per line; the ID is the first whitespace-delimited
# token on the line, anything after it is ignored.
#
# Output, for every selected document:
#     <DOC id>
#     ...body lines exactly as read...
#     (one extra newline)
#     </DOC>
# Only the ID survives from the opening tag line; the rest of that line and
# the whole line containing "</DOC>" are not copied to the output.

my $id_list_path = $ARGV[0];
die "usage: $0 CANDIDATE_ID_FILE < documents\n"
    unless defined $id_list_path;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode or as a command to pipe from.
open(my $id_fh, '<', $id_list_path)
    or die "can't open candidate doc id list file:$id_list_path: $!\n";

my %is_candidate;
while (my $line = <$id_fh>) {
    # Skip lines that contain no token (blank lines). Without this check a
    # failed match would leave $1 undefined or stale and register a bogus ID.
    next unless $line =~ /(\S+)/;
    $is_candidate{$1} = 1;
}
close($id_fh);

my $doc_id   = '';
my $doc_text = '';
while (my $line = <STDIN>) {
    if ($line =~ /<DOC\s+([^\s>]+)/) {
        $doc_id = $1;
        # Start a fresh record: text seen outside a document, or left over
        # from an unterminated one, must not be attributed to this document.
        $doc_text = '';
    } elsif ($line =~ m{</DOC>}) {
        # $doc_id is '' when no opening tag preceded this closing tag; ''
        # is never a candidate key, so such stray closers print nothing.
        if (exists $is_candidate{$doc_id}) {
            print "<DOC $doc_id>\n";
            print "$doc_text\n";
            print "</DOC>\n";
        }
        $doc_text = '';
        $doc_id   = '';
    } else {
        $doc_text .= $line;
    }
}
-
-
Save vovkkk/366dd0f5c97a93577c79 to your computer and use it in GitHub Desktop.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment