Skip to content

Instantly share code, notes, and snippets.

@macros
Created June 23, 2011 22:23
Show Gist options
  • Save macros/1043780 to your computer and use it in GitHub Desktop.
Save macros/1043780 to your computer and use it in GitHub Desktop.
fetch_from_index
# Page through the Solr index and hand each page of results to process_docs().
#
# Sends GET requests to the Solr select handler for documents matching
# q=url:wikiaphone, requesting the "title" and "host" fields as JSON,
# $rows documents per request. Paging stops when:
#   * a page comes back with fewer than $rows documents,
#   * a request fails or its body cannot be decoded into a docs list, or
#   * the offset grows past 300000.
#
# Takes no arguments. Uses the file-level $ua user agent (presumably an
# LWP::UserAgent -- confirm against the rest of the file). Prints a progress
# line after every request and returns nothing.
sub fetch_from_index {
    my $rows   = 10000;
    my $offset = 0;
    my $size   = 0;

    do {
        # Reset for every page. Without this, a failed request that follows
        # a full page would leave $size == $rows with $offset unchanged and
        # the loop would re-request the same page forever.
        $size = 0;

        my $req = HTTP::Request->new(GET => "http://10.8.2.16:8983/solr/select?");
        $req->url->query_form(
            "q"     => 'url:wikiaphone',
            "fl"    => "title,host",
            "wt"    => "json",
            "rows"  => "$rows",
            "start" => "$offset",
        );
        my $res = $ua->request($req);

        my ($j, $docs);
        if (   $res->is_success
            && $res->content
            && ( $j = JSON::DWIW::deserialize($res->content) )
            && ref $j eq 'HASH'
            && ref $j->{"response"} eq 'HASH'
            && ( $docs = $j->{"response"}->{"docs"} )
            && ref $docs eq 'ARRAY' )
        {
            $size = @$docs;

            # Advance by what was actually received so the progress message
            # stays accurate on the final, possibly short, page.
            $offset = $offset + $size;
            process_docs($docs);
        }
        else {
            # $size stays 0 here, which ends the loop below.
            warn "Solr request at offset $offset failed or returned no usable docs: "
                . $res->status_line . "\n";
        }

        print "Finished $offset rows\n";
    } while ( $size == $rows && $offset <= 300000 );

    return;
}
# Group document titles by host into the file-level %wikis hash.
#
# Parameters:
#   $docs - array reference of hash references, each read for its "host"
#           and "title" keys.
#
# For every document whose title is not "Category:Content",
# "MediaWiki:Content" or a "Help:" page, the title is appended to the
# array stored at $wikis{$host}; the array is created the first time a host
# is seen. A "User:Moli.wikia" title is not recorded when it is the first
# title encountered for a host (the host entry is still created, empty).
# Documents without a defined host or title are skipped.
#
# Returns nothing; the result is the mutation of %wikis.
sub process_docs {
    my $docs = shift;

    foreach my $doc (@{$docs}) {
        my $host  = $doc->{host};
        my $title = $doc->{title};

        # Nothing sensible can be stored without both fields, and comparing
        # undef below would only produce warnings.
        next unless defined $host && defined $title;

        next if $title eq "Category:Content"
             || $title eq "MediaWiki:Content"
             || $title =~ /^Help:/;

        my $is_new_host = !exists $wikis{$host};
        $wikis{$host} = [] if $is_new_host;

        # String comparison is required here: the numeric "==" treats both
        # operands as 0, which made this test always true and silently
        # dropped the first title of every host.
        next if $is_new_host && $title eq "User:Moli.wikia";

        push @{ $wikis{$host} }, $title;
    }

    return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment