Skip to content

Instantly share code, notes, and snippets.

@mala
Created September 3, 2009 05:20
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mala/180142 to your computer and use it in GitHub Desktop.
Save mala/180142 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
# http://todeskin.g.hatena.ne.jp/eigokun/20090902/1251903150
# TODO: cache LWP response
use strict;
use URI;
use JSON;
use LWP::UserAgent;
use Web::Scraper;
use Data::Dumper;
# --- Settings shared with the subs below (they read these file lexicals) ---
my $tag_base  = 'http://b.hatena.ne.jp/t/%s';             # sprintf template; %s is the tag
my $entry_api = 'http://b.hatena.ne.jp/entry/jsonlite/';  # per-entry JSON endpoint
my $max_page  = 1;    # how many tag listing pages get_links walks
my $threshold = 1;    # passed through as the "threshold" query parameter

# The tag to look up is the first command-line argument; bail out with a
# usage line when it is missing (or otherwise false).
my $tag = $ARGV[0] or die "$0 <tag>";

# Per-user tally, filled in place by get_users: user name => number of
# fetched entries that user bookmarked with $tag.
my $users = {};

my @links = get_links($tag);
warn Dumper(\@links);

# One failing entry must not stop the run, so each lookup is wrapped in
# eval and its error is merely reported.
foreach my $url (@links) {
    eval { get_users($url->as_string, $tag, $users) };
    warn $@ if $@;
}

warn Dumper($users);
# Collect the entry links shown on the tag listing pages for a tag.
#
# Walks pages 1 .. $max_page of the listing built from $tag_base, asking for
# the "hot" sort order with the file-level $threshold, and scrapes the href
# attribute of every link matching //div[@class="entry-body"]/h3/a.
#
# Parameters:
#   $tag - tag name substituted into the $tag_base template.
# Returns:
#   A list of the scraped href values in page order; an empty list when no
#   page contained a matching link.
# Errors:
#   Any exception raised by the scraper propagates to the caller
#   (presumably this includes a failed page fetch - confirm against
#   Web::Scraper).
sub get_links {
    my $tag = shift;
    my @links;
    my $scraper = scraper {
        process '//div[@class="entry-body"]/h3/a', 'link[]' => '@href';
    };
    for my $page (1 .. $max_page) {
        my $tag_url = URI->new(sprintf($tag_base, $tag));
        # Resulting query looks like: sort=hot&threshold=5&of=25
        # "of" advances by 25 for each page after the first.
        $tag_url->query_form(
            sort      => "hot",
            threshold => $threshold,
            of        => 25 * ($page - 1),
        );
        warn sprintf("Fetch: %s\n", $tag_url);
        my $result = $scraper->scrape($tag_url);
        # A page without any matching link leaves $result->{link} undefined;
        # dereferencing that directly is fatal under "use strict", so fall
        # back to an empty array and simply contribute nothing for the page.
        push @links, @{ $result->{link} || [] };
    }
    return @links;
}
# Fetch a URL with a freshly created LWP::UserAgent.
#
# Parameters:
#   $url - address to GET.
# Returns:
#   The response content when the request succeeded; otherwise nothing
#   (bare return) after reporting the failure via warn.
# Every attempt is announced on STDERR before the request is made.
sub get_content {
    my ($url) = @_;

    my $agent = LWP::UserAgent->new;
    warn sprintf("Fetch: %s\n", $url);

    my $response = $agent->get($url);
    return $response->content if $response->is_success;

    warn "Fetch: $url Fail.\n";
    return;
}
# Tally the users who bookmarked an entry with a given tag.
#
# Queries $entry_api for $url, decodes the JSON reply and, for every element
# of its "bookmarks" list whose "tags" contain $tag (exact string match),
# increments that bookmark's "user" counter in $result.
#
# Parameters:
#   $url    - entry URL, sent as the "url" query parameter.
#   $tag    - tag a bookmark must carry to be counted.
#   $result - hash reference of user => count, updated in place.
# Returns early (bare return) when no content could be fetched.
# Exceptions from from_json are not caught here.
sub get_users {
    my ($url, $tag, $result) = @_;

    my $api_uri = URI->new($entry_api);
    $api_uri->query_form(url => $url);

    my $body = get_content($api_uri->as_string) or return;
    my $entry = from_json($body);

    for my $bookmark (@{ $entry->{bookmarks} }) {
        # Number of this bookmark's tags equal to the wanted tag.
        my $tagged = grep { $_ eq $tag } @{ $bookmark->{tags} };
        $result->{ $bookmark->{user} }++ if $tagged;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment