Skip to content

Instantly share code, notes, and snippets.

@ryan5500
Created August 21, 2009 11:33
Show Gist options
  • Save ryan5500/171885 to your computer and use it in GitHub Desktop.
Save ryan5500/171885 to your computer and use it in GitHub Desktop.
package main;
use strict;
use warnings;
use AnyEvent;
use MatchWatcher;

# Condition variable that nothing in this script ever signals: waiting on it
# simply keeps the event loop running.
my $cv = AnyEvent->condvar;

# After 2 seconds, start polling the match page every 10 seconds.
# The returned timer is kept in a variable so it stays referenced while the
# loop runs.
my $t1 = MatchWatcher::start_wait(2, 10, "http://some.sports.site.com/game.html");

$cv->recv;
package MatchScraper;
use strict;
use warnings;
use LWP::Simple;
use HTML::TreeBuilder::XPath;
# Parse tree shared by scrape() and get_table(). scrape() replaces it with a
# fresh, empty tree after every run so documents never accumulate in it.
my $tree = HTML::TreeBuilder::XPath->new;

# Download the page at $url, locate the match table and format its events.
#
# Returns the scalar-context value of format_info().
# NOTE(review): format_info() returns an array, so in scalar context this is
# the NUMBER of formatted lines, not the lines themselves. Kept unchanged so
# existing callers see the same value -- confirm whether the lines were meant.
#
# Any exception raised while fetching or parsing is re-thrown after the shared
# tree has been reset.
sub scrape {
    my ($url) = @_;

    my $format;
    my $ok = eval {
        my $html    = get_html($url);
        my $table   = get_table($html);
        my @raw_arr = get_match_info($table);
        $format = format_info(\@raw_arr);
        1;
    };
    my $error = $@;

    # Release the parsed document and start over with an empty tree, so a
    # later call in the same process does not operate on a deleted (or
    # half-filled) tree.
    $tree->delete;
    $tree = HTML::TreeBuilder::XPath->new;

    die $error unless $ok;
    return $format;
}
# Turn the nested structure produced by get_match_info() into flat text lines.
#
# Takes a reference to an array of rows. In each row, element 1 is the time
# label and elements 0 and 2 are the two side columns. A side column that is
# an array reference holds event entries (array references themselves):
#   - a 3-element entry becomes "<time> <first> -> <third>"
#   - a 2-element entry becomes "<time> <second>"
#   - entries of any other size are ignored
# Side columns that are not array references are skipped.
#
# Returns the array of lines (its element count in scalar context).
sub format_info {
    my ($rows) = @_;
    my @lines;

    foreach my $row (@{$rows}) {
        my $time  = $row->[1];
        my @sides = @{$row}[0, 2];

        foreach my $side (@sides) {
            next unless ref $side eq 'ARRAY';

            foreach my $entry (@{$side}) {
                my $size = scalar @{$entry};
                if ($size == 3) {
                    push @lines, "$time " . $entry->[0] . " -> " . $entry->[2];
                }
                elsif ($size == 2) {
                    push @lines, "$time " . $entry->[1];
                }
            }
        }
    }

    return @lines;
}
# Walk the match table and collect its contents as nested Perl data.
#
# $match_table is the element returned by get_table(); its element children
# are treated as rows and their element children as cells.
#
# Returns a list with one array reference per row. Each row holds one entry
# per cell:
#   - a plain string (the cell's text) when the cell's first child is not a
#     nested <table> -- this is how the time column shows up;
#   - otherwise a reference to an array of nested-table rows, each of which
#     is a reference to an array of that row's cell texts.
# Non-element children (bare text) are skipped at every level.
sub get_match_info {
    my ($match_table) = @_;
    my @info_table;

    for my $tr ($match_table->content_list) {
        next if ref $tr ne 'HTML::Element';

        my @column;
        for my $td ($tr->content_list) {
            next if ref $td ne 'HTML::Element';

            #if time column
            # The first child may be missing (empty cell) or a plain text
            # string, so make sure it is an object before asking for its tag.
            my ($first_child) = $td->content_list;
            if (!(ref($first_child) && $first_child->tag eq 'table')) {
                push @column, $td->as_text;
                next;
            }

            my @infos;
            for my $table ($td->content_list) {
                next if ref $table ne 'HTML::Element';

                for my $tr2 ($table->content_list) {
                    next if ref $tr2 ne 'HTML::Element';

                    my @info_elements =
                        map  { $_->as_text }
                        grep { ref $_ eq 'HTML::Element' } $tr2->content_list;
                    push @infos, \@info_elements;
                }
            }
            push @column, \@infos;
        }
        push @info_table, \@column;
    }

    return @info_table;
}
# Parse $html into the shared $tree and return the element that holds the
# match information table.
#
# The table is found relative to the yellow-card icons on the page: the
# eighth matching <img> (index 7) is used as an anchor and its sixth ancestor
# is returned.
# NOTE(review): the icon index and the ancestor depth are hard-coded and
# presumably tied to one specific page layout -- confirm against the page.
#
# Dies with a descriptive message when there is no HTML to parse, when fewer
# than eight icons are found, or when the anchor has fewer than six ancestors.
sub get_table {
    my ($html) = @_;
    die "get_table: no HTML document to parse" unless defined $html;

    #get match info table
    $tree->parse($html);
    $tree->eof;    # signal end of input so the tree is finalised before querying

    my @cards = $tree->findnodes(
        q{//img[@src='http://i.yimg.jp/images/sports/soccer/common/icon/ycard.gif']}
    );
    my $key = $cards[7];
    die "get_table: expected at least 8 yellow-card icons, found " . scalar(@cards)
        unless defined $key;

    # Climb six levels up from the anchor icon.
    my $table = $key;
    for my $depth (1 .. 6) {
        my $parent = $table->parent;
        die "get_table: anchor icon has no ancestor at depth $depth"
            unless defined $parent;
        $table = $parent;
    }
    return $table;
}
# Fetch the document at $url with LWP::Simple's get() and hand back whatever
# it returns, unchanged.
sub get_html {
    my ($url) = @_;
    return scalar get($url);
}
1;
package MatchWatcher;
# Schedules repeated runs of MatchScraper::scrape() on the AnyEvent loop.
use strict;
use warnings;
use AnyEvent;
use AnyEvent::Run;
use MatchScraper;

# File-scoped holders so the polling timer and the most recent AnyEvent::Run
# object stay referenced after the callbacks that create them have returned.
my $watcher;
my $process;

# Arrange for polling of $url to begin $time seconds from now.
#
#   $time     - delay in seconds before polling starts
#   $interval - seconds between polls, passed on to watch_match()
#   $url      - page to scrape
#
# Returns the one-shot AnyEvent timer; the caller has to keep it referenced
# for the callback to fire.
sub start_wait {
    my ($time, $interval, $url) = @_;
    return AnyEvent->timer(
        after => $time,
        cb    => sub {
            warn 'start_wait';
            $watcher = watch_match($interval, $url);
        },
    );
}

# Create a repeating timer that fires immediately and then every $interval
# seconds. On each tick a new AnyEvent::Run object is created whose command
# calls MatchScraper::scrape($url) and warns the result; every line read back
# from it is reported with a 'read:' prefix.
#
# Each tick overwrites $process, so the previous AnyEvent::Run object loses
# its reference here.
#
# Returns the repeating AnyEvent timer.
sub watch_match {
    my ($interval, $url) = @_;
    return AnyEvent->timer(
        after    => 0,
        interval => $interval,
        cb       => sub {
            warn 'watch_match';
            $process = AnyEvent::Run->new(
                cmd => sub {
                    my $hoge = MatchScraper::scrape($url);
                    warn $hoge;
                    warn 'run';
                },
                on_read => sub {
                    # Queue a line reader; $_[1] in its callback is the line.
                    shift->push_read( line => sub {
                        warn 'read:', $_[1];
                    });
                },
                # Nothing to do at end of output.
                on_eof => sub {},
            );
        },
    );
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment