Created
June 27, 2013 00:50
-
-
Save c18t/5873152 to your computer and use it in GitHub Desktop.
crawler for twitter api v1.1. crawl_settings.pl で設定ファイル生成して crawl_insert.pl でDBに突っ込む
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/perl | |
use 5.014; | |
use warnings; | |
use utf8; | |
use lib qw(lib); | |
use autobox; | |
use autobox::dump; | |
use autobox::Core; | |
use YAML qw(LoadFile DumpFile); | |
use Config::Pit; | |
use Getopt::Long; | |
use Data::Lock qw(dlock); | |
# Console encoding: cp932 when running on native Windows (MSWin32), utf8
# everywhere else. The value is locked with Data::Lock's dlock right after
# it is assigned, then applied to both standard input and standard output.
dlock my $CHARSET = $^O eq 'MSWin32' ? 'cp932' : 'utf8';
for my $console ( \*STDIN, \*STDOUT ) {
    binmode $console, ":encoding($CHARSET)";
}
require "twitter_agent.pl"; | |
# Command-line handling.
#
#   -f, --file FILE   existing crawl-settings YAML file to extend
#   -h, --help, -?    print the usage line
#   (conf_name)       first positional argument; used further down to build
#                     the "utig.pl.<conf_name>" profile name
#
# The usage line is printed to STDERR when help is requested, when
# GetOptions reports a parse error (e.g. an unknown option), or when no
# profile name is given. Exit status is 0 for an explicit help request and
# 1 for any misuse, so calling scripts can tell the two apart.
my ( $file, $help ) = ('') x 2;
my $result = GetOptions(
    'f|file=s' => \$file,
    'h|help|?' => \$help,
);
my $prof = shift;

# Test definedness/emptiness rather than truth so that a profile literally
# named "0" is still accepted.
my $prof_missing = ( !defined $prof or $prof eq '' );

if ( $help or !$result or $prof_missing ) {
    warn <<"_HELP_";
Usage: $0 [-f [setting yaml file] -h [help]] (conf_name)
_HELP_
    exit( $help ? 0 : 1 );
}
# Credentials and API client setup.
#
# Two Config::Pit entries are read: the shared 'utig.pl' entry, which must
# provide consumer_key and consumer_secret (the `require` hash names them),
# and the per-profile entry "utig.pl.<conf_name>".
my $profile_entry = "utig.pl.$prof";

my $utig = pit_get(
    'utig.pl',
    require => {
        consumer_key    => '',
        consumer_secret => '',
    },
);
my $config = pit_get($profile_entry);

# twitter_agent() comes from twitter_agent.pl, which is not visible here.
# NOTE(review): it presumably fills in / refreshes access tokens inside
# $config and flags that through {config_updated} -- confirm against
# twitter_agent.pl.
my $nt = twitter_agent( $utig, $config );

# Write the profile back only when the agent reports that it changed.
if ( $nt->{config_updated} ) {
    pit_set( $profile_entry, data => $config );
}
# Collect the crawl settings interactively.
#
# $data is an array reference of hashes { method => ..., screen_name => ... }.
# When -f names an existing file, its contents are loaded and the user is
# only asked whether more entries are wanted. Otherwise at least one entry
# is requested before that question is asked.
#
# End of input on any prompt stops the dialogue and keeps what has been
# collected so far. Previously, end of input at the screen_name prompt
# looped forever, because the undefined answer compared equal to ''.
my $data       = [];
my $need_entry = 1;
if ( $file ne '' and -f $file ) {
    # An empty YAML file loads as undef; treat it as "no settings yet".
    $data = LoadFile($file) // [];
    die "$file does not contain a list of crawl settings\n"
        unless ref $data eq 'ARRAY';
    $need_entry = 0;
}

# Print a prompt and return the chomped answer, or undef at end of input.
# Reads through <> exactly like the original prompts did.
my $ask = sub {
    my ($prompt) = @_;
    print $prompt;
    my $answer = <>;
    chomp $answer if defined $answer;
    return $answer;
};

ENTRY:
while (1) {
    if ($need_entry) {
        my $choice = $ask->('get user_timeline or favorites (u/f) [u]: ');
        last ENTRY unless defined $choice;

        # Anything other than exactly 'f' selects the default.
        my $method = $choice eq 'f' ? 'favorites' : 'user_timeline';

        # Keep asking until a non-empty screen name is entered.
        my $screen_name = '';
        while ( $screen_name eq '' ) {
            $screen_name = $ask->('input target screen_name: ');
            last ENTRY unless defined $screen_name;
        }

        push @{$data}, { method => $method, screen_name => $screen_name };
    }

    my $y_or_n
        = $ask->('do you need to set more crawl settings? (y/n) [n]: ');
    last ENTRY unless defined $y_or_n and $y_or_n =~ /^y/i;
    $need_entry = 1;
}
# Resolve the numeric user_id of every crawl setting and save the result.
#
# Entries that already carry a user_id seed the %lookup table; the screen
# names of the remaining entries are collected once each in @screen_name
# and resolved through $nt->lookup_users in batches. Nothing is looked up
# or written when no entry is missing its user_id.
#
# Fixes compared with the previous implementation:
#   * the final batch was skipped whenever it held exactly one name (e.g.
#     101 names: the 101st was never looked up);
#   * batches were cut from the non-deduplicated list while iterating the
#     deduplicated one, so requests could be misaligned or oversized;
#   * entries without a screen_name had an existing user_id overwritten
#     with undef;
#   * names are matched case-insensitively, because the API may return a
#     screen name in different letter case than the one typed in.

# Names per lookup_users request (same batch size as before).
my $LOOKUP_BATCH_SIZE = 100;

my @screen_name;    # unresolved screen names, first occurrence only
my %lookup;         # lc(screen_name) => user_id
my %queued;         # lc(screen_name) already placed in @screen_name

for my $entry ( @{$data} ) {
    my $name = $entry->{screen_name};
    next unless defined $name;

    if ( defined $entry->{user_id} ) {
        $lookup{ lc $name } = $entry->{user_id};
    }
    elsif ( !$queued{ lc $name }++ ) {
        push @screen_name, $name;
    }
}

if (@screen_name) {
    # Work on a copy so that @screen_name itself stays intact.
    my @pending = @screen_name;
    while ( my @batch = splice @pending, 0, $LOOKUP_BATCH_SIZE ) {
        # NOTE(review): lookup_users is assumed to return an array
        # reference of user hashes with screen_name and id keys, as the
        # original code relied on -- confirm against twitter_agent.pl.
        my $res = $nt->lookup_users( { screen_name => [@batch] } );
        for my $user ( @{ $res || [] } ) {
            next unless defined $user->{screen_name};
            $lookup{ lc $user->{screen_name} } = $user->{id};
        }
    }

    for my $entry ( @{$data} ) {
        my $name = $entry->{screen_name};
        next unless defined $name;    # leave name-less entries untouched

        $entry->{user_id} = $lookup{ lc $name };
        warn "could not resolve user_id for screen_name '$name'\n"
            unless defined $entry->{user_id};
    }

    if ( $file eq '' ) {
        print 'crawl settings file is save as... [crawl_settings.yaml]: ';
        my $answer = <>;
        chomp $answer if defined $answer;

        # An empty answer or end of input selects the default file name.
        $file
            = ( defined $answer and $answer ne '' )
            ? $answer
            : 'crawl_settings.yaml';
    }

    DumpFile( $file, $data );
    say "crawl settings is saved as $file";
}

exit;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
- method: user_timeline
  screen_name: c18t
  user_id: 219707702
- method: favorites
  screen_name: c18t
  user_id: 219707702
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment