Skip to content

Instantly share code, notes, and snippets.

@c18t
Created June 27, 2013 00:50
Show Gist options
  • Save c18t/5873152 to your computer and use it in GitHub Desktop.
Save c18t/5873152 to your computer and use it in GitHub Desktop.
crawler for twitter api v1.1. crawl_settings.pl で設定ファイル生成して crawl_insert.pl でDBに突っ込む
#!/usr/local/bin/perl
use 5.014;
use warnings;
use utf8;
use lib qw(lib);
use autobox;
use autobox::dump;
use autobox::Core;
use YAML qw(LoadFile DumpFile);
use Config::Pit;
use Getopt::Long;
use Data::Lock qw(dlock);
# Console encoding: cp932 on Windows, utf8 everywhere else. dlock (Data::Lock)
# is applied so the value cannot be reassigned later in the script.
dlock my $CHARSET = ($^O eq 'MSWin32' ? 'cp932' : 'utf8');

# Put the same encoding layer on both standard streams used by the prompts.
for my $stream (\*STDIN, \*STDOUT) {
    binmode $stream, ":encoding($CHARSET)";
}

# Loaded at run time from the library path.
# NOTE(review): presumably this file defines twitter_agent(), which is called
# further down — confirm.
require 'twitter_agent.pl';
# Command line handling.
#   -f / --file  : existing crawl settings YAML to extend (optional)
#   -h / --help  : print the usage text
#   (conf_name)  : profile name, used below to build the Config::Pit key
my ( $file, $help ) = ( '', '' );

# GetOptions returns false when it meets an unknown or malformed option; that
# result is now checked instead of being stored and ignored.
my $options_ok = GetOptions(
    'f|file=s' => \$file,
    'h|help|?' => \$help,
);

# First remaining argument is the profile name. Test it for definedness and
# emptiness rather than truth so that a profile literally named "0" is valid.
my $prof = shift;
my $prof_missing = ( !defined $prof or $prof eq '' );

if ( $help or !$options_ok or $prof_missing ) {
    warn <<"_HELP_";
Usage: $0 [-f [setting yaml file] -h [help]] (conf_name)
_HELP_
    # An explicit help request is a success; bad usage reports failure.
    exit( $help ? 0 : 1 );
}
# Application-wide settings stored under the 'utig.pl' Pit entry; the require
# hash names the keys (consumer_key, consumer_secret) that entry must provide.
my $utig = pit_get(
    'utig.pl',
    require => { consumer_key => '', consumer_secret => '' },
);

# Per-profile settings live under a key derived from the profile name.
my $profile_key = "utig.pl.$prof";
my $config      = pit_get($profile_key);

# Build the API agent from both setting sets.
# NOTE(review): twitter_agent() is defined outside this file; it apparently
# may modify $config and flag that via config_updated — confirm.
my $nt = twitter_agent($utig, $config);

# Persist the profile settings again when the agent reports they changed.
if ( $nt->{config_updated} ) {
    pit_set($profile_key, data => $config);
}
# Collect the crawl settings into $data (array ref of
# { method => ..., screen_name => ... } hashes).
# When -f names an existing file its entries are loaded first and the user is
# only asked whether to add more; otherwise the first entry is asked for
# straight away.
my $data = [];
my $ask_entry = 1;    # false only right after entries were preloaded
if ($file ne '' and -f $file) {
    # An empty YAML document loads as undef; treat that as "no entries yet".
    $data = LoadFile($file) // [];
    die "$file does not contain a list of crawl settings\n"
        unless ref($data) eq 'ARRAY';
    $ask_entry = 0;
}

# Print a prompt and return the chomped reply, or undef once input hits EOF.
my $prompt = sub {
    my ($message) = @_;
    print $message;
    my $reply = <>;
    return unless defined $reply;
    chomp $reply;
    return $reply;
};

# Every exit from this loop goes through "last ENTRY". EOF on any prompt ends
# the dialogue instead of re-asking forever, which is what the empty-name
# retry would otherwise do once input is exhausted.
ENTRY: while (1) {
    if ($ask_entry) {
        my $choice = $prompt->('get user_timeline or favorites (u/f) [u]: ');
        last ENTRY unless defined $choice;
        # Anything other than exactly 'f' selects the default, user_timeline.
        my $method = $choice eq 'f' ? 'favorites' : 'user_timeline';

        # The screen name is mandatory: repeat the question on an empty reply.
        my $screen_name = '';
        while ($screen_name eq '') {
            $screen_name = $prompt->('input target screen_name: ');
            last ENTRY unless defined $screen_name;
        }

        push @$data, { method => $method, screen_name => $screen_name };
    }

    my $more = $prompt->('do you need to set more crawl settings? (y/n) [n]: ');
    last ENTRY unless defined $more and $more =~ /^y/i;
    $ask_entry = 1;
}
# Fill in the user_id of every entry that only has a screen_name, then save
# the settings as YAML. Nothing is written when no entry needed a lookup.
my @screen_name;    # distinct screen names that still need an id lookup
my %lookup;         # lower-cased screen_name => user_id
my %queued;         # lower-cased names already placed in @screen_name

for my $entry (@$data) {
    next unless defined $entry->{screen_name};
    my $key = lc $entry->{screen_name};
    if (defined $entry->{user_id}) {
        # Remember ids already present in the data.
        $lookup{$key} = $entry->{user_id};
    }
    elsif (!$queued{$key}++) {
        # Queue each name once, even when several entries share it
        # (e.g. user_timeline and favorites for the same account).
        push @screen_name, $entry->{screen_name};
    }
}

if (@screen_name) {
    # Look the names up in batches of at most 100 per request. splice walks
    # the whole list, so a final batch holding a single name is not skipped.
    my @pending = @screen_name;
    while (my @batch = splice @pending, 0, 100) {
        my $res = $nt->lookup_users({ screen_name => \@batch });
        # Keys are lower-cased because the API may echo a screen name in a
        # different letter case than the one that was typed.
        $lookup{ lc $_->{screen_name} } = $_->{id} for @{ $res || [] };
    }

    for my $entry (@$data) {
        # Entries without a screen_name keep whatever user_id they have.
        next unless defined $entry->{screen_name};
        my $id = $lookup{ lc $entry->{screen_name} };
        warn "could not resolve user_id for screen_name '$entry->{screen_name}'\n"
            unless defined $id;
        $entry->{user_id} = $id;
    }

    if ($file eq '') {
        print 'crawl settings file is save as... [crawl_settings.yaml]: ';
        my $answer = <>;
        chomp $answer if defined $answer;    # undef means EOF: use the default
        $file = (defined $answer and $answer ne '')
            ? $answer
            : 'crawl_settings.yaml';
    }

    DumpFile($file, $data);
    say "crawl settings is saved as $file";
}
exit;
---
- method: user_timeline
  screen_name: c18t
  user_id: 219707702
- method: favorites
  screen_name: c18t
  user_id: 219707702
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment