-
-
Save tokubass/1021405 to your computer and use it in GitHub Desktop.
Twitter API -> Read it later API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use 5.010; | |
use strict; | |
use warnings; | |
use Net::Twitter; | |
use URI::Find; | |
use Web::Scraper; | |
use LWP::UserAgent; | |
use YAML; | |
use Scalar::Util 'blessed'; | |
use Encode; | |
# Path of the YAML file holding the list definitions and the Read It Later
# credentials. The same file is rewritten at the end of the run with the
# updated since_id of every list.
my $config_uri = 'path/to/yaml';
my $config     = YAML::LoadFile($config_uri);

### yaml format (sample)
# ---
# lists:
#   - list_id: 5157981
#     list_name: more-twitter-accounts
#     since_id: *****************   # ... ex.) The latest number of the list
#     user: twitter
#   - list_id: 4497778
#     list_name: perl-people
#     since_id: *****************
#     user: perlfoundation
# read_it_later:
#   apikey: *********************
#   password: ********
#   username: ********

# Twitter client restricted to the REST and Lists API traits.
my $nt = Net::Twitter->new(
    traits => [ 'API::REST', 'API::Lists' ],
);

# Endpoint used to add a URL; its query string is refilled for every link.
my $read_it_later = URI->new('https://readitlaterlist.com/v2/add');

# Shared HTTP client for expanding short links and calling the add endpoint.
my $ua = LWP::UserAgent->new();
# Main driver.
#
# For every configured Twitter list: page through the statuses newer than the
# stored since_id, extract every URL from each status text, expand it, look up
# the page title and submit the link to Read It Later. The highest status id
# seen becomes the list's new since_id, and the whole configuration is written
# back to $config_uri once all lists have been handled.
for my $list ( @{ $config->{lists} } ) {
    my $page           = 1;
    my $start_since_id = $list->{since_id};
    my $new_since_id   = $start_since_id;

    LOOP_PAGE:
    while (1) {
        my ( $statuses, $success ) = get_list_statuses( $list, $page );

        if ( not $success ) {
            # A Twitter API error: forget the progress made on this list so
            # the next run retries from the original since_id, and stop
            # paging. Without leaving the loop here the undefined $statuses
            # would be dereferenced below and kill the script before the
            # configuration is saved.
            $new_since_id = $start_since_id;
            last LOOP_PAGE;
        }

        # An empty (or missing) page means there is nothing more to fetch.
        last LOOP_PAGE unless $statuses && @$statuses;

        # The statuses of a page are walked in reverse order.
        for my $status ( reverse @$statuses ) {
            my @uris = find_uris_from( $status->{text} );

            for my $uri (@uris) {
                # Skip links that cannot be resolved or scraped.
                next unless ( my $expand_uri = expand_uri($uri) );
                next unless ( my $html_title = get_html_title($expand_uri) );

                # No trailing newline here: this string is stored as the
                # bookmark title, it is not a line of terminal output.
                my $title = sprintf '[TW]%s@%s / %s',
                    $list->{list_name},
                    $status->{user}{screen_name},
                    $html_title;

                $read_it_later->query_form(
                    apikey   => $config->{read_it_later}{apikey},
                    username => $config->{read_it_later}{username},
                    password => $config->{read_it_later}{password},
                    url      => $expand_uri,
                    title    => $title,
                );

                # NOTE(review): the add endpoint is called with a HEAD
                # request -- confirm the service accepts that method.
                my $res = eval { $ua->head("$read_it_later") };
                next if $@;

                if ( $res->is_success ) {
                    # Report every link that was stored successfully.
                    printf "[TW]%s\@%s / %s (%s)\n",
                        $list->{list_name},
                        $status->{user}{screen_name},
                        encode( 'utf-8', $html_title ),
                        $expand_uri;
                }
            }

            # Remember the highest status id processed so far.
            if ( $new_since_id < $status->{id} ) {
                $new_since_id = $status->{id};
            }
        }

        $page++;
    }

    $list->{since_id} = $new_since_id;
}

# Persist the updated since_id values for the next run.
YAML::DumpFile( $config_uri, $config );
# Fetch one page of statuses for a Twitter list.
#
# Parameters:
#   $list - hash ref with the keys user, list_id and since_id
#   $page - page number to request
#
# Returns the two-element list ($statuses, $success):
#   $statuses - array ref of statuses; an empty array ref when the request
#               failed, so callers can always dereference it safely
#   $success  - 1 on success, undef when Net::Twitter raised a
#               Net::Twitter::Error
#
# Any exception that is not a Net::Twitter::Error is rethrown unchanged.
sub get_list_statuses {
    my ( $list, $page ) = @_;

    my $statuses = eval {
        $nt->list_statuses({
            user     => $list->{user},
            list_id  => $list->{list_id},
            per_page => 200,
            page     => $page,
            since_id => $list->{since_id},
        });
    };

    if ( my $err = $@ ) {
        # Rethrow the saved copy: $@ itself may already have been reset by
        # the calls made while inspecting the error object.
        die $err unless blessed $err && $err->isa('Net::Twitter::Error');
        return ( [], undef );
    }

    return ( $statuses // [], 1 );
}
# Collect every URI that URI::Find recognises in a piece of text.
#
# Parameters:
#   $text - the text to scan (only a private copy is handed to the finder)
#
# Returns the list of URIs exactly as they were written in the text, in the
# order in which the finder reported them.
sub find_uris_from {
    my ($text) = @_;

    my @found;
    my $collector = URI::Find->new(
        sub {
            my ( undef, $as_written ) = @_;
            push @found, $as_written;

            # Hand the original text back so nothing is rewritten.
            return $as_written;
        }
    );

    $collector->find( \$text );

    return @found;
}
# Resolve a (possibly shortened) URI to the address it finally points at.
#
# Parameters:
#   $uri - the URI to probe with a HEAD request
#
# Returns the URI of the request that produced the final response (the
# address reached once the user agent has followed any redirects), or
# nothing (undef in scalar context) when the request was not successful.
sub expand_uri {
    my ($uri) = @_;

    my $res = $ua->head($uri);
    return unless $res->is_success;

    return $res->request->uri;
}
# Look up the <title> text of the document behind a URI.
#
# Parameters:
#   $uri - address of the page to scrape
#
# Returns the title text, the placeholder "-- No title --" when the page has
# no (or a false) title, or nothing when scraping raised an exception.
sub get_html_title {
    my ($uri) = @_;

    # The scraper is built once and reused for every call.
    state $title_scraper = scraper {
        process 'title', 'title' => 'TEXT';
    };

    my $scraped = eval { $title_scraper->scrape( URI->new($uri) ) };
    return if $@;

    return $scraped->{title} || "-- No title --";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment