Skip to content

Instantly share code, notes, and snippets.

@norry-gogo
Created May 29, 2011 09:09
Show Gist options
  • Save norry-gogo/997598 to your computer and use it in GitHub Desktop.
Save norry-gogo/997598 to your computer and use it in GitHub Desktop.
Twitter API -> Read it later API
#!/usr/bin/env perl
use 5.010;
use strict;
use warnings;
use Net::Twitter;
use URI::Find;
use Web::Scraper;
use LWP::UserAgent;
use YAML;
use Scalar::Util 'blessed';
use Encode;
# Path of the YAML configuration file. The first command-line argument, when
# given, overrides the built-in default. The same path is written back to when
# the script finishes, so it must be writable.
my $config_uri = shift(@ARGV) // 'path/to/yaml';
# Parsed configuration; see the sample format in the comment that follows.
my $config = YAML::LoadFile($config_uri);
### YAML format (sample)
# ---
# lists:
#   - list_id: 5157981
#     list_name: more-twitter-accounts
#     since_id: *****************   # e.g. the ID of the newest status in the list
#     user: twitter
#   - list_id: 4497778
#     list_name: perl-people
#     since_id: *****************
#     user: perlfoundation
# read_it_later:
#   apikey: *********************
#   password: ********
#   username: ********
# Twitter client restricted to the REST and Lists API traits.
my $nt = Net::Twitter->new(
    traits => [qw/API::REST API::Lists/],
);

# "Add" endpoint; its query string is rebuilt for every link that is submitted.
my $read_it_later = URI->new('https://readitlaterlist.com/v2/add');
my $ua = LWP::UserAgent->new;

# For every configured list: page through the statuses newer than the stored
# since_id, submit each link found in them, and record the highest status ID
# seen so that the next run can start from there.
for my $list ( @{ $config->{lists} } ) {
    my $page           = 1;
    my $start_since_id = $list->{since_id};
    my $new_since_id   = $start_since_id;

  LOOP_PAGE:
    while (1) {
        my ($statuses, $success) = get_list_statuses($list, $page);

        if (!$success) {
            # The fetch failed: fall back to the original since_id so that
            # statuses which could not be fetched are requested again next run.
            $new_since_id = $start_since_id;
            last LOOP_PAGE;
        }

        # Stop on an empty (or missing) page. The definedness check matters:
        # dereferencing an undefined $statuses is fatal under "use strict".
        last LOOP_PAGE unless $statuses && @$statuses;

        # The page is walked in reverse of the order it was returned in
        # (presumably oldest first -- confirm against the API's ordering).
        for my $status (reverse @$statuses) {
            for my $uri (find_uris_from($status->{text})) {
                my $expand_uri = expand_uri($uri);
                next unless $expand_uri;
                my $html_title = get_html_title($expand_uri);
                next unless $html_title;

                $read_it_later->query_form(
                    apikey   => $config->{read_it_later}{apikey},
                    username => $config->{read_it_later}{username},
                    password => $config->{read_it_later}{password},
                    url      => $expand_uri,
                    # No trailing newline: it would become part of the title
                    # that is sent to the service.
                    title    => sprintf(
                        '[TW]%s@%s / %s',
                        $list->{list_name},
                        $status->{user}{screen_name},
                        $html_title,
                    ),
                );

                # The request URL carries the account password, so the
                # warnings below deliberately mention only the target link.
                my $res = eval { $ua->head("$read_it_later") };
                if (!$res) {
                    warn "Read It Later request for $expand_uri died: $@\n";
                    next;
                }
                if (!$res->is_success) {
                    warn sprintf "Read It Later rejected %s: %s\n",
                        $expand_uri, $res->status_line;
                    next;
                }

                printf "[TW]%s\@%s / %s (%s)\n",
                    $list->{list_name},
                    $status->{user}{screen_name},
                    encode('utf-8', $html_title),
                    $expand_uri;
            }

            # Track the highest status ID seen; tolerate a list that has no
            # since_id configured yet.
            $new_since_id = $status->{id}
                if !defined $new_since_id || $new_since_id < $status->{id};
        }
        $page++;
    }
    $list->{since_id} = $new_since_id;
}

# Persist the updated since_id values back into the configuration file.
YAML::DumpFile($config_uri, $config);
# Fetch one page of statuses for a configured Twitter list.
#
# Arguments:
#   $list - hash ref providing the keys user, list_id and since_id
#   $page - page number to request (200 statuses are requested per page)
#
# Returns the two-element list ($statuses, $success):
#   $statuses - the value returned by list_statuses; an empty array ref when
#               the call failed, so callers can always dereference it
#   $success  - 1 on success, undef when a Net::Twitter::Error was caught
#
# Any exception that is not a Net::Twitter::Error is re-thrown.
sub get_list_statuses {
    my ($list, $page) = @_;

    my $statuses;
    # The trailing "1" makes the eval's return value the success indicator,
    # which does not depend on $@ still being intact after the block exits.
    my $ok = eval {
        $statuses = $nt->list_statuses({
            user     => $list->{user},
            list_id  => $list->{list_id},
            per_page => 200,
            page     => $page,
            since_id => $list->{since_id},
        });
        1;
    };

    if (!$ok) {
        my $err = $@ || 'unknown error';
        # Only Twitter API errors are treated as recoverable.
        die $err unless blessed $err && $err->isa('Net::Twitter::Error');
        warn sprintf "list_statuses failed for list %s (page %s): %s\n",
            $list->{list_id} // '?', $page // '?', $err;
        return ([], undef);
    }

    return ($statuses // [], 1);
}
# Collect every URI that URI::Find recognises in a piece of text.
#
# Arguments:
#   $text - the string to search (a private copy is searched; the caller's
#           value is not modified)
#
# Returns the matches as they appeared in the text, in the order found
# (the number of matches in scalar context).
sub find_uris_from {
    my ($text) = @_;

    my @found;
    my $collector = sub {
        # First argument is the URI as parsed by the finder; only the text
        # as originally written is kept.
        my (undef, $matched_text) = @_;
        push @found, $matched_text;
        # Hand the match back unchanged so the searched text is not rewritten.
        return $matched_text;
    };

    URI::Find->new($collector)->find(\$text);

    return @found;
}
# Resolve a (possibly shortened) URI by issuing a HEAD request for it.
#
# Arguments:
#   $uri - the URI to resolve
#
# Returns the URI of the request attached to the successful response (the
# final location if the user agent followed redirects -- confirm against the
# agent's redirect settings), or nothing when the request was not successful.
sub expand_uri {
    my ($short_uri) = @_;

    my $response = $ua->head($short_uri);
    if ($response->is_success) {
        return $response->request->uri;
    }

    return;
}
# Fetch a page and extract the text of its <title> element.
#
# Arguments:
#   $uri - location of the page to scrape
#
# Returns:
#   - the title text when the page has a non-blank title
#   - the placeholder "-- No title --" when the title is missing or blank
#   - nothing when scraping threw an exception
sub get_html_title {
    my $uri = shift;

    # Built once and reused for every call.
    state $scraper = scraper {
        process 'title', 'title' => 'TEXT';
    };

    my $html;
    # The trailing "1" makes the eval's return value the success indicator,
    # which does not depend on $@ still being intact after the block exits.
    my $ok = eval {
        $html = $scraper->scrape(URI->new($uri));
        1;
    };
    return unless $ok;

    # Test for visible content instead of plain truthiness, so that a title
    # of "0" is kept and a whitespace-only title counts as missing.
    my $title = $html->{title};
    return "-- No title --" unless defined $title && $title =~ /\S/;
    return $title;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment