Created
December 18, 2019 02:24
-
-
Save doriantaylor/f89caee7badb1581fd8b9799477fdcae to your computer and use it in GitHub Desktop.
This is the script that was used to fetch @vgr's threadapalooza.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict;
use warnings FATAL => 'all';

use Carp         ();
use Math::BigInt ();
use Scalar::Util ();
use Storable     ();

use JSON         ();
use Net::Twitter ();
use Path::Class  ();
use Try::Tiny;
# OAuth consumer credentials. They are read from the environment; replace
# the values here if you would rather hard-code them.
our %TOKENS = (
    consumer_key    => $ENV{TWITTER_CONSUMER_KEY},
    consumer_secret => $ENV{TWITTER_CONSUMER_SECRET},
);

# Fail early with a readable message rather than an obscure error from
# inside the client when a credential is missing.
for my $name (sort keys %TOKENS) {
    die "missing Twitter credential '$name' "
        . "(set TWITTER_\U$name\E in the environment)\n"
        unless defined $TOKENS{$name} and length $TOKENS{$name};
}

# file (relative to the working directory) caching the user's access tokens
my $state = 'twitar.state';

my $t = Net::Twitter->new(
    ssl    => 1,
    traits => [qw(API::RESTv1_1 OAuth)],
    %TOKENS,
);

# log into the api if necessary: reuse the cached access token pair when the
# state file can be read, otherwise go through the PIN authorization once
my $tokens = try { Storable::retrieve($state) } || [];

# anything other than [token, secret, ...] is treated as "no cached login"
$tokens = [] unless ref($tokens) eq 'ARRAY' and @$tokens >= 2;

if (@$tokens) {
    $t->access_token($tokens->[0]);
    $t->access_token_secret($tokens->[1]);
}
else {
    my $uri = $t->get_authorization_url;
    warn "Visit the auth URI: $uri\nto retrieve a PIN and enter it here: ";

    my $pin = <STDIN>;
    die "no PIN entered\n" unless defined $pin;
    chomp $pin;

    my @tokens = $t->request_access_token(verifier => $pin);

    # the state file holds a live access token secret: keep it owner-only
    my $old_umask = umask 0077;
    Storable::store(\@tokens, $state)
        or warn "could not save access tokens to $state: $!\n";
    umask $old_umask if defined $old_umask;

    # best effort: also tighten a state file that already existed
    chmod 0600, $state;
}

# stop perl from choking on utf8 output
binmode \*STDOUT, ':utf8';
binmode \*STDERR, ':utf8';

# ID of the first tweet of the thread; the first command-line argument
# overrides the built-in default
my $start = @ARGV ? shift @ARGV : '1205983999274840064';
die "start must be a numeric tweet ID, got '$start'\n"
    unless $start =~ /\A\d+\z/;

# get alllllll the tweets indiscriminately
my @vgr = fetch_since($start);

# prune out the thread
@vgr = get_thread($start, \@vgr);

# this will give us a structure of threads
my $wad = parse_vgr(\@vgr);

# this just prints to stdout so use tee or shell redirect or something
print JSON->new->canonical->pretty->encode($wad);

# note since /statuses/user_timeline maxes out at 3200, this will
# probably not work after a while
### SUBROUTINES | |
# Decide whether a failed API call is worth retrying.
#
# Takes the Net::Twitter::Error caught from the client. Returns the number of
# seconds to wait before the next attempt, or nothing when the error is not
# transient.
sub _retry_delay {
    my $err  = shift;
    my $code = $err->code;

    # server-side hiccup: short pause, then try again
    return 5 if $code >= 500;

    # anything other than "429 Too Many Requests" is a real failure
    return unless $code == 429;

    # The x-rate-limit-reset header carries the epoch second at which the
    # rate-limit window reopens; fall back to one minute when it is absent.
    my $res   = $err->http_response;
    my $reset = $res ? $res->header('x-rate-limit-reset') : undef;
    my $wait  = (defined $reset and $reset =~ /\A\d+\z/)
        ? $reset - time() + 1
        : 60;

    # clamp to something sane (rate-limit windows are 15 minutes)
    $wait = 1   if $wait < 1;
    $wait = 900 if $wait > 900;

    return $wait;
}

# Fetch a tweet and everything its author posted after it.
#
#   $start - id_str of the tweet to start from
#
# Looks up the starting tweet, then pages backwards through its author's
# timeline (200 per request, retweets excluded, replies included) using
# since_id/max_id until a page comes back empty. Uses the file-level
# Net::Twitter client $t.
#
# Server errors (5xx) and rate limiting (429) are waited out and retried;
# any other error is rethrown.
#
# Returns the tweets newest first with the starting tweet last: a list in
# list context, an array ref in scalar context. Returns nothing if the
# starting tweet cannot be fetched (the error is warned about).
sub fetch_since {
    my $start = shift;
    my $max   = undef;

    my $first = try {
        $t->show_status($start)
    } catch {
        warn $_;
        undef;
    };
    return unless $first;

    my $uid = $first->{user}{id_str};
    my @out;

    # $start doubles as the loop flag: it is undefined once a page is empty
    while ($start) {
        my %p = (
            user_id         => $uid,
            count           => 200,
            include_rts     => 0,
            exclude_replies => 0,
            since_id        => $start,
        );
        $p{max_id} = $max if $max;

        my $tl;
        until ($tl) {
            $tl = try { $t->user_timeline(\%p) } catch {
                my $err = $_;

                # not an API error at all: pass it on untouched
                Carp::croak($err) unless Scalar::Util::blessed($err)
                    and $err->isa('Net::Twitter::Error');

                my $delay = _retry_delay($err);
                Carp::croak($err) unless $delay;

                warn "rate limited; waiting ${delay}s before retrying\n"
                    if $err->code == 429;
                sleep $delay;
                undef;
            };
        }

        if ($tl && @$tl) {
            # newest first, compared as big integers
            my @tl = sort {
                Math::BigInt->new($b->{id_str})->bcmp(
                    Math::BigInt->new($a->{id_str}))
            } @$tl;

            # the *highest* ID of this request is the *lowest* id of the last
            shift @tl if $max and $tl[0]{id_str} eq $max;

            if (@tl) {
                # next page: everything strictly older than the oldest seen
                $max = Math::BigInt->new($tl[-1]{id_str})->bsub(1)->bstr;
                warn $max;
                warn scalar @tl;
                push @out, @tl;
            }
            else {
                undef $start;
            }
        }
        else {
            undef $start;
        }
    }

    # since_id excludes the starting tweet itself, so add it at the end
    push @out, $first;

    return wantarray ? @out : \@out;
}
# Pick a single reply chain out of a pile of tweets.
#
#   $start  - id_str of the tweet the chain begins with
#   $tweets - array ref of tweet hashes (uses id_str and
#             in_reply_to_status_id_str)
#
# Beginning at $start, repeatedly steps to the numerically lowest-ID tweet
# that replies to the current one, until the current tweet has no reply in
# the pile. Returns the chain in order: a list in list context, an array ref
# in scalar context. Returns nothing if $start is not among the tweets.
sub get_thread {
    my ($start, $tweets) = @_;

    # index tweets by ID, and reply IDs by the ID they answer
    my (%by_id, %replies_to);
    for my $tweet (@$tweets) {
        my $id = $tweet->{id_str};
        $by_id{$id} = $tweet;

        my $parent = $tweet->{in_reply_to_status_id_str} or next;
        $replies_to{$parent}{$id} = $id;
    }

    my $current = $by_id{$start} or return;

    my @chain;
    while ($current) {
        push @chain, $current;

        # find the smallest reply ID, comparing as big integers
        my $next_id;
        for my $candidate (keys %{ $replies_to{ $current->{id_str} } || {} }) {
            $next_id = $candidate
                if !defined $next_id
                or Math::BigInt->new($candidate)
                    ->bcmp(Math::BigInt->new($next_id)) < 0;
        }

        $current = $next_id ? $by_id{$next_id} : undef;
    }

    wantarray ? @chain : \@chain;
}
# Collect the threads that the tweets of a thread link to.
#
#   $thread - array ref of tweet hashes (uses text and entities.urls)
#
# Every expanded URL found in a tweet's entities is warned about; the ones
# that point at a tweet (https://twitter.com/<user>/status/<id>, optionally
# on the www./mobile. host and optionally followed by a query string or
# fragment such as "?s=20") are fetched with fetch_since() and reduced to a
# thread with get_thread(). Each status ID is only fetched once.
#
# Returns a hash of user id_str => array ref of that user's thread: a flat
# key/value list in list context, a hash ref in scalar context.
#
# NOTE(review): results are keyed by the thread author's user id, so a later
# thread by the same author replaces an earlier one. Kept as is because the
# output shape depends on it; confirm this is intended.
sub parse_vgr {
    my @thread = @{shift()};

    my %wad;
    my %seen;    # status IDs already fetched

    for my $tweet (@thread) {
        my @urls;
        my $e = $tweet->{entities};
        if (ref($e) eq 'HASH' and ref($e->{urls}) eq 'ARRAY') {
            @urls = grep { defined $_ and $_ ne '' }
                map { $_->{expanded_url} }
                grep { ref($_) eq 'HASH' } @{ $e->{urls} };
        }

        # show which tweet the links below came from
        if (@urls) {
            print STDERR "$tweet->{text}\n";
        }

        for my $url (@urls) {
            warn $url;

            # only links to tweets are interesting; share links carry a
            # trailing query string, which must not hide the status ID
            next unless $url =~ m{
                \A https://(?:(?:www|mobile)[.])?twitter[.]com
                /[^/]+/status/(\d+)
                (?:[?\#].*)? \z
            }xs;
            my $st = $1;

            # fetching the same status again would give the same thread
            next if $seen{$st}++;

            my @lolz = fetch_since($st);
            @lolz = get_thread($st, \@lolz);
            if (@lolz) {
                my $uid = $lolz[0]{user}{id_str};
                $wad{$uid} = \@lolz;
            }
        }
    }

    return wantarray ? %wad : \%wad;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment