Skip to content

Instantly share code, notes, and snippets.

@doriantaylor
Created December 18, 2019 02:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save doriantaylor/f89caee7badb1581fd8b9799477fdcae to your computer and use it in GitHub Desktop.
Save doriantaylor/f89caee7badb1581fd8b9799477fdcae to your computer and use it in GitHub Desktop.
this is what was used to get @vgr's threadapalooza
#!/usr/bin/perl
use strict;
use warnings FATAL => 'all';
use Carp ();
use Math::BigInt ();
use Scalar::Util ();
use Storable ();
use JSON ();
use Net::Twitter ();
use Path::Class ();
use Try::Tiny;
# put your tokens in here (by default they come from the environment)
our %TOKENS = (
    consumer_key    => $ENV{TWITTER_CONSUMER_KEY},
    consumer_secret => $ENV{TWITTER_CONSUMER_SECRET},
);

# Fail early with a readable message instead of an obscure API error
# (or a fatal "uninitialized" warning) further down.
for my $key (qw(consumer_key consumer_secret)) {
    die "Missing $key: set TWITTER_\U$key\E in the environment "
        . "or fill in %TOKENS\n"
        unless defined $TOKENS{$key} and length $TOKENS{$key};
}

# file in which the access token pair is cached between runs
my $state = 'twitar.state';

my $t = Net::Twitter->new(
    ssl => 1, traits => [qw(API::RESTv1_1 OAuth)], %TOKENS);

# log into the api if necessary; a missing or unreadable state file
# simply means we have to go through the PIN dance again
my $tokens = try { Storable::retrieve($state) };
if (ref($tokens) eq 'ARRAY' and @$tokens >= 2) {
    $t->access_token($tokens->[0]);
    $t->access_token_secret($tokens->[1]);
}
else {
    my $uri = $t->get_authorization_url;
    warn "Visit the auth URI: $uri\nto retrieve a PIN and enter it here: ";

    my $pin = <STDIN>;
    die "No PIN was entered\n" unless defined $pin;
    chomp $pin;

    # the first two elements of the returned list are the access token
    # and its secret, which is all the branch above reads back
    my @tokens = $t->request_access_token(verifier => $pin);
    Storable::store(\@tokens, $state);
}

# stop perl from choking on utf8 output
binmode \*STDOUT, ':utf8';
binmode \*STDERR, ':utf8';

# ID of the first tweet of the thread: first command-line argument if
# given, otherwise the original hardcoded start
my $start = @ARGV ? $ARGV[0] : '1205983999274840064';
die "Status ID must be all digits, got '$start'\n"
    unless $start =~ /\A\d+\z/;

# get alllllll the tweets indiscriminately
my @vgr = fetch_since($start);

# prune out the thread
@vgr = get_thread($start, \@vgr);

# this will give us a structure of threads
my $wad = parse_vgr(\@vgr);

# this just prints to stdout so use tee or shell redirect or something
print JSON->new->canonical->pretty->encode($wad);

# note since /statuses/user_timeline maxes out at 3200, this will
# probably not work after a while
### SUBROUTINES
# fetch_since($status_id)
#
# Looks up the status with the given ID, then pages backwards through
# its author's timeline (200 tweets per request, retweets excluded,
# replies included) collecting every tweet newer than that status.
#
# Uses the file-level Net::Twitter client $t.
#
# Returns the collected tweets, newest first within each page, with the
# starting status itself appended last: a list in list context, an
# array ref in scalar context. If the starting status cannot be fetched
# the error is warned about and nothing is returned.
#
# Errors from the timeline call that are Net::Twitter::Error objects
# with a code of 500 or above are retried after a 5 second pause; any
# other error is rethrown via Carp::croak.
sub fetch_since {
    my $start = shift;

    my $first = try { $t->show_status($start) } catch { warn $_; undef };
    return unless $first;

    my $uid = $first->{user}{id_str};
    my $max;    # upper bound (max_id) for the next page, unset at first
    my @out;

    while (1) {
        my %p = (
            user_id => $uid, count => 200, include_rts => 0,
            exclude_replies => 0, since_id => $start,
        );
        $p{max_id} = $max if $max;

        # keep asking until we get a response; only server-side errors
        # are considered transient
        my $tl;
        until ($tl) {
            $tl = try { $t->user_timeline(\%p) } catch {
                my $err = $_;
                Carp::croak($err)
                    unless Scalar::Util::blessed($err)
                    and $err->isa('Net::Twitter::Error');
                Carp::croak($err) if $err->code < 500;
                sleep 5;
                undef;
            };
        }

        # an empty page means we have walked back to $start
        last unless @$tl;

        # Sort newest first. IDs are compared as big integers so this
        # is safe on perls without 64-bit integers; each ID is converted
        # once instead of once per comparison.
        my @page = map  { $_->[1] }
                   sort { $b->[0]->bcmp($a->[0]) }
                   map  { [ Math::BigInt->new($_->{id_str}), $_ ] } @$tl;

        # The next request must stop strictly below the oldest tweet we
        # have seen. Because the bound is (lowest ID - 1), nothing in the
        # next page can duplicate this one, so no tweet is ever discarded.
        $max = Math::BigInt->new($page[-1]{id_str})->bsub(1)->bstr;

        # progress trace on STDERR: next max_id and size of this page
        warn $max;
        warn scalar @page;

        push @out, @page;
    }

    # since_id is exclusive, so the starting status is added by hand
    push @out, $first;

    return wantarray ? @out : \@out;
}
# get_thread($start_id, \@tweets)
#
# Extracts a single reply chain from a pile of tweets. Beginning at the
# tweet whose id_str is $start_id, repeatedly steps to the reply with
# the numerically lowest ID among the tweets that reply to the current
# one, until a tweet has no replies in the pile.
#
# Returns the chain in order (start first): a list in list context, an
# array ref in scalar context. Returns nothing when $start_id is not
# among the given tweets.
sub get_thread {
    my ($start, $list) = @_;
    my @tweets = @$list;

    my %by_id;         # id_str        => tweet
    my %replies_to;    # parent id_str => { reply id_str => reply id_str }

    foreach my $tweet (@tweets) {
        my $id = $tweet->{id_str};
        $by_id{$id} = $tweet;

        my $parent = $tweet->{in_reply_to_status_id_str};
        next unless $parent;
        $replies_to{$parent}{$id} = $id;
    }

    my $cursor = $by_id{$start};
    return unless $cursor;

    my @chain;
    do {
        push @chain, $cursor;

        # IDs are compared as big integers, not as strings
        my $children = $replies_to{ $cursor->{id_str} } || {};
        my ($next) = sort {
            Math::BigInt->new($a)->bcmp(Math::BigInt->new($b))
        } keys %$children;

        $cursor = $next ? $by_id{$next} : undef;
    } while ($cursor);

    return wantarray ? @chain : \@chain;
}
# parse_vgr(\@thread)
#
# Walks the tweets of a thread, finds links to other tweets among each
# tweet's entities.urls[].expanded_url values, and pulls in the thread
# that begins at each linked tweet (via fetch_since and get_thread).
#
# Returns a hash mapping the id_str of the linked thread's author to an
# array ref of that thread's tweets: a hash in list context, a hash ref
# in scalar context.
#
# NOTE(review): because the result is keyed by user ID, a second linked
# thread by the same author replaces the first. Kept as-is so the output
# format does not change -- confirm whether that is intended.
#
# The text of every tweet that has URLs, and every URL inspected, is
# echoed to STDERR as a progress trace.
sub parse_vgr {
    my @thread = @{shift()};
    my %wad;
    my %seen;    # linked status IDs that have already been fetched

    for my $tweet (@thread) {
        my @urls;
        my $entities = $tweet->{entities};
        if (ref($entities) eq 'HASH' and ref($entities->{urls}) eq 'ARRAY') {
            @urls = grep { defined $_ and $_ ne '' }
                    map  { $_->{expanded_url} }
                    grep { ref($_) eq 'HASH' } @{ $entities->{urls} };
        }

        print STDERR "$tweet->{text}\n" if @urls;

        for my $url (@urls) {
            warn $url;

            # Accept the usual spellings of a tweet permalink: http or
            # https, optional www./mobile. host prefix, optional trailing
            # slash, and an optional query string or fragment (shared
            # links commonly end in something like "?s=20").
            next unless $url =~ m{
                \A https?://
                (?: (?: www | mobile ) \. )?
                twitter\.com / [^/]+ / status / (\d+)
                /? (?: [?\#] .* )?
                \z
            }xs;
            my $st = $1;

            # fetching a thread is expensive, so do it once per status
            next if $seen{$st}++;

            my @lolz = fetch_since($st);
            @lolz = get_thread($st, \@lolz);
            if (@lolz) {
                my $uid = $lolz[0]{user}{id_str};
                $wad{$uid} = \@lolz;
            }
        }
    }

    return wantarray ? %wad : \%wad;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment