Last active
January 4, 2018 01:34
-
-
Save tardisx/83ae5265fdc3ad0fe7039dbc787d44f3 to your computer and use it in GitHub Desktop.
Scrape mastodon timelines to create a list of mastodon instances.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
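#!/usr/bin/env perl
# Generate a list of Mastodon instances by scraping the public timeline of each
# known instance and finding toots that originate from other federated instances.
# Uses Mojolicious non-blocking HTTP so all instances are queried concurrently.
# Self-modifying: the discovered list is written back into this script's __DATA__ section.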
use strict; | |
use warnings; | |
use Mojo::UserAgent; | |
use feature 'say'; | |
use Mojo::UserAgent; | |
use Mojo::Promise; | |
# Shared user agent; every request of a round is issued through it.
my $ua = Mojo::UserAgent->new;

# Seed list of instance host names, read from this script's __DATA__ section.
my @instance_list = get_instance_list_from_data();

# Pause between scan rounds, in seconds. Grows by 50% after every round and is
# capped at one hour.
my $sleep = 3;

# Main loop: scan every known instance, collect newly seen hosts, persist the
# grown list back into the script, then back off and repeat forever.
while (1) {
    say 'current instance list:';
    say ' - ' . $_ foreach sort @instance_list;
    say '';
    say 'scanning '.scalar(@instance_list).' instances...';

    # Snapshot of the hosts scanned this round; results are matched back to
    # hosts by index, because Mojo::Promise->all preserves argument order.
    my @hosts = @instance_list;

    # Case-insensitive set of hosts already known, so membership is an exact
    # string lookup rather than a regex built from remote-supplied text.
    my %known = map { lc($_) => 1 } @hosts;

    my @new_hosts;
    my @promises;

    # Create a promise for each request. Each one gets its own rejection
    # handler: without it a single unreachable instance would reject the
    # combined promise and throw away the results of every other instance.
    foreach my $host (@hosts) {
        my $url = 'https://'.$host.'/api/v1/timelines/public';
        push @promises, $ua->get_p($url)->catch(sub {
            my ($err) = @_;
            say " * $host connection error: $err";
            return;    # resolve with no transaction; skipped below
        });
    }

    # Do them all at once. Skipped entirely when there is nothing to scan.
    if (@promises) {
        Mojo::Promise->all(@promises)->then(sub {
            my (@results) = @_;

            RESULT:
            foreach my $i (0 .. $#results) {
                my $host = $hosts[$i];
                my $tx   = $results[$i][0];
                next RESULT unless $tx;    # connection failed, already reported

                my $res = $tx->result;
                if (!$res->is_success) {
                    if ($res->is_error) {
                        say " * $host error: ". $res->message;
                    }
                    elsif ($res->code == 301) {
                        say " * $host unexpected redirect to: " . $res->headers->location;
                    }
                    else {
                        say " * $host unexpected non-success";
                    }
                    next RESULT;
                }

                # The timeline should decode to a JSON array of statuses; an
                # instance may answer with something else (HTML, an error hash).
                my $json = $res->json;
                if (ref $json ne 'ARRAY') {
                    say " * $host returned an unexpected timeline payload";
                    next RESULT;
                }

                foreach my $entry (@$json) {
                    next unless ref $entry eq 'HASH' && defined $entry->{uri};

                    # Host part only: stop at the first slash so path segments
                    # can never end up inside the captured host name.
                    next unless $entry->{uri} =~ m{^https://([^/]+)/users/};
                    my $new_host = lc $1;

                    next if $known{$new_host}++;
                    say " - adding new instance $new_host";
                    push @new_hosts, $new_host;
                }
            }
        })->catch(sub {
            my ($err) = @_;
            say " * unexpected failure while scanning: $err";
        })->wait;
    }

    push @instance_list, @new_hosts;
    store_instance_list_to_data(@instance_list);

    say "- sleeping for $sleep seconds";
    say '';
    sleep $sleep;

    $sleep *= 1.5;
    $sleep = 3600 if $sleep > 3600;
}
# Read the seed list of instance host names from the DATA filehandle.
#
# Each line is stripped of leading and trailing whitespace. Lines that are
# empty after stripping are skipped, so a stray blank line never turns into an
# empty host name. A lexical line variable is used so the caller's $_ is left
# untouched.
#
# Returns: list of host name strings, in the order they appear.
sub get_instance_list_from_data {
    my @instances;

    while (my $line = <DATA>) {
        chomp $line;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        next if $line eq '';
        push @instances, $line;
    }

    return @instances;
}
# Persist the instance list by rewriting this script's own __DATA__ section.
#
# Opens the running script ($0) for read/write, scans for the line that is
# exactly "__DATA__", truncates the file right after it and writes the given
# host names, sorted, one per line.
#
# Parameters: the list of instance host names to store.
# Dies when the script cannot be opened, truncated, written or closed, or
# when no __DATA__ line is found.
sub store_instance_list_to_data {
    my @instances = @_;

    open(my $fh, '+<', $0) or die "could not open ourself for update: $!";

    my $found = 0;
    while (my $line = <$fh>) {
        # Strip the line ending (LF or CRLF) and remember whether there was
        # one: a marker on the very last line may lack a trailing newline.
        my $terminated = ($line =~ s/\r?\n\z//);
        next unless $line eq '__DATA__';
        $found = 1;

        my $location = tell $fh;
        truncate($fh, $location)
            or die "could not truncate ourself: $!";

        # truncate leaves the file position alone, and a seek is needed when
        # switching a read/write handle from reading to writing.
        seek($fh, $location, 0)
            or die "could not seek in ourself: $!";

        # Keep the marker on its own line if it was not newline-terminated.
        unless ($terminated) {
            print {$fh} "\n" or die "could not write to ourself: $!";
        }

        foreach my $instance (sort @instances) {
            print {$fh} "$instance\n" or die "could not write to ourself: $!";
        }
        last;
    }

    # Buffered write errors only surface at close, so check it.
    close($fh) or die "could not close ourself after update: $!";

    die "could not find __DATA__" unless $found;
}
__DATA__ | |
mastodon.social | |
mastodon.xyz |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment