Skip to content

Instantly share code, notes, and snippets.

@tardisx
Last active January 4, 2018 01:34
Show Gist options
  • Save tardisx/83ae5265fdc3ad0fe7039dbc787d44f3 to your computer and use it in GitHub Desktop.
Save tardisx/83ae5265fdc3ad0fe7039dbc787d44f3 to your computer and use it in GitHub Desktop.
Scrape mastodon timelines to create a list of mastodon instances.
#!/usr/bin/env perl
# Generate a list of mastodon instances by scraping the timeline and finding toots from
# other federated instances.
# Mojolicious non-blocking HTTP FTW!
# Now with 100% more self-modifying!
use strict;
use warnings;
use Mojo::UserAgent;
use feature 'say';
use Mojo::UserAgent;
use Mojo::Promise;
# Main loop.
#
# Each pass requests the public timeline of every known instance concurrently,
# extracts the hostnames of other federated instances from the toot URIs,
# appends any previously unseen hosts to the list, writes the list back into
# this script's __DATA__ section, and then sleeps. The sleep interval grows by
# 50% per pass and is capped at one hour.
my $ua = Mojo::UserAgent->new;
my @instance_list = get_instance_list_from_data();
my $sleep = 3;
while (1) {
    say 'current instance list:';
    say ' - ' . $_ foreach sort @instance_list;
    say '';
    say 'scanning '.scalar(@instance_list).' instances...';

    my @new_hosts;

    # Hash of every host seen so far (known + discovered this pass). Exact
    # string lookup: hostnames come from remote servers and must never be
    # interpolated into a regex.
    my %known = map { $_ => 1 } @instance_list;

    # Create a promise for each request. A connection-level failure rejects
    # the promise returned by get_p; it is caught per host here so that one
    # unreachable instance cannot reject the combined promise and discard the
    # results of every other instance.
    my @promises;
    foreach my $host (@instance_list) {
        my $url = 'https://'.$host.'/api/v1/timelines/public';
        push @promises, $ua->get_p($url)->catch(sub {
            my ($err) = @_;
            say " * $host error: $err";
            return;    # resolve with no value; skipped below
        });
    }

    # Do them all at once (nothing to wait for when the list is empty).
    if (@promises) {
        Mojo::Promise->all(@promises)->then(sub {
            my (@results) = @_;
            foreach my $result (@results) {
                # Each result is an array ref of the values its promise
                # resolved with; failed connections resolved with nothing.
                my $tx = $result->[0] or next;
                my $host = $tx->req->url->to_abs->host;
                my $res = $tx->result;
                if ($res->is_success) { }
                elsif ($res->is_error) { say " * $host error: ". $res->message; next; }
                elsif ($res->code == 301) { say " * $host unexpected redirect to: " . $res->headers->location; next; }
                else { say " * $host unexpected non-success"; next; }

                # The timeline is expected to be a JSON array of statuses;
                # anything else (invalid JSON, an error object) is skipped.
                my $json = $res->json;
                unless (ref $json eq 'ARRAY') {
                    say " * $host unexpected non-list response";
                    next;
                }

                foreach my $entry (@$json) {
                    next unless ref $entry eq 'HASH';
                    my $uri = $entry->{uri};
                    next unless defined $uri;

                    # Capture only the authority part of the URI: [^/]+ stops
                    # at the first slash, so a path can never leak into the
                    # hostname.
                    my ($new_host) = ($uri =~ m{^https://([^/]+)/users/})
                        or next;
                    next if $known{$new_host}++;

                    say " - adding new instance $new_host";
                    push @new_hosts, $new_host;
                }
            }
        })->catch(sub {
            # An exception inside the handler above would otherwise vanish.
            my ($err) = @_;
            say " * scan failed: $err";
        })->wait;
    }

    push @instance_list, @new_hosts;
    store_instance_list_to_data(@instance_list);

    say "- sleeping for $sleep seconds";
    say '';
    sleep $sleep;

    # Back off: 50% longer each pass, capped at one hour.
    $sleep *= 1.5;
    $sleep = 3600 if $sleep > 3600;
}
# Read the list of known instance hostnames, one per line.
#
# Arguments: optionally, a filehandle to read from. When omitted, the script's
#            own DATA handle (the text after the __DATA__ marker) is used.
# Returns:   the list of hostnames in file order, with leading and trailing
#            whitespace removed. Blank and whitespace-only lines are skipped
#            so they never turn into empty hostnames.
sub get_instance_list_from_data {
    my ($fh) = @_;
    $fh //= \*DATA;

    my @instances;
    # A lexical line variable is used so the caller's $_ is left untouched.
    while (my $line = <$fh>) {
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;    # also strips the trailing newline (and any CR)
        next if $line eq '';
        push @instances, $line;
    }
    return @instances;
}
# Persist the instance list by rewriting this script's own __DATA__ section.
#
# The script file ($0) is opened read/write and scanned for the line
# consisting of exactly '__DATA__'. Everything after that line is discarded
# and replaced with the given hostnames, sorted and de-duplicated, one per
# line.
#
# Arguments: the list of instance hostnames to store.
# Returns:   nothing meaningful.
# Dies:      if the script cannot be opened, has no __DATA__ line, or any of
#            the seek/truncate/write/close steps fails.
#
# NOTE: the rewrite is done in place and is not atomic; an interruption
# between the truncate and the final write can lose the stored list.
sub store_instance_list_to_data {
    my @instances = @_;

    # Sort and drop duplicates so the stored list stays canonical.
    my %seen;
    my @unique = grep { !$seen{$_}++ } @instances;
    @unique = sort @unique;

    open(my $fh, '+<', $0) or die "could not open ourself for update: $!";

    my $found = 0;
    while (my $line = <$fh>) {
        chomp $line;
        next unless $line eq '__DATA__';
        $found = 1;

        # The handle now sits just past the marker line. An explicit seek is
        # required when switching from reading to writing on a read/write
        # handle.
        my $location = tell $fh;
        seek($fh, $location, 0)
            or die "could not seek in ourself: $!";
        truncate($fh, $location)
            or die "could not truncate ourself: $!";

        foreach my $instance (@unique) {
            print {$fh} "$instance\n"
                or die "could not write to ourself: $!";
        }
        last;
    }

    # Buffered write errors only surface at close, so check it.
    close($fh) or die "could not close ourself: $!";
    die "could not find __DATA__" unless $found;
}
__DATA__
mastodon.social
mastodon.xyz
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment