Skip to content

Instantly share code, notes, and snippets.

@ernix
Last active June 29, 2017 13:55
Show Gist options
  • Save ernix/93ceef2748acc6371de4a0d9017d1cda to your computer and use it in GitHub Desktop.
Save ernix/93ceef2748acc6371de4a0d9017d1cda to your computer and use it in GitHub Desktop.
mastodonのownerをスクレイピング
#!/usr/bin/perl
use strict;
use warnings;
use Mojo::UserAgent;
use Mojo::IOLoop;
# Scrape the owner account of every listed Mastodon instance.
#
# Downloads the instance list (JSON) from $INSTANCES_URL, requests the
# "/about/more" page of each instance concurrently, and prints the account
# found in the first "span.username" element as "user@host", one per line.
# Instances whose page cannot be fetched or has no such element are skipped
# silently (best effort).
our $INSTANCES_URL = 'https://instances.mastodon.xyz/instances.json';

my $ua = Mojo::UserAgent->new;
$ua->max_redirects(3);

# Blocking fetch of the instance list.
my $instances = $ua->get($INSTANCES_URL)->result->json
    or die "GET $INSTANCES_URL failed\n";
ref $instances eq 'ARRAY'
    or die "GET $INSTANCES_URL did not return a JSON array\n";

# NOTE(review): Mojo::IOLoop->delay appears to be deprecated/removed in recent
# Mojolicious releases -- confirm against the installed version.
my $delay = Mojo::IOLoop->delay;

INSTANCE:
for my $instance (@{$instances}) {

    # Skip malformed entries instead of requesting a bogus URL.
    next INSTANCE if ref $instance ne 'HASH';
    my $host = $instance->{name};
    next INSTANCE if !defined $host || !length $host;

    # A URL without a scheme is relative, and Mojo::UserAgent hands relative
    # URLs to its embedded application server rather than the remote host,
    # so a bare hostname has to be made absolute first.
    my $has_scheme = $host =~ m{\A [a-z][a-z0-9+.-]* :// }xmsi;
    my $base_url   = $has_scheme ? $host : "https://$host";
    my $about_more = sprintf '%s/about/more', $base_url;

    # begin(0): forward every argument given to $end, including the first.
    my $end = $delay->begin(0);

    $ua->get(
        $about_more => sub {
            my (undef, $tx) = @_;

            # at() returns undef when nothing matches; the eval still guards
            # against anything dying while the response body is parsed.
            my $node    = eval { $tx->res->dom->at('span.username') };
            my $account = defined $node ? $node->text : undef;
            return $end->() if !defined $account;

            # Element text may carry surrounding whitespace.
            $account =~ s{\A \s+ | \s+ \z}{}gxms;

            # Accept "@user", "@user@host" and "user": keep the local part.
            my ($user) = $account =~ m{\A \@? ([^\@]+) }xms;
            return $end->() if !defined $user;

            # $host is used exactly as given in the instance list.
            my $id = join '@', $user, $host;
            print "$id\n";
            return $end->($id);
        }
    );
}

# Run the event loop until every pending request has called its $end.
$delay->wait unless Mojo::IOLoop->is_running;

1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment