Last active
April 16, 2016 00:19
-
-
Save alpha123/fd73ec4013b522332405fb186b3a1ba0 to your computer and use it in GitHub Desktop.
MAL Downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use v5.16; # given/when | |
use strict; | |
use warnings; | |
use Getopt::Long::Descriptive; | |
use LWP::Simple; | |
use Mojo::DOM; | |
use Data::Dumper; | |
use Text::CSV::Slurp; | |
# List-status codes. parse_one_list() sends the chosen value as the `status`
# query parameter; parse_list() expands ALL into the individual lists.
use constant WATCHING  => 1;
use constant COMPLETED => 2;
use constant HOLD      => 3;
use constant DROPPED   => 4;
use constant PTW       => 6;    # yep, no idea what list status 5 is
use constant ALL       => 7;
# Percent-encode everything outside the RFC 3986 "unreserved" set so a value
# can be embedded safely in a URL path segment or query parameter.
sub _uri_escape {
    my ($text) = @_;
    return '' unless defined $text;

    # Character strings are encoded to UTF-8 first; byte strings (such as raw
    # command-line arguments) are escaped as-is to avoid double encoding.
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/([^A-Za-z0-9._~-])/sprintf '%%%02X', ord $1/ge;
    return $text;
}

# Return the whitespace-trimmed text of a DOM node, or $default when the node
# is missing. Trimming keeps the comparisons against '-' below reliable even
# if ->text hands back surrounding whitespace (this may vary between
# Mojo::DOM releases).
sub _node_text {
    my ($node, $default) = @_;
    return $default unless defined $node;

    my $text = $node->text;
    $text = '' unless defined $text;
    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;
    return $text;
}

# Fetch one list page for $user and scrape its entries.
#
# Parameters:
#   $user         - account name, placed in the URL path
#   $type         - list-status code, sent as the `status` query parameter
#   $tag          - tag filter, sent as the `tag` query parameter ('' for none)
#   $skip_unrated - when true, entries whose score cell is '-' are left out
#
# Returns a list of hash refs with the keys id, title, type, episodes,
# progress and score. A '-' placeholder in episodes, progress or score is
# reported as 0. If the page cannot be fetched a warning is emitted and an
# empty list is returned.
sub parse_one_list {
    my ($user, $type, $tag, $skip_unrated) = @_;

    my $url = sprintf 'http://myanimelist.net/animelist/%s?status=%s&tag=%s',
        _uri_escape($user), _uri_escape($type), _uri_escape($tag);

    my $html = LWP::Simple::get($url);
    unless (defined $html) {
        warn "Could not fetch $url\n";
        return;
    }

    my $dom = Mojo::DOM->new($html);
    my @anime;

    # The first three matching tables are skipped; presumably they are page
    # and list headers rather than entries -- TODO confirm against the markup.
    my $tables_to_skip = 3;

    for my $row ($dom->find('#list_surround > table')->each) {
        if ($tables_to_skip) {
            --$tables_to_skip;
            next;
        }

        # Tables without a title span are not entry rows.
        my $title_node = $row->at('.animetitle > span');
        next unless defined $title_node;
        my $title = _node_text($title_node, '');

        my $link = $row->at('.animetitle');
        my $href = defined $link ? $link->{href} : undef;
        my ($id) = (defined $href ? $href : '') =~ m{/anime/(\d+)};

        # A missing cell is treated like the '-' placeholder instead of
        # aborting the whole run with a method call on undef.
        my $kind  = lc _node_text($row->at('td[width=50]'), '-');
        my $eps   = _node_text($row->at('td[width=70]'), '-');
        my $score = _node_text($row->at('td[width=45]'), '-');

        my $progress;
        my $progress_node = $row->at('span[id^=output]');
        if (defined $progress_node) {
            $progress = _node_text($progress_node, '-');

            # With a progress span present the cell's own text is the total
            # preceded by one separator character (presumably "/"); drop it.
            $eps = length $eps ? substr($eps, 1) : '';
        }
        else {
            $progress = $eps;
        }

        # Normalise placeholders only after the separator has been removed,
        # so that "/-" ends up as 0 rather than a literal '-'.
        for my $count ($eps, $progress) {
            $count = 0 if $count eq '-' || $count eq '';
        }

        next if $skip_unrated && $score eq '-';
        $score = 0 if $score eq '-';

        push @anime, {
            id       => $id,
            title    => $title,
            type     => $kind,
            episodes => $eps,
            progress => $progress,
            score    => $score,
        };
    }

    return @anime;
}
# Collect the entries of one or more of a user's lists.
#
# Parameters:
#   $user - account name
#   Named options (all optional):
#     types        - a status constant or an array ref of them (default ALL)
#     tags         - a tag string or an array ref of them (default '', which
#                    means "no tag filter")
#     skip_unrated - passed through to parse_one_list() (default 0)
#
# Returns the concatenated results of parse_one_list() for every combination
# of requested status and tag.
sub parse_list {
    my $user = shift;
    my %opts = (
        types        => ALL,
        tags         => '',
        skip_unrated => 0,
        @_,
    );

    my @types = ref($opts{types}) eq 'ARRAY' ? @{ $opts{types} } : ($opts{types});
    my @tags  = ref($opts{tags})  eq 'ARRAY' ? @{ $opts{tags} }  : ($opts{tags});

    # Drop undefined entries and repeats so that no list is fetched twice and
    # an empty request does not trigger "uninitialized value" warnings.
    my %seen;
    @types = grep { defined && !$seen{$_}++ } @types;

    # ALL anywhere in the request (not only in first position) selects every
    # individual list.
    if (grep { $_ == ALL } @types) {
        @types = (WATCHING, COMPLETED, HOLD, DROPPED, PTW);
    }

    # An empty tag list means "no filter" rather than "fetch nothing".
    @tags = map { defined $_ ? $_ : '' } @tags;
    @tags = ('') unless @tags;

    my @list;
    for my $type (@types) {
        for my $tag (@tags) {
            push @list, parse_one_list($user, $type, $tag, $opts{skip_unrated});
        }
    }
    return @list;
}
# Render the entries (an array ref of hash refs) as CSV text through
# Text::CSV::Slurp, with the columns in a fixed order.
sub list_to_csv {
    my ($rows) = @_;
    my @columns = ('id', 'title', 'type', 'episodes', 'progress', 'score');
    return Text::CSV::Slurp->create(
        input       => $rows,
        field_order => \@columns,
    );
}
# Serialise the entries as Perl source text using Data::Dumper, so the result
# can be read back with eval().
sub list_to_perl {
    my ($entries) = @_;
    return Data::Dumper->Dump([$entries]);
}
# Translate one user-supplied status word into its list-status constant.
# Matching is by substring, in the order shown; returns undef for a word
# that matches none of the patterns.
sub _status_to_type {
    my ($word) = @_;
    return COMPLETED if $word =~ /compl/;
    return WATCHING  if $word =~ /watch/;
    return PTW       if $word =~ /ptw|plan/;
    return HOLD      if $word =~ /h[oe]ld/;
    return DROPPED   if $word =~ /drop/;
    return;
}

# Command-line entry point: parse the options, fetch the requested lists and
# write them in the chosen export format to STDOUT or to the --output file.
#
# Returns the process exit status: 0 on success, 1 when the arguments are
# unusable. Dies if the output file cannot be opened or closed.
sub main {
    my ($opt, $usage) = describe_options(
        'mal_grab.pl %o <user>',
        ['output|o=s', 'Direct output to a file'],
        ['export|x=s', 'Export a dump of the user\'s list'],
        [],
        ['tags|t=s', 'Comma-separated list of tags used to filter the list'],
        ['status|s=s', 'Select list to fetch', {default => 'complete'}],
        ['skip-unrated', 'Skip titles that are not scored by the user'],
        [],
        ['help', 'Print usage information'],
        [],
        ['Export formats:'],
        [' • csv – output to comma-separated values'],
        [' • perl – dump in a format that can be eval()\'d by perl'],
        [],
        ['List statuses:'],
        [' • complete, completed'],
        [' • watching'],
        [' • hold, held'],
        [' • dropped'],
        [' • ptw, planned'],
        [' • all'],
        ['Multiple statuses may be separated with commas: --status complete,watching'],
        {show_defaults => 1}
    );

    if ($opt->help) {
        print $usage->text;
        exit;
    }

    # Without a user name there is nothing to fetch.
    my ($user) = @ARGV;
    unless (defined $user && length $user) {
        print STDERR "Missing <user> argument\n", $usage->text;
        return 1;
    }

    my @tags = defined $opt->tags ? split(/,/, $opt->tags) : ();
    @tags = ('') unless @tags;    # a single empty tag means "no tag filter"

    my @types;
    if ($opt->status =~ /all/) {
        push @types, ALL;
    }
    else {
        for my $word (split /,/, $opt->status) {
            my $type = _status_to_type($word);
            if (defined $type) {
                push @types, $type;
            }
            else {
                warn "Ignoring unknown list status '$word'\n";
            }
        }
    }
    unless (@types) {
        print STDERR "No valid list status given\n", $usage->text;
        return 1;
    }

    # Reject a mistyped format before any network traffic happens. Leaving
    # --export off entirely is still accepted and, as before, writes only the
    # trailing newline.
    my %exporter_for = (
        csv  => \&list_to_csv,
        perl => \&list_to_perl,
    );
    my $export = $opt->export;
    if (defined $export && !exists $exporter_for{$export}) {
        print STDERR "Unknown export format '$export' (expected csv or perl)\n";
        return 1;
    }

    # Open a fresh lexical handle for --output. Calling open() on a scalar
    # that already holds \*STDOUT would reopen the real STDOUT instead.
    my $out;
    my $path = $opt->output;
    my $to_file = defined $path && length $path;
    if ($to_file) {
        open($out, '>', $path)
            or die "Cannot open '$path' for writing: $!\n";
    }
    else {
        $out = \*STDOUT;
    }

    my @anime = parse_list(
        $user,
        types        => \@types,
        tags         => \@tags,
        skip_unrated => scalar($opt->skip_unrated),
    );

    print {$out} $exporter_for{$export}->(\@anime) if defined $export;
    print {$out} "\n";

    # Buffered write errors only surface when the handle is closed.
    if ($to_file) {
        close($out) or die "Cannot close '$path': $!\n";
    }

    return 0;
}
exit main(@ARGV);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment