Skip to content

Instantly share code, notes, and snippets.

@sekia
Created February 19, 2012 10:29
Show Gist options
  • Save sekia/1863006 to your computer and use it in GitHub Desktop.
Save sekia/1863006 to your computer and use it in GitHub Desktop.
Dumps mixi diaries out to a YAML file.
#!/usr/bin/env perl
use 5.014;
use strict;
use warnings;
use opts;
use Encode;
use File::HomeDir;
use Path::Class;
use Storable qw/dclone/;
use YAML;
use WWW::Mixi::Scraper;
# Command line options; all are optional at this point.
opts my $email     => +{ isa => 'Str' },
     my $password  => +{ isa => 'Str' },
     my $dump_file => +{ isa => 'Str' };

# Optional per-user config file: ~/.dump_mixi_diary.
my $home        = dir(File::HomeDir->my_home);
my $config_file = $home->file('.dump_mixi_diary');

my %config;
if (-e $config_file) {
    # NOTE(review): the config file is executed as Perl code and must
    # evaluate to a hash reference. Only use a file you fully trust.
    my $loaded = eval($config_file->slurp);

    # Report evaluation problems explicitly instead of failing later with
    # an unrelated "undefined value as a HASH reference" error.
    die "Failed to load config file $config_file: $@" if $@;
    die "Config file $config_file must evaluate to a hash reference\n"
        unless ref($loaded) eq 'HASH';

    %config = %{$loaded};
}

# Command line option takes precedence over config file.
$config{email}    = $email    if defined $email;
$config{password} = $password if defined $password;
# Pick the dump destination: the file given on the command line, or STDOUT.
my $out;
if (defined $dump_file) {
    open $out, '>', $dump_file
        or die qq/Cannot open "$dump_file" for writing: $!\n/;
}
else {
    $out = \*STDOUT;
}

# Emit UTF-8 through the validating encoding layer rather than the lax
# ":utf8" flag, and do not ignore a failure to apply it.
binmode $out, ':encoding(UTF-8)'
    or die "Cannot set UTF-8 encoding on output: $!\n";
# Both credentials must be present (and true), whether they came from the
# command line or from the config file.
foreach my $required ('email', 'password') {
    next if $config{$required};
    die qq/Specify mandatory parameter "$required"/;
}
# The whole config hash is passed on to the scraper constructor.
my $mixi = WWW::Mixi::Scraper->new(mode => 'TEXT', %config);

warn "Fetching diary list...\n";

# Walk the diary list page by page, starting at page 1, and stop at the
# first page that yields no entries.
my @diaries;
my $page_number = 1;
while (my @page_entries = $mixi->list_diary->parse(page => $page_number)) {
    push @diaries, @page_entries;
    $page_number++;
}

warn "Dumps @{[ scalar @diaries ]} diaries.\n";
# Fetch every diary found in the list and write it out as a YAML document.
for my $diary_meta (@diaries) {
    my $url   = $diary_meta->{link};
    my $diary = decode_all($mixi->parse($url));
    say {$out} Dump($diary);
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so close it explicitly and check the result.
close $out
    or die sprintf "Failed to finish writing to %s: %s\n",
        (defined $dump_file ? qq/"$dump_file"/ : 'standard output'), $!;
# Returns a deep copy of a diary entry in which the link objects are
# replaced by plain strings, so the structure dumps cleanly.
#
# The following fields are converted when present:
#   - $entry->{link}
#   - {link} of every element of $entry->{comments}
#   - {link} and {thumb_link} of every element of $entry->{images}
#
# The argument itself is never modified. Missing, undefined or already
# stringified links are left untouched instead of raising an exception.
sub decode_all {
    my ($original) = @_;
    my $entry = dclone $original;

    _stringify_links($entry, 'link');
    _stringify_links($_, 'link') for @{ $entry->{comments} // [] };
    _stringify_links($_, 'link', 'thumb_link') for @{ $entry->{images} // [] };

    return $entry;
}

# Replaces the values stored under @keys in the hash $record by their string
# form; keys that are absent from $record are not created.
sub _stringify_links {
    my ($record, @keys) = @_;
    for my $key (@keys) {
        next unless exists $record->{$key};
        $record->{$key} = _link_to_string($record->{$key});
    }
    return;
}

# Returns $link->as_string when $link is an object providing that method,
# and $link unchanged otherwise (undef, plain string, unblessed reference).
sub _link_to_string {
    my ($link) = @_;
    return $link unless ref $link;

    # can() dies on unblessed references; treat that as "no such method".
    my $has_method = do { local $@; eval { $link->can('as_string') } };
    return $has_method ? $link->as_string : $link;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment