Created
November 20, 2015 02:00
-
-
Save Skarsnik/cc6a997fe72b1caae73d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use v6; | |
use lib './', '/root/testmodif/s/perl6-html-parser-xml/lib/'; | |
use FicClass; | |
use Gumbo; | |
use StoryFactory; | |
use HTTP::UserAgent; | |
use HTTP::Cookie; | |
my $bbaseurl = "http://www.fimfiction.net/bookshelf/"; | |
my $fimbaseurl = "http://www.fimfiction.net/"; | |
my $bookid = @*ARGS[0]; | |
my $story_id = @*ARGS[1]; | |
initDBstuff(); | |
if ($story_id.defined) { | |
my Story $story = get_story($story_id.Int); | |
$story.character_tags = "Soarin", "Original Character"; | |
saveToDB($story); | |
return ; | |
} | |
my Bookshelf $bookshelf = Bookshelf.new; | |
$bookshelf.id = $bookid.Int; | |
my $ua = HTTP::UserAgent.new; | |
$ua.cookies.set-cookie('Set-Cookie:view_mature=true; '); | |
my $url = "$bbaseurl$bookid"; | |
get_bookshelf(); | |
saveToDB($bookshelf); | |
#say $bookshelf.perl; | |
sub get_story (Int $id) { | |
my $surl = "$fimbaseurl" ~ "api/story.php?story=$id"; | |
say "Getting info for story $id - $surl"; | |
my $time = time; | |
my $rep = $ua.get($surl); | |
my $p = $rep.content; | |
#my $p = qqx{wget -o /dev/null -O - $surl}; | |
if False { | |
my $timewget = time - $time; | |
$time = time; | |
#my $rep = $ua.get($surl); | |
my $timeua = time - $time; | |
$time = time; | |
$p = LWP::Simple.get($surl); | |
my $timelwp = time - $time; | |
say "Request took (wget, ua, lwp) : $timewget ; $timeua ; $timelwp (" ~ $p.defined ~ ")"; | |
} | |
my Story $story = fromJSON($p); | |
return $story; | |
} | |
sub get_bookshelf { | |
my $rep = $ua.get("$bbaseurl$bookid"); | |
if ! $rep.is-success { | |
die "Can't contact $bbaseurl"; | |
} | |
# first we need to know the number of page | |
my $xmldoc = parse-html($rep.content, :TAG<div>, :class<page_list>, :SINGLE, :nowhitespace); | |
my @pages = $xmldoc.elements(:TAG<li>, :RECURSE); | |
my $number_of_page; | |
if (@pages.elems eq 1) { | |
$number_of_page = 1; | |
} else { | |
$number_of_page = @pages[@pages.elems-2][0][0].text; | |
} | |
say "Number of page : $number_of_page"; | |
for (1..$number_of_page) { | |
$rep = $ua.get("$url?order=date_added&page=$_"); | |
if ! $rep.is-success { | |
die "can't get $url?order=date_added&page=$_"; | |
} | |
my $innerdiv = parse-html($rep.content, :TAG<div>, :class<inner>, :SINGLE, :nowhitespace); | |
if $_ == 1 { | |
my $ul = $innerdiv.elements(:TAG<span>, :class<bookshelf-name>, :SINGLE, :RECURSE); | |
$bookshelf.name = $ul[0][0].text; | |
} | |
my @storydiv = $innerdiv.lookfor(:TAG<div>, :class<story_content_box>); | |
for @storydiv -> $storydiv { | |
my $idstory = substr($storydiv<id>, 6); | |
my Story $story = get_story($idstory.Int); | |
my $extradiv = $storydiv.lookfor(:TAG<div>, :class<extra_story_data>, :SINGLE); | |
my @charactera = $extradiv.lookfor(:TAG<a>, :class<character_icon>); | |
for @charactera -> $aelem { | |
$story.character_tags.push($aelem<title>); | |
} | |
say $story.title; | |
#show_mem(); | |
$bookshelf.stories.push($story); | |
} | |
} | |
} | |
sub show_mem { | |
say $*PID; | |
say slurp("/proc/self/statm"); | |
#my @statm = slurp("/proc/$*PID/statm").split(/\s+/); | |
#say @statm; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment