Skarsnik/article1.p6

## article1.p6
use v6;

use Gumbo;
use XML;
use HTTP::UserAgent;

# Base URLs of the site. Plain http is used here on purpose, not https.
my $bbaseurl = "http://www.fimfiction.net/bookshelf/";
my $fimbaseurl = "http://www.fimfiction.net/";

# The bookshelf id is the first command-line argument. Without it the URL
# built below would not name any bookshelf, so stop with a usage message.
if @*ARGS.elems < 1 {
    die "Usage: " ~ $*PROGRAM-NAME ~ " <bookshelf-id>";
}
my $bookshelfid = @*ARGS[0];

my $url = $bbaseurl~$bookshelfid;
my $ua = HTTP::UserAgent.new;
# Fimfiction hides mature content (violent/sex stories) by default, so a
# view_mature cookie is set on the user agent before any request is made.
$ua.cookies.set-cookie('Set-Cookie:view_mature=true; ');

# HTTP::UserAgent's get gives us a response object.
my $rep = $ua.get($url);

if ! $rep.is-success {
    # Keep the URL and the status line apart so the message stays readable.
    die "Can't contact $url: " ~ $rep.status-line;
}

# Step one: find out how many pages the bookshelf has.
#
# Calling parse-html($rep.content) alone and searching the resulting XML tree
# would also work, but Gumbo's parse-html offers basic filtering that speeds
# up the parsing:
#   :SINGLE       stops at the first element matching div class="page_list"
#   :nowhitespace skips the whitespace found outside elements
#                 (indentation tabs, for instance)
my $pager_doc = parse-html($rep.content, :TAG<div>, :class<page_list>, :SINGLE, :nowhitespace);

# Note: the document root is the html tag, not the <div>.
# The <ul> and any other content of the div are irrelevant here; only the
# <li> tags are collected.
my @page_items = $pager_doc.lookfor(:TAG<li>);

# With more than one <li>, the page count is the text of the second-to-last
# one; otherwise there is a single page.
my $number_of_page = @page_items.elems > 1
    ?? @page_items[*-2][0][0].text
    !! 1;

say "Bookshelf n°$bookshelfid has $number_of_page page(s)";

# Walk every page of the bookshelf and print one summary line per story.
for 1..$number_of_page -> $page {
    my $page_url = "$url?order=date_added&page=$page";
    $rep = $ua.get($page_url);
    if ! $rep.is-success {
        # Report the status line too, like the first request does.
        die "Can't get $page_url: " ~ $rep.status-line;
    }
    # No :SINGLE this time, since we want all the story_content_box divs.
    my $stories_div = parse-html($rep.content, :TAG<div>, :class<story_content_box>, :nowhitespace);
    for $stories_div.root.nodes -> $story_div {
        # Start both lists empty so that a story without tags or characters
        # prints "[]" and an empty string rather than "(Any)".
        my %story = tags => [], character_tags => [];
        # :SINGLE makes lookfor return one XML::Element instead of an Array of them.
        %story<title> = $story_div.lookfor(:TAG<a>, :class<story_name>, :SINGLE)[0].text;
        # The author name is the text of a link inside the author div.
        %story<author> = $story_div.lookfor(:TAG<div>, :class<author>, :SINGLE).lookfor(:TAG<a>, :SINGLE)[0].text;
        # Tags are links whose class starts with story_category, hence the regex.
        my $description_div = $story_div.lookfor(:TAG<div>, :class<description>, :SINGLE);
        my @tags = $description_div.lookfor(:TAG<a>, :class(/^story_category/));
        for @tags -> $atag {
            %story<tags>.push($atag[0].text);
        }
        # Character names are read from the title attribute of the
        # character_icon links.
        my $extradiv = $story_div.lookfor(:TAG<div>, :class<extra_story_data>, :SINGLE);
        my @charactera = $extradiv.lookfor(:TAG<a>, :class<character_icon>);
        for @charactera -> $aelem {
            # Accessing one attribute
            %story<character_tags>.push($aelem<title>);
        }
        say "%story<title> by %story<author> "~%story<tags>.gist~" involving : "~%story<character_tags>.join(', ');
    }

}
	use v6;

	use Gumbo;
	use XML;
	use HTTP::UserAgent;

	# Base URLs of the site. Plain http is used here on purpose, not https.
	my $bbaseurl = "http://www.fimfiction.net/bookshelf/";
	my $fimbaseurl = "http://www.fimfiction.net/";

	# The bookshelf id is the first command-line argument. Without it the URL
	# built below would not name any bookshelf, so stop with a usage message.
	if @*ARGS.elems < 1 {
	    die "Usage: " ~ $*PROGRAM-NAME ~ " <bookshelf-id>";
	}
	my $bookshelfid = @*ARGS[0];

	my $url = $bbaseurl~$bookshelfid;
	my $ua = HTTP::UserAgent.new;
	# Fimfiction hides mature content (violent/sex stories) by default, so a
	# view_mature cookie is set on the user agent before any request is made.
	$ua.cookies.set-cookie('Set-Cookie:view_mature=true; ');

	# HTTP::UserAgent's get gives us a response object.
	my $rep = $ua.get($url);

	if ! $rep.is-success {
	    # Keep the URL and the status line apart so the message stays readable.
	    die "Can't contact $url: " ~ $rep.status-line;
	}

	# Step one: find out how many pages the bookshelf has.
	#
	# Calling parse-html($rep.content) alone and searching the resulting XML tree
	# would also work, but Gumbo's parse-html offers basic filtering that speeds
	# up the parsing:
	#   :SINGLE       stops at the first element matching div class="page_list"
	#   :nowhitespace skips the whitespace found outside elements
	#                 (indentation tabs, for instance)
	my $pager_doc = parse-html($rep.content, :TAG<div>, :class<page_list>, :SINGLE, :nowhitespace);

	# Note: the document root is the html tag, not the <div>.
	# The <ul> and any other content of the div are irrelevant here; only the
	# <li> tags are collected.
	my @page_items = $pager_doc.lookfor(:TAG<li>);

	# With more than one <li>, the page count is the text of the second-to-last
	# one; otherwise there is a single page.
	my $number_of_page = @page_items.elems > 1
	    ?? @page_items[*-2][0][0].text
	    !! 1;

	say "Bookshelf n°$bookshelfid has $number_of_page page(s)";

	# Walk every page of the bookshelf and print one summary line per story.
	for 1..$number_of_page -> $page {
	    my $page_url = "$url?order=date_added&page=$page";
	    $rep = $ua.get($page_url);
	    if ! $rep.is-success {
	        # Report the status line too, like the first request does.
	        die "Can't get $page_url: " ~ $rep.status-line;
	    }
	    # No :SINGLE this time, since we want all the story_content_box divs.
	    my $stories_div = parse-html($rep.content, :TAG<div>, :class<story_content_box>, :nowhitespace);
	    for $stories_div.root.nodes -> $story_div {
	        # Start both lists empty so that a story without tags or characters
	        # prints "[]" and an empty string rather than "(Any)".
	        my %story = tags => [], character_tags => [];
	        # :SINGLE makes lookfor return one XML::Element instead of an Array of them.
	        %story<title> = $story_div.lookfor(:TAG<a>, :class<story_name>, :SINGLE)[0].text;
	        # The author name is the text of a link inside the author div.
	        %story<author> = $story_div.lookfor(:TAG<div>, :class<author>, :SINGLE).lookfor(:TAG<a>, :SINGLE)[0].text;
	        # Tags are links whose class starts with story_category, hence the regex.
	        my $description_div = $story_div.lookfor(:TAG<div>, :class<description>, :SINGLE);
	        my @tags = $description_div.lookfor(:TAG<a>, :class(/^story_category/));
	        for @tags -> $atag {
	            %story<tags>.push($atag[0].text);
	        }
	        # Character names are read from the title attribute of the
	        # character_icon links.
	        my $extradiv = $story_div.lookfor(:TAG<div>, :class<extra_story_data>, :SINGLE);
	        my @charactera = $extradiv.lookfor(:TAG<a>, :class<character_icon>);
	        for @charactera -> $aelem {
	            # Accessing one attribute
	            %story<character_tags>.push($aelem<title>);
	        }
	        say "%story<title> by %story<author> "~%story<tags>.gist~" involving : "~%story<character_tags>.join(', ');
	    }

	}