RogerDodger/writeoff-scrape.pl

## writeoff-scrape.pl
#!/usr/bin/env perl
# Scrape writeoff.me events for stories and write them to files

use Mojo::Base -strict;
use Mojo::UserAgent;
use File::Spec;

# IDs of events to scrape
my @events = (1..23);

# Directory to write stories to
my $root = 'writeoff';
unless (-d $root) {
	mkdir $root or die "Cannot create directory '$root': $!\n";
}

my $ua = Mojo::UserAgent->new(max_redirects => 2);

# Turn the value of $tx->error into a printable message. Current Mojolicious
# returns a hash ref with "message" and, for HTTP error responses, "code";
# anything that is not a hash ref is passed through unchanged.
my $describe_error = sub {
	my $err = shift;
	return $err unless ref $err eq 'HASH';
	return $err->{code}
		? "$err->{code} response: $err->{message}"
		: "Connection error: $err->{message}";
};

for my $eid (@events) {
	my $gallery_url = "http://writeoff.me/event/$eid/fic/gallery";
	my $tx = $ua->get($gallery_url);

	# Failed requests (connection problems or HTTP error responses) are
	# reported and skipped so one bad event does not stop the rest of the run.
	if (my $err = $tx->error) {
		warn "Event $eid ($gallery_url): ", $describe_error->($err), "\n";
		next;
	}
	my $res = $tx->res;

	# Event exists, make sure dir exists to write stories to
	#
	# The URL is different from the one we requested because there was a redirect,
	#   e.g., /event/23/... -> /event/23-One-Little-Mistake/...
	# We want to extract the "23-One-Little-Mistake" to use as the dir name.
	# If the URL has no such segment, fall back to the bare event ID instead
	# of building a path from an undefined value.
	my ($id_uri) = $tx->req->url =~ m{/ (\d+ - [^/]+) /}x;
	$id_uri //= $eid;

	my $dir = File::Spec->catdir($root, $id_uri);
	unless (-d $dir or mkdir $dir) {
		warn "Cannot create directory '$dir': $!\n";
		next;
	}

	say "Downloading stories from $id_uri...";
	# Iterate through all the "txt" links (there's one for each story) and
	# write their responses to files
	$res->dom('a')->each(sub {
		my $link = shift;
		return unless $link->text eq 'txt';

		my $href = $link->attr('href');
		return unless defined $href and length $href;

		# The last path segment of the link becomes the file name; skip links
		# that end in "/" and therefore leave nothing to name the file with.
		my $filename = $href =~ s{.+/}{}r;
		return unless length $filename;

		my $story_tx = $ua->get($href);
		if (my $err = $story_tx->error) {
			warn "Story $href: ", $describe_error->($err), "\n";
			return;
		}

		# The response body is raw bytes, so write it through a :raw handle
		# to keep the file byte-for-byte identical to what the server sent.
		my $path = File::Spec->catfile($dir, $filename);
		my $fh;
		unless (open $fh, '>:raw', $path) {
			warn "Cannot open '$path' for writing: $!\n";
			return;
		}
		print {$fh} $story_tx->res->body
			or warn "Cannot write to '$path': $!\n";
		# Buffered write errors only surface at close, so check it too.
		close $fh or warn "Cannot close '$path': $!\n";
	});
}
	#!/usr/bin/env perl
	# Scrape writeoff.me events for stories and write them to files

	use Mojo::Base -strict;
	use Mojo::UserAgent;
	use File::Spec;

	# IDs of events to scrape
	my @events = (1..23);

	# Directory to write stories to
	my $root = 'writeoff';
	unless (-d $root) {
		mkdir $root or die "Cannot create directory '$root': $!\n";
	}

	my $ua = Mojo::UserAgent->new(max_redirects => 2);

	# Turn the value of $tx->error into a printable message. Current Mojolicious
	# returns a hash ref with "message" and, for HTTP error responses, "code";
	# anything that is not a hash ref is passed through unchanged.
	my $describe_error = sub {
		my $err = shift;
		return $err unless ref $err eq 'HASH';
		return $err->{code}
			? "$err->{code} response: $err->{message}"
			: "Connection error: $err->{message}";
	};

	for my $eid (@events) {
		my $gallery_url = "http://writeoff.me/event/$eid/fic/gallery";
		my $tx = $ua->get($gallery_url);

		# Failed requests (connection problems or HTTP error responses) are
		# reported and skipped so one bad event does not stop the rest of the run.
		if (my $err = $tx->error) {
			warn "Event $eid ($gallery_url): ", $describe_error->($err), "\n";
			next;
		}
		my $res = $tx->res;

		# Event exists, make sure dir exists to write stories to
		#
		# The URL is different from the one we requested because there was a redirect,
		# e.g., /event/23/... -> /event/23-One-Little-Mistake/...
		# We want to extract the "23-One-Little-Mistake" to use as the dir name.
		# If the URL has no such segment, fall back to the bare event ID instead
		# of building a path from an undefined value.
		my ($id_uri) = $tx->req->url =~ m{/ (\d+ - [^/]+) /}x;
		$id_uri //= $eid;

		my $dir = File::Spec->catdir($root, $id_uri);
		unless (-d $dir or mkdir $dir) {
			warn "Cannot create directory '$dir': $!\n";
			next;
		}

		say "Downloading stories from $id_uri...";
		# Iterate through all the "txt" links (there's one for each story) and
		# write their responses to files
		$res->dom('a')->each(sub {
			my $link = shift;
			return unless $link->text eq 'txt';

			my $href = $link->attr('href');
			return unless defined $href and length $href;

			# The last path segment of the link becomes the file name; skip links
			# that end in "/" and therefore leave nothing to name the file with.
			my $filename = $href =~ s{.+/}{}r;
			return unless length $filename;

			my $story_tx = $ua->get($href);
			if (my $err = $story_tx->error) {
				warn "Story $href: ", $describe_error->($err), "\n";
				return;
			}

			# The response body is raw bytes, so write it through a :raw handle
			# to keep the file byte-for-byte identical to what the server sent.
			my $path = File::Spec->catfile($dir, $filename);
			my $fh;
			unless (open $fh, '>:raw', $path) {
				warn "Cannot open '$path' for writing: $!\n";
				return;
			}
			print {$fh} $story_tx->res->body
				or warn "Cannot write to '$path': $!\n";
			# Buffered write errors only surface at close, so check it too.
			close $fh or warn "Cannot close '$path': $!\n";
		});
	}