Last active
March 9, 2017 19:42
-
-
Save remorse/8db4a635e58f526a7ef43716a3b68964 to your computer and use it in GitHub Desktop.
New version of download tumblr posts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl6 | |
use v6; | |
# Report a fatal error: write $msg to standard error, then end the program
# with exit status 1. Needed because Perl 6 doesn't have the Perl 5 "\n"
# magic for die.
sub croak ($msg) {
    $*ERR.say: $msg;
    exit 1;
}
# retrieve network data | |
use HTTP::UserAgent; | |
use JSON::Tiny; | |
# template | |
use Template::Mustache; | |
# transcode images into data urls | |
use MIME::Base64; | |
# Command-line entry point: fetch one Tumblr post and store it on disk as a
# single HTML file.
#   :$url             address of the post to fetch (required)
#   :$base-directory  directory to switch into before anything is written
#   :$photo           accepted, but not acted upon yet (see @NOTE below)
sub MAIN (Str :$url!, Str :$base-directory = './', Bool :$photo = False) {
    # the output directory is a relative path, so move to the base first
    (try chdir $base-directory)
        orelse croak("Couldn't change to the base directory '$base-directory': $!");

    # fetch the post from the web and keep the elements common to all types
    my %pdata = get-common-data(get-tumblr-data($url));

    # @NOTE not implemented: handling download only photo

    # let the handler for this particular post type add its own elements
    add-tumblr-data(%pdata<_type>, %pdata);

    # render, inline every image as a data URI, then write the result
    my %rendered = create-output-html(%pdata);
    convert-imgs(%rendered);
    save-post(%rendered);
}
# Download the JSON(ish) description of a single post.
# Requests $url with '?format=json' appended, strips the JavaScript wrapper
# around the payload and decodes it. Returns the decoded hash; calls croak
# on an HTTP error, on an unexpected payload, or when the reply does not
# hold exactly one post.
sub get-tumblr-data (Str $url) {
    my $response = HTTP::UserAgent.new.get($url ~ '?format=json');

    croak("HTTP error retrieving post: {$response.status-line}.")
        unless $response.is-success;
    croak("Error retrieving data: doesn't appear to be a tumblr.")
        unless $response.content ~~ m:s/^var tumblr_api_read \= /;

    # Tumblr actually returns a JavaScript snippet ("var tumblr_api_read = ...;"),
    # so drop the assignment prefix and the trailing semicolon to get JSON
    my $payload = $response.content.chomp;
    $payload ~~ s:s/^var tumblr_api_read \= //;
    $payload ~~ s/\;$//;

    my %json = from-json($payload);

    croak("Malformed JSON data received.") unless %json<tumblelog>;
    croak("Not enough posts received.")    unless %json<posts>;
    croak("Too many posts received ({%json<posts>:v.elems}).")
        unless %json<posts>:v.elems == 1;

    return %json;
}
# Collect the values every post type needs from the decoded API reply.
# Reads %tdata<tumblelog> and the first entry of %tdata<posts>. The returned
# hash keeps both raw structures (under _tumblr and _post) next to the
# flattened fields; post_body starts out empty for the type handlers to fill.
sub get-common-data (%tdata) {
    my %blog  = %tdata<tumblelog>;
    my %entry = %tdata<posts>[0];

    my %post = (
        _tumblr      => %blog,
        _post        => %entry,
        _type        => %entry<type>,
        tumblr_key   => %blog<name>  || 'unknown',
        tumblr_title => %blog<title> || 'unknown',
        post_id      => %entry<id>,
        # fall back to the post type when there is no slug
        post_slug    => %entry<slug> || %entry<type>,
        post_url     => %entry<url-with-slug> || %entry<url>,
        post_date    => %entry<date-gmt>,
        post_tags    => {},
        post_body    => '',
    );

    # wrap each tag so it can be looked up by the key "tag"; post_tags stays
    # empty when the post has no tags
    %post<post_tags><tags> = [ %entry<tags>.list.map: { tag => $_ } ]
        if %entry<tags>;

    return %post;
}
# Fallback candidate for post types that have no handler of their own:
# aborts through croak, naming the type stored in %pdata<_type>.
multi add-tumblr-data ($type, %pdata) {
    croak(
        "Unimplemented post type: {%pdata<_type>}."
    );
}
# Handler for 'regular' (text) posts: sets %pdata<post_body> to an optional
# <h2> heading built from the post's title, followed by the post's body.
multi add-tumblr-data ($ where 'regular', %pdata) {
    my %entry = %pdata<_post>;
    my $title = %entry<regular-title>;

    # only emit a heading when the post actually has a title
    %pdata<post_body> = "<h2>$title</h2>\n\n" if $title;

    %pdata<post_body> ~= %entry<regular-body>;
}
# Handler for 'photo' posts: stores a list of { caption, url } hashes in
# %pdata<post_photos>. A post with a 'photos' list contributes one entry per
# photo; otherwise a single entry is built from the post's own caption and
# 1280px URL.
multi add-tumblr-data ($ where 'photo', %pdata) {
    my %entry = %pdata<_post>;
    my @collected;

    if %entry<photos> {
        # photoset: one entry for each listed photo
        for %entry<photos>.list -> %ph {
            @collected.push: {
                caption => %ph<caption>,
                url     => %ph<photo-url-1280>,
            };
        }
    }
    else {
        # single photo: the fields live on the post itself
        @collected.push: {
            caption => %entry<photo-caption>,
            url     => %entry<photo-url-1280>,
        };
    }

    %pdata<post_photos> = @collected;
}
# Handler for 'video' posts: deliberately unsupported, so it always aborts
# through croak.
multi add-tumblr-data ($ where 'video', %pdata) {
    croak(
        "I'm not implementing video for this talk."
    );
}
# Describe what has to be written to disk for this post.
# Returns a hash with the keys:
#   type  always 'html'
#   dir   directory named after the tumblr, with a trailing slash
#   file  "<post id>--<post slug>.pl6.html"
#   data  the template after this file's =finish marker, rendered with %pdata
sub create-output-html (%pdata) {
    my $directory = %pdata<tumblr_key> ~ '/';
    my $filename  = %pdata<post_id> ~ '--' ~ %pdata<post_slug> ~ '.pl6.html';
    my $html      = Template::Mustache.render($=finish, %pdata);

    my %spec = (
        type => 'html',
        dir  => $directory,
        file => $filename,
        data => $html,
    );

    # @NOTE not implemented download handling
    return %spec;
}
# Rewrite %output<data> in place so that every <img> tag's src attribute
# holds a data: URI with the image content instead of a remote address.
# An image that cannot be fetched successfully gets the text
# "error: <status line>" as its src rather than stopping the whole run.
sub convert-imgs (%output) {
    # one user agent, shared by all image requests
    my $ua = HTTP::UserAgent.new();

    # Fetch $url and return the replacement text for a single src attribute.
    sub convert-imgs-helper ($url) {
        # $url arrives as a match object from the substitution; stringify it
        my $r = $ua.get: ~$url;
        if $r.is-success {
            return "data:{$r.content-type};base64," ~ MIME::Base64.encode($r.content, :oneline);
        }
        else {
            # status-line is the response accessor get-tumblr-data uses too;
            # the previous response-line call is not a method this file uses
            # anywhere else and would die here instead of reporting the error
            return "error: {$r.status-line}";
        }
    }

    # match only the src value (the lookbehind anchors it inside an <img> tag)
    # and replace it with whatever the helper returns
    %output<data> ~~ s:g/ <?after \<img .+? src\=\"> (<-["]>+) /{ convert-imgs-helper($0) }/;
}
# Write the rendered post to disk.
# Creates the directory %output<dir> when it does not exist yet, then writes
# %output<data> to the path formed by joining %output<dir> and %output<file>.
# Calls croak, including the underlying error, when either step fails.
sub save-post (%output) {
    if !%output<dir>.IO.d {
        # After a failed `try` the exception lives in $!, while the topic seen
        # by `orelse` is only the undefined result of the `try`; report $!
        # (as MAIN does for chdir) so the message carries the actual reason.
        (try mkdir %output<dir>) orelse croak("Could not create output directory '%output<dir>': $!");
    }
    with %output<dir file>.join -> $f {
        (try spurt $f, %output<data>) orelse croak("Could not write output file '$f': $!");
    }
    # @NOTE not implemented download handling
}
=finish | |
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="utf-8" /> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |
<meta name="origin" content="{{ post_url }}" /> | |
<meta name="tumblr-type" content="{{ _type }}" /> | |
{{# post_tags }}<meta name="keywords" content="tumblr{{# tags }}, {{ tag }}{{/ tags }}" />{{/ post_tags }} | |
<title>{{ tumblr_title }} — {{ post_id }} — {{ post_slug }}</title> | |
<!-- #bbinclude "../tumblr.css" --> | |
<style type="text/css"> | |
body { | |
margin: 0; | |
padding: 0; | |
font-family: "Hoefler Text", serif; | |
} | |
article { | |
margin: 0; | |
padding: 0; | |
padding: 0.5rem; | |
background: #EEEEEE; | |
border: 0.25rem solid #DDDDDD; | |
} | |
article header { | |
margin: 0; | |
margin-bottom: 1rem; | |
padding: 0; | |
border-bottom: thin solid #BBBBBB; | |
} | |
article header h1 { | |
margin: 0; | |
padding: 0; | |
margin-top: 1rem; | |
margin-bottom: 0.25rem; | |
font-size: 1.2rem; | |
} | |
article header p.date { | |
margin: 0; | |
padding: 0; | |
margin-top: 0.5rem; | |
margin-bottom: 0.5rem; | |
font-size: 0.8rem; | |
} | |
article header p.tags { | |
margin: 0; | |
padding: 0; | |
margin-top: 0.5rem; | |
margin-bottom: 0.5rem; | |
font-size: 0.8rem; | |
font-style: italic; | |
} | |
article header p.tags span.tag { | |
padding-right: 1rem; | |
} | |
article header p.tags span.tag:before { | |
content: '# '; | |
color: gray; | |
} | |
article figure img { | |
max-width: 100%; | |
} | |
article blockquote { | |
margin-left: 1rem; | |
border-left: thin dashed #CCCCCC; | |
padding-left: 0.5rem; | |
margin-right: 0; | |
} | |
video { | |
max-width: 100%; | |
} | |
</style> | |
<!-- end bbinclude --> | |
<style type="text/css"> | |
</style> | |
</head> | |
<body> | |
<article> | |
<header> | |
<h1><a href="{{ post_url }}">{{ tumblr_title }} — {{ post_id }} — {{ post_slug }}</a></h1> | |
<p class="date">{{ post_date }}</p> | |
{{# post_tags }}<p class="tags">{{# tags }}<span class="tag">{{ tag }}</span>{{/ tags }}</p>{{/ post_tags }} | |
</header> | |
{{# post_photos }} | |
<figure> | |
<a href="{{ url }}"><img src="{{ url }}" /></a> | |
<figcaption>{{& caption }}</figcaption> | |
</figure> | |
{{/ post_photos }} | |
{{# post_videos }} | |
<!-- {{& source }} --> | |
{{ controller }} | |
{{& caption }} | |
{{/ post_videos }} | |
{{& post_body }} | |
</article> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment