Created
March 9, 2017 16:37
-
-
Save remorse/7fbfd973dc828268a077f9258ea192dc to your computer and use it in GitHub Desktop.
Save tumblr posts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl6 | |
use v6; | |
# Print $msg to STDERR and terminate the program with exit status 1.
# Exists because Perl 6 doesn't have the Perl 5 "\n" magic for die.
sub croak ($msg) {
    $*ERR.say: $msg;
    exit 1;
}
# retrieve network data | |
use HTTP::UserAgent; | |
use JSON::Tiny; | |
# template | |
use Template::Mustache; | |
# transcode images into data urls | |
use MIME::Base64; | |
# Command-line entry point: download one tumblr post and save it as a
# single HTML file.
#
#   --url             address of the post to save (required)
#   --base-directory  prefix for the output directory (default './')
#   --photo           accepted, but not read anywhere in this body yet
sub MAIN (Str :$url!, Str :$base-directory = './', Bool :$photo = False) {
    # fetch the raw post data, then pull out the fields every post type shares
    my %raw  = get_tumblr_data($url);
    my %post = get_common_data(%raw);

    # @TODO handle photo stuff here

    # let the matching multi candidate fill in the type-specific fields
    add_tumblr_data(%post<_type>, %post);

    # render the template into an "output" spec (type, dir, file, data)
    my %page = create_output_html(%post);

    # inline every <img> source as a data URI
    convert_imgs(%page);

    # write the result below the base directory
    save_post(%page, $base-directory);
}
# Retrieve the JSON(ish) data for the post.
#
# Requests "$url?format=json", checks that the body starts with the
# "var tumblr_api_read = " wrapper, strips the wrapper and decodes the rest.
# Returns the decoded hash, which is guaranteed to have a true <tumblelog>
# entry and exactly one element in <posts>. Every failure ends the program
# through croak.
sub get_tumblr_data ($url) {
    my $data = HTTP::UserAgent.new.get($url ~ '?format=json');
    unless $data.is-success {
        croak("Error retrieving post: {$data.status-line}.");
    }
    unless $data.content ~~ m:s/^var tumblr_api_read \= / {
        croak("Error retrieving data: doesn't appear to be a tumblr.");
    }

    # Tumblr actually returns a JavaScript snippet, which we want to turn into JSON.
    # trim-trailing (rather than chomp) also copes with "\r\n" or trailing
    # blanks, so the final ';' is always at the very end when we strip it.
    my $content = $data.content.trim-trailing;
    $content ~~ s:s/^var tumblr_api_read \= //;
    $content ~~ s/\;$//;

    # A parse failure, or a top-level value that is not an object, is
    # reported with our own message instead of escaping as an exception.
    my $decoded = try from-json($content);
    unless $decoded ~~ Associative {
        croak("Malformed JSON data received.");
    }
    my %json = $decoded.Hash;

    unless %json<tumblelog> {
        croak("Malformed JSON data received.");
    }
    unless %json<posts> {
        croak("Not enough posts received.");
    }
    my $post-count = %json<posts>.elems;
    if $post-count != 1 {
        croak("Too many posts received ($post-count).");
    }
    return %json;
}
# Extract the data pieces that we will want for all post types.
#
# %tdata is the decoded tumblr response; this reads %tdata<tumblelog> and the
# first element of %tdata<posts>. Returns a hash holding the raw structures
# ('_tumblr', '_post', '_type') plus the flattened fields the template uses.
# 'post_tags' stays an empty hash when the post has no tags, otherwise it
# holds a 'tags' array with one { tag => ... } hash per tag.
sub get_common_data (%tdata) {
    my %t = %tdata<tumblelog>;
    my %p = %tdata<posts>[0];
    my %post = (
        '_tumblr'      => %t,
        '_post'        => %p,
        '_type'        => %p<type>,
        'tumblr_key'   => %t<name> || 'unknown',
        'tumblr_title' => %t<title> || 'unknown',
        'post_id'      => %p<id>,
        'post_slug'    => %p<slug> || %p<type>,
        'post_url'     => %p<url-with-slug> || %p<url>,
        'post_date'    => %p<date-gmt>,
        'post_tags'    => {},
        'post_body'    => '',
    );

    # fix tags
    # A bare { 'tag' => $_ } is a Block that returns a Pair (it mentions $_),
    # so build real hashes explicitly with %( ).
    if %p<tags> {
        %post<post_tags><tags> = [ %p<tags>.list.map: -> $tag { %( tag => $tag ) } ];
    }
    return %post;
}
# Generic handler for unknown post types: this candidate has no constraint
# on its first argument, and it reports the type recorded in %pdata<_type>
# before exiting through croak.
multi add_tumblr_data ($type, %pdata) {
    croak "Unimplemented post type: {%pdata<_type>}.";
}
# Handler for 'regular' (text) posts.
#
# Builds %pdata<post_body> from the raw post in %pdata<_post>: an <h2> with
# the title when one is present, followed by the post body markup.
multi add_tumblr_data ($ where 'regular', %pdata) {
    my %s = %pdata<_post>;
    if %s<regular-title> { # note that I don't have to test defined and not empty!
        # Interpolate through a closure: written as "%s<regular-title></h2>"
        # the "</h2>" would be parsed as one more subscript on the value.
        %pdata<post_body> = "<h2>{%s<regular-title>}</h2>\n\n";
    }
    # A post without a body contributes nothing instead of an undefined value.
    %pdata<post_body> ~= %s<regular-body> // '';
}
# Handler for 'photo' posts.
#
# Fills %pdata<post_photos> with one { caption, url } hash per picture.
# When the raw post has a true <photos> entry, each of its elements
# contributes its own caption and 1280px URL; otherwise the post's own
# <photo-caption> and <photo-url-1280> fields describe the only picture.
multi add_tumblr_data ($ where 'photo', %pdata) {
    my %entry = %pdata<_post>;
    my @collected;

    if %entry<photos> {
        for %entry<photos>.list -> %picture {
            @collected.push: %(
                caption => %picture<caption>,
                url     => %picture<photo-url-1280>,
            );
        }
    }
    else {
        @collected.push: %(
            caption => %entry<photo-caption>,
            url     => %entry<photo-url-1280>,
        );
    }

    %pdata<post_photos> = @collected;
}
# Handler for 'video' posts: deliberately unimplemented, it only reports
# that and exits through croak.
multi add_tumblr_data ($ where 'video', %pdata) {
    croak "I'm not implementing video for this talk.";
}
# Create a spec for saving the data to disk, rendering off the template.
#
# Returns a hash with:
#   type - always 'html'
#   dir  - the tumblr key followed by '/'
#   file - "<post id>--<post slug>.pl6.html"
#   data - the =finish section of this file rendered by Template::Mustache
#          with %pdata as the context
sub create_output_html (%pdata) {
    my $directory = %pdata<tumblr_key> ~ '/';
    my $filename  = %pdata<post_id> ~ '--' ~ %pdata<post_slug> ~ '.pl6.html';
    my $rendered  = Template::Mustache.render($=finish, %pdata);

    # @TODO downloads

    my %output = (
        'type' => 'html',
        'dir'  => $directory,
        'file' => $filename,
        'data' => $rendered,
    );
    return %output;
}
# Change all img tag sources to use data URIs.
#
# Rewrites %output<data> in place: the src value of every <img> tag is
# fetched and replaced by "data:<content type>;base64,<payload>". When a
# fetch is not successful the src becomes "error: <status line>" instead,
# so a failure stays visible in the saved file.
sub convert_imgs (%output) {
    my $ua = HTTP::UserAgent.new();

    # Download one image and return the replacement text for its src attribute.
    my sub to-data-uri ($url) {
        my $img_resp = $ua.get("$url");
        if $img_resp.is-success {
            return "data:{$img_resp.content-type};base64,"
                 ~ MIME::Base64.encode($img_resp.content, :oneline);
        }
        # status-line is the accessor get_tumblr_data uses on the same
        # response type; NOTE(review): response-line, used here before, does
        # not appear to exist on HTTP::Response - confirm.
        return "error: {$img_resp.status-line}";
    }

    %output<data> ~~ s:g/ <?after \<img .+? src\=\"> (<-["]>+) /{ to-data-uri($0) }/;
}
# Write the file to the disk, in the appropriate directory.
#
# %output is the spec built by create_output_html; %output<dir> is updated
# in place to include the base directory. $base-directory may be given with
# or without a trailing '/'. The directory is created when it does not
# exist yet. Any failure is reported through croak together with the
# underlying error message.
sub save_post (%output, $base-directory) {
    # Make sure base and post directory are joined by exactly one separator
    # (an empty base stays empty so the path remains relative).
    my $base = ($base-directory eq '' || $base-directory.ends-with('/'))
        ?? $base-directory
        !! $base-directory ~ '/';

    # Prefix the base directory once; mkdir and spurt both use this path.
    %output<dir> = $base ~ %output<dir>;

    unless %output<dir>.IO.d {
        try {
            mkdir(%output<dir>);
            CATCH {
                default {
                    my $reason = .message;
                    croak("Could not create output directory %output<dir>: $reason");
                }
            }
        }
    }

    try {
        spurt %output<dir> ~ %output<file>, %output<data>;
        CATCH {
            default {
                my $reason = .message;
                croak("Could not write output file %output<dir>%output<file>: $reason");
            }
        }
    }
    # @TODO downloads
}
=finish | |
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="utf-8" /> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |
<meta name="origin" content="{{ post_url }}" /> | |
<meta name="tumblr-type" content="{{ _type }}" /> | |
{{# post_tags }}<meta name="keywords" content="tumblr{{# tags }}, {{ tag }}{{/ tags }}" />{{/ post_tags }} | |
<title>{{ tumblr_title }} — {{ post_id }} — {{ post_slug }}</title> | |
<!-- #bbinclude "../tumblr.css" --> | |
<style type="text/css"> | |
body { | |
margin: 0; | |
padding: 0; | |
font-family: "Hoefler Text", serif; | |
} | |
article { | |
margin: 0; | |
padding: 0; | |
padding: 0.5rem; | |
background: #EEEEEE; | |
border: 0.25rem solid #DDDDDD; | |
} | |
article header { | |
margin: 0; | |
margin-bottom: 1rem; | |
padding: 0; | |
border-bottom: thin solid #BBBBBB; | |
} | |
article header h1 { | |
margin: 0; | |
padding: 0; | |
margin-top: 1rem; | |
margin-bottom: 0.25rem; | |
font-size: 1.2rem; | |
} | |
article header p.date { | |
margin: 0; | |
padding: 0; | |
margin-top: 0.5rem; | |
margin-bottom: 0.5rem; | |
font-size: 0.8rem; | |
} | |
article header p.tags { | |
margin: 0; | |
padding: 0; | |
margin-top: 0.5rem; | |
margin-bottom: 0.5rem; | |
font-size: 0.8rem; | |
font-style: italic; | |
} | |
article header p.tags span.tag { | |
padding-right: 1rem; | |
} | |
article header p.tags span.tag:before { | |
content: '# '; | |
color: gray; | |
} | |
article figure img { | |
max-width: 100%; | |
} | |
article blockquote { | |
margin-left: 1rem; | |
border-left: thin dashed #CCCCCC; | |
padding-left: 0.5rem; | |
margin-right: 0; | |
} | |
video { | |
max-width: 100%; | |
} | |
</style> | |
<!-- end bbinclude --> | |
<style type="text/css"> | |
</style> | |
</head> | |
<body> | |
<article> | |
<header> | |
<h1><a href="{{ post_url }}">{{ tumblr_title }} — {{ post_id }} — {{ post_slug }}</a></h1> | |
<p class="date">{{ post_date }}</p> | |
{{# post_tags }}<p class="tags">{{# tags }}<span class="tag">{{ tag }}</span>{{/ tags }}</p>{{/ post_tags }} | |
</header> | |
{{# post_photos }} | |
<figure> | |
<a href="{{ url }}"><img src="{{ url }}" /></a> | |
<figcaption>{{& caption }}</figcaption> | |
</figure> | |
{{/ post_photos }} | |
{{# post_videos }} | |
<!-- {{& source }} --> | |
{{ controller }} | |
{{& caption }} | |
{{/ post_videos }} | |
{{& post_body }} | |
</article> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment