Skip to content

Instantly share code, notes, and snippets.

@remorse
Created March 9, 2017 16:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save remorse/7fbfd973dc828268a077f9258ea192dc to your computer and use it in GitHub Desktop.
Save remorse/7fbfd973dc828268a077f9258ea192dc to your computer and use it in GitHub Desktop.
Save tumblr posts
#!/usr/bin/env perl6
use v6;
# Print $msg on STDERR and end the program with exit status 1.
# Used instead of die because Perl 6 doesn't have the Perl 5 "\n" magic for die.
sub croak ($msg) {
    note $msg;
    exit(1);
}
# retrieve network data
use HTTP::UserAgent;
use JSON::Tiny;
# template
use Template::Mustache;
# transcode images into data urls
use MIME::Base64;
# Command-line entry point: fetch a single tumblr post and save it as one HTML file.
#   :$url             address of the post to save (required)
#   :$base-directory  prefix handed to save_post for the output location (default './')
#   :$photo           accepted but not used yet (see the @TODO below)
sub MAIN (Str :$url!, Str :$base-directory = './', Bool :$photo = False) {
    # download the post, then pull out the fields shared by every post type
    my %raw  = get_tumblr_data($url);
    my %post = get_common_data(%raw);

    # @TODO handle photo stuff here

    # let the handler for this post type add its own fields to %post
    add_tumblr_data(%post<_type>, %post);

    # render the page description, inline its images as data URIs, write it out
    my %page = create_output_html(%post);
    convert_imgs(%page);
    save_post(%page, $base-directory);
}
# Retrieve the JSON(ish) data for the post at $url.
# Requests "$url?format=json", strips the JavaScript wrapper around the payload
# and returns the decoded data as a hash.
# Calls croak (which exits the program) when the request fails, the response
# does not start with the expected wrapper, the payload cannot be decoded, the
# 'tumblelog' entry is missing, or the 'posts' list does not hold exactly one post.
sub get_tumblr_data ($url) {
    my $data = HTTP::UserAgent.new.get($url ~ '?format=json');
    unless ($data.is-success) {
        croak("Error retrieving post: {$data.status-line}.");
    }
    unless ($data.content ~~ m:s/^var tumblr_api_read \= /) {
        croak("Error retrieving data: doesn't appear to be a tumblr.");
    }
    # Tumblr actually returns a JavaScript snippet, which we want to turn into JSON
    my $content = $data.content.chomp;
    $content ~~ s:s/^var tumblr_api_read \= //;
    $content ~~ s/\;$//;
    # from-json throws on text it cannot parse; report that through croak like
    # every other failure in this sub instead of letting the exception escape.
    my %json;
    {
        %json = from-json($content);
        CATCH {
            default { croak("Malformed JSON data received."); }
        }
    }
    if (!%json<tumblelog>) {
        croak("Malformed JSON data received.");
    }
    if (!%json<posts>) {
        croak("Not enough posts received.");
    }
    if (%json<posts>:v.elems != 1) {
        croak("Too many posts received ({%json<posts>:v.elems}).");
    }
    return %json;
}
# Collect the pieces of data that are wanted for every post type.
# Reads only %tdata<tumblelog> and the first element of %tdata<posts>.
# Returns a new hash holding the two raw structures ('_tumblr', '_post'),
# the post type ('_type'), the tumblr_* / post_* fields, an empty 'post_body',
# and 'post_tags' (left empty when the post carries no tags).
sub get_common_data (%tdata) {
    my %blog  = %tdata<tumblelog>;
    my %entry = %tdata<posts>[0];

    my %post = (
        _tumblr      => %blog,
        _post        => %entry,
        _type        => %entry<type>,
        tumblr_key   => %blog<name> || 'unknown',
        tumblr_title => %blog<title> || 'unknown',
        post_id      => %entry<id>,
        post_slug    => %entry<slug> || %entry<type>,
        post_url     => %entry<url-with-slug> || %entry<url>,
        post_date    => %entry<date-gmt>,
        post_tags    => {},
        post_body    => '',
    );

    # fix tags: store each one under the key 'tag' inside post_tags<tags>
    if (%entry<tags>) {
        %post<post_tags><tags> = [ %entry<tags>.list.map(-> $tag { 'tag' => $tag }) ];
    }

    return %post;
}
# Generic handler for unknown post types: reports the type recorded in
# %pdata<_type> and aborts the program via croak.
multi add_tumblr_data ($type, %pdata) {
    my $kind = %pdata<_type>;
    croak("Unimplemented post type: {$kind}.");
}
# Handler for 'regular' posts: builds the post body from the optional title and
# the body text of the raw post stored under %pdata<_post>.
# The result is written to %pdata<post_body>; the hash passed in is modified.
multi add_tumblr_data ($ where 'regular', %pdata) {
    my %s = %pdata<_post>;
    if (%s<regular-title>) { # note that I don't have to test defined and not empty!
        # The braces are required: written bare, %s<regular-title> directly
        # followed by "</h2>" is parsed as one more subscript (key "/h2") on the
        # title instead of as literal closing-tag text.
        %pdata<post_body> = "<h2>{%s<regular-title>}</h2>\n\n";
    }
    # a post without a body contributes nothing instead of an undefined value
    %pdata<post_body> ~= %s<regular-body> // '';
}
# Handler for 'photo' posts: stores a list of { caption, url } hashes in
# %pdata<post_photos>, built from the raw post under %pdata<_post>.
# When the post has a 'photos' list there is one entry per element of it;
# otherwise a single entry is taken from the post's own caption and URL fields.
multi add_tumblr_data ($ where 'photo', %pdata) {
    my %entry = %pdata<_post>;
    my @photos;
    if (%entry<photos>) {
        # several pictures: copy caption and 1280 URL of each one
        for %entry<photos>.list -> %shot {
            @photos.push: {
                caption => %shot<caption>,
                url     => %shot<photo-url-1280>,
            };
        }
    }
    else {
        # one picture: its caption and URL sit directly on the post
        @photos.push: {
            caption => %entry<photo-caption>,
            url     => %entry<photo-url-1280>,
        };
    }
    %pdata<post_photos> = @photos;
}
# Handler for 'video' posts: intentionally unsupported, so it only aborts the
# program via croak.
multi add_tumblr_data ($ where 'video', %pdata) {
    my $reason = "I'm not implementing video for this talk.";
    croak($reason);
}
# Create the spec used for saving the post to disk, rendering the page from the
# template text that follows =finish in this file.
# Returns a hash with:
#   type  always 'html'
#   dir   %pdata<tumblr_key> followed by '/'
#   file  "<post_id>--<post_slug>.pl6.html"
#   data  the template rendered with %pdata
sub create_output_html (%pdata) {
    my $directory = %pdata<tumblr_key> ~ '/';
    my $filename  = %pdata<post_id> ~ '--' ~ %pdata<post_slug> ~ '.pl6.html';
    my $rendered  = Template::Mustache.render($=finish, %pdata);

    my %output = (
        type => 'html',
        dir  => $directory,
        file => $filename,
        data => $rendered,
    );
    # @TODO downloads
    return %output;
}
# Change all img tag sources to use data URIs.
# Each src="..." value that follows "<img" in %output<data> is downloaded and
# replaced, in place, by a base64 data: URI built from the response. When a
# download fails the src value becomes "error: <status line>" instead and
# processing continues with the next image.
sub convert_imgs (%output) {
    my $ua = HTTP::UserAgent.new();
    # turns one image URL into the replacement text for its src attribute
    my $_convert_imgs_helper = sub ($url) {
        my $img_resp = $ua.get("$url");
        if ($img_resp.is-success) {
            return "data:{$img_resp.content-type};base64," ~ MIME::Base64.encode($img_resp.content, :oneline);
        }
        else {
            # status-line is the accessor get_tumblr_data also uses on a
            # response; the previous "response-line" call did not match it
            return "error: {$img_resp.status-line}";
        }
    };
    # the lookbehind keeps "<img ... src=\"" untouched; only the URL is replaced
    %output<data> ~~ s:g/ <?after \<img .+? src\=\"> (<-["]>+) /{ $_convert_imgs_helper($0) }/;
}
# Write the file to the disk, in the appropriate directory.
#   %output          spec with 'dir', 'file' and 'data'; 'dir' is rewritten in
#                    place so that it includes the base directory
#   $base-directory  prefix for %output<dir>; a missing trailing "/" is added
#                    (an empty prefix is left empty)
# Calls croak (which exits the program) when the directory cannot be created or
# the file cannot be written.
sub save_post (%output, $base-directory) {
    my $base = ~$base-directory;
    $base ~= '/' unless $base eq '' || $base.ends-with('/');
    %output<dir> = $base ~ %output<dir>;
    if (not %output<dir>.IO.d) {
        # %output<dir> already contains the base directory at this point, so it
        # must not be prefixed a second time
        my $made = mkdir(%output<dir>);
        # on failure $made carries the exception; report its message
        $made or croak("Could not create output directory %output<dir>: {$made.exception.message}");
    }
    my $target  = %output<dir> ~ %output<file>;
    my $written = spurt($target, %output<data>);
    $written or croak("Could not write output file $target: {$written.exception.message}");
    # @TODO downloads
}
=finish
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="origin" content="{{ post_url }}" />
<meta name="tumblr-type" content="{{ _type }}" />
{{# post_tags }}<meta name="keywords" content="tumblr{{# tags }}, {{ tag }}{{/ tags }}" />{{/ post_tags }}
<title>{{ tumblr_title }} — {{ post_id }} — {{ post_slug }}</title>
<!-- #bbinclude "../tumblr.css" -->
<style type="text/css">
body {
margin: 0;
padding: 0;
font-family: "Hoefler Text", serif;
}
article {
margin: 0;
padding: 0;
padding: 0.5rem;
background: #EEEEEE;
border: 0.25rem solid #DDDDDD;
}
article header {
margin: 0;
margin-bottom: 1rem;
padding: 0;
border-bottom: thin solid #BBBBBB;
}
article header h1 {
margin: 0;
padding: 0;
margin-top: 1rem;
margin-bottom: 0.25rem;
font-size: 1.2rem;
}
article header p.date {
margin: 0;
padding: 0;
margin-top: 0.5rem;
margin-bottom: 0.5rem;
font-size: 0.8rem;
}
article header p.tags {
margin: 0;
padding: 0;
margin-top: 0.5rem;
margin-bottom: 0.5rem;
font-size: 0.8rem;
font-style: italic;
}
article header p.tags span.tag {
padding-right: 1rem;
}
article header p.tags span.tag:before {
content: '# ';
color: gray;
}
article figure img {
max-width: 100%;
}
article blockquote {
margin-left: 1rem;
border-left: thin dashed #CCCCCC;
padding-left: 0.5rem;
margin-right: 0;
}
video {
max-width: 100%;
}
</style>
<!-- end bbinclude -->
<style type="text/css">
</style>
</head>
<body>
<article>
<header>
<h1><a href="{{ post_url }}">{{ tumblr_title }} — {{ post_id }} — {{ post_slug }}</a></h1>
<p class="date">{{ post_date }}</p>
{{# post_tags }}<p class="tags">{{# tags }}<span class="tag">{{ tag }}</span>{{/ tags }}</p>{{/ post_tags }}
</header>
{{# post_photos }}
<figure>
<a href="{{ url }}"><img src="{{ url }}" /></a>
<figcaption>{{& caption }}</figcaption>
</figure>
{{/ post_photos }}
{{# post_videos }}
<!-- {{& source }} -->
{{ controller }}
{{& caption }}
{{/ post_videos }}
{{& post_body }}
</article>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment