Skip to content

Instantly share code, notes, and snippets.

@tokiwatch
Last active July 6, 2016 07:41
Show Gist options
  • Save tokiwatch/e2c81e139e393ae388a6c690f4c2df27 to your computer and use it in GitHub Desktop.
Save tokiwatch/e2c81e139e393ae388a6c690f4c2df27 to your computer and use it in GitHub Desktop.
convert blogger backup file to octopress style entry files.
## -*- mode: perl; coding: utf-8 -*-
# blogger2octopress.pl
# Author: Takuji Tokiwa
#
# About:
# Export entries from a Blogger backup file to Octopress-format entry files.
#
# Usage:
# 1. Install XML::Simple, DateTime::Format::ISO8601 and HTML::WikiConverter::Markdown via cpan, cpanm or 'carton install'.
# 2. Set $blogger_fn to the path of your Blogger backup file.
# 3. Set $timezone to the UTC offset of your location (e.g. '+0900').
# 4. Run this script.
# 5. Copy the exported files to octopress_dir/source/_posts .
use strict;
use warnings;
use Encode ();
use XML::Simple;
use DateTime::Format::ISO8601;
use HTML::WikiConverter::Markdown;
# config
my $blogger_fn = 'blog.xml';      # Blogger backup (Atom XML) to read
my $timezone   = '+0900';         # UTC offset the exported dates are expressed in
my $exp_dir    = "./entries/";    # directory the .markdown files are written to

# Parse the backup. The loop below relies on XML::Simple having folded the
# <entry> elements into a hash keyed by each entry's id.
my $xml     = XML::Simple->new;
my $data    = $xml->XMLin($blogger_fn) or die "Cannot parse '$blogger_fn'";
my $entries = $data->{entry}            or die "No entries found in '$blogger_fn'";

# Make sure the export directory exists so open() below cannot fail for
# that reason alone.
unless (-d $exp_dir) {
    mkdir $exp_dir or die "Cannot create directory '$exp_dir': $!";
}

# Sorted so that repeated runs process (and report) entries in the same order.
foreach my $id (sort keys %$entries) {

    # Only ids ending in "post-<digits>" are exported; the digits become
    # part of the output filename, so ids without them are skipped.
    # NOTE(review): Blogger comment entries may use the same id form --
    # confirm whether they should be filtered by category instead.
    next unless $id =~ /post-(\d+)$/;
    my $eid = $1;

    my $entry     = $entries->{$id};
    my $published = $entry->{published};
    my $title     = _text_of($entry->{title});
    my $html      = _text_of($entry->{content});

    unless (defined $published) {
        warn "Skipping $id: no published date\n";
        next;
    }

    # XML::Simple yields a hash ref for a single <category> element and an
    # array ref for several; normalise both shapes to a list.
    my $categories    = $entry->{category};
    my @category_list = ref($categories) eq 'ARRAY' ? @$categories
                      : ref($categories) eq 'HASH'  ? ($categories)
                      :                               ();

    # Terms starting with "http" are skipped (as in the original script);
    # the remaining terms become the Octopress categories.
    my @terms = grep { defined($_) && $_ !~ /^http/ }
                map  { $_->{term} } @category_list;

    # Build a YAML flow sequence of single-quoted scalars, or an empty
    # string when the entry has no usable categories.
    my $categories_str = '';
    if (@terms) {
        $categories_str = '[' . join(', ', map { _yaml_sq($_) } @terms) . ']';
    }

    # gen date
    # Convert to the configured offset *before* formatting; otherwise the
    # wall-clock time of the source offset would be labelled with $timezone.
    my $dt = DateTime::Format::ISO8601->parse_datetime($published);
    $dt->set_time_zone($timezone);
    my $date = $dt->ymd('-') . ' ' . $dt->hms(':') . ' ' . $timezone;

    # convert from HTML to Markdown
    my $content = '';
    if (length $html) {
        my $wc = HTML::WikiConverter->new(dialect => 'Markdown');

        # NOTE(review): the UTF-8 flag juggling is kept from the original;
        # presumably html2wiki expects bytes here -- confirm before changing.
        Encode::_utf8_off($html);
        $content = $wc->html2wiki($html);
        $content = '' unless defined $content;
        Encode::_utf8_on($content);

        $content =~ s/<br *\/*>//g;     # drop leftover <br> tags
        $content =~ s/<>//g;            # drop empty autolinks
        $content =~ s/\n+/\n\n/g;       # every line break becomes a paragraph break
    }

    # gen filename
    my $fn   = $dt->ymd('-') . '-' . $eid . ".markdown";
    my $path = $exp_dir . $fn;

    # gen string
    my $str = join '',
        "---\n",
        "layout: post\n",
        'title: "' . _yaml_dq($title) . "\"\n",
        'date: ' . $date . "\n",
        "comments: false\n",
        'categories: ' . $categories_str . "\n",
        "---\n",
        $content;

    print "filename: " . $fn . "\n";

    # write to file
    open my $fh, '>:utf8', $path or die "Cannot open '$path' for writing: $!";
    print {$fh} $str             or die "Cannot write to '$path': $!";
    close $fh                    or die "Cannot close '$path': $!";
}

# Return the text of an element parsed by XML::Simple: the {content} value
# when the element was parsed into a hash (it carried attributes), the value
# itself otherwise. Never returns undef.
sub _text_of {
    my ($node) = @_;
    my $text = ref($node) eq 'HASH' ? $node->{content} : $node;
    return defined $text ? $text : '';
}

# Escape backslashes and double quotes so the value can sit inside a
# double-quoted YAML scalar.
sub _yaml_dq {
    my ($text) = @_;
    $text =~ s/(["\\])/\\$1/g;
    return $text;
}

# Wrap a value as a single-quoted YAML scalar; an embedded single quote is
# escaped by doubling it.
sub _yaml_sq {
    my ($text) = @_;
    $text =~ s/'/''/g;
    return "'" . $text . "'";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment