Skip to content

Instantly share code, notes, and snippets.

@satooshi
Created February 28, 2013 14:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save satooshi/5057260 to your computer and use it in GitHub Desktop.
Save satooshi/5057260 to your computer and use it in GitHub Desktop.
Convert Movable Type text exported from Hatena Diary to Markdown.
<?php
// decoder
/**
 * Decode a single Movable Type export entry into an associative array.
 *
 * While header reading is enabled, a line of the form "KEY: value" is stored
 * under the lower-cased key; a key that occurs more than once is collected
 * into a list of values in order of appearance. A line of the form "NAME:"
 * opens a multi-line section and disables header reading until the next
 * "-----" separator line. Every line that is not consumed as a separator or
 * header while the most recently opened section is "body" is appended to the
 * body, which is returned joined with "\n" under the 'body' key (an empty
 * string when no body section was found).
 *
 * @param string $data Raw text of one entry, with LF or CRLF line endings.
 *
 * @return array Decoded header values plus the 'body' string.
 */
function decodePost($data)
{
    $post = array();
    $bodyLines = array();
    $readMeta = true;
    $section = null;

    foreach (explode("\n", $data) as $line) {
        // Exports saved with CRLF line endings leave a "\r" on every line,
        // which would keep the separator and "NAME:" patterns from matching
        // and would end up inside every stored value.
        $line = rtrim($line, "\r");

        if ($line === '-----') {
            // Section separator: header lines may follow again.
            $readMeta = true;
            continue;
        }

        if ($readMeta && preg_match('/^(?P<meta>[\w\s]+?)\:$/', $line, $matches)) {
            // "NAME:" on its own opens a multi-line section.
            $readMeta = false;
            $section = strtolower($matches['meta']);
            continue;
        }

        if ($readMeta && preg_match('/^(?P<meta>[\w\s]+?)\: (?P<value>.*)$/', $line, $matches)) {
            $key = strtolower($matches['meta']);
            $value = $matches['value'];

            if (!array_key_exists($key, $post)) {
                $post[$key] = $value;
            } elseif (is_array($post[$key])) {
                $post[$key][] = $value;
            } else {
                // Second occurrence of the key: promote the scalar to a list.
                $post[$key] = array($post[$key], $value);
            }
            continue;
        }

        if ($section === 'body') {
            $bodyLines[] = $line;
        }
    }

    $post['body'] = implode("\n", $bodyLines);

    return $post;
}
/**
 * Read a Movable Type export file and decode every entry in it.
 *
 * Entries are separated by a line consisting of exactly eight dashes. Chunks
 * for which getPost() returns null (blank chunks) are skipped.
 *
 * @param string $path Path of the export file.
 *
 * @return array List of decoded posts as returned by getPost().
 *
 * @throws \RuntimeException When the file cannot be read.
 */
function getPosts($path)
{
    $contents = file_get_contents($path);
    if ($contents === false) {
        // An unreadable file used to be treated silently as "no posts".
        throw new \RuntimeException(sprintf('Could not read "%s".', $path));
    }

    // Normalise CRLF / CR line endings so the separator is recognised no
    // matter which platform produced the export.
    $contents = str_replace(array("\r\n", "\r"), "\n", $contents);

    // Match the separator as a whole line. Unlike "\n--------\n" this also
    // recognises a final separator that is not followed by a newline, which
    // would otherwise leak into the body of the last post.
    $chunks = preg_split('/^--------$\n?/m', $contents);

    $posts = array();
    foreach ($chunks as $data) {
        $post = getPost($data);
        if ($post !== null) {
            $posts[] = $post;
        }
    }

    return $posts;
}
/**
 * Decode one raw entry and normalise its category and date fields.
 *
 * The 'category' value is always turned into a list of lower-cased names
 * (empty when the entry has none). When a 'date' value is present, the result
 * of parsing it with the "m/d/Y h:i:s A" format (for example
 * "11/23/2009 00:09:52 PM") is stored under 'datetime'; that value is false
 * when the date does not match the format.
 *
 * @param string $data Raw text of one entry.
 *
 * @return array|null The decoded post, or null when the trimmed text is empty.
 */
function getPost($data)
{
    $raw = trim($data);
    if (empty($raw)) {
        return null;
    }

    $entry = decodePost($raw);

    // A single CATEGORY line decodes to a string, several to a list.
    $categories = array_key_exists('category', $entry)
        ? (array) $entry['category']
        : array();
    $entry['category'] = categoryToLower($categories);

    if (isset($entry['date'])) {
        $entry['datetime'] = \DateTime::createFromFormat('m/d/Y h:i:s A', $entry['date']);
    }

    return $entry;
}
/**
 * Lower-case every category name.
 *
 * @param array $categories Category names.
 *
 * @return array The lower-cased names as a list re-indexed from zero, in the
 *               original order.
 */
function categoryToLower(array $categories)
{
    // array_map() keeps the input keys; array_values() re-indexes the result.
    return array_values(array_map('strtolower', $categories));
}
// converter
/**
 * Render a decoded post as a Markdown document with a front-matter header.
 *
 * The body is converted through toMarkdownBody(), which works inside the
 * "_tmp" directory; that directory is created here when it does not exist.
 *
 * @param array $post Decoded post; its 'body' value is the text to convert.
 *
 * @return string Front matter, a newline, then the converted body.
 */
function toMarkdown(array $post)
{
    $frontMatter = toMetaMarkdown($post);

    $workDir = '_tmp';
    if (is_dir($workDir) === false) {
        mkdir($workDir);
    }

    return $frontMatter . "\n" . toMarkdownBody($workDir, $post['body']);
}
/**
 * Build the YAML front-matter header for a post.
 *
 * The title is written as a double-quoted YAML scalar, so titles containing
 * ": ", "#", quotes or leading brackets still yield a valid header. Categories
 * are written as an unquoted flow sequence ("[a, b]"), or left empty when the
 * post has none.
 *
 * @param array $post Decoded post with 'title', 'category' and 'datetime'
 *                    values; 'datetime' must be a date/time object.
 *
 * @return string The front-matter block, terminated by a newline.
 *
 * @throws \InvalidArgumentException When 'datetime' is missing or is not a
 *                                   date/time object (e.g. the date could not
 *                                   be parsed).
 */
function toMetaMarkdown(array $post)
{
    // Fail with a clear message instead of a fatal "member function on a
    // non-object" error further down.
    if (!isset($post['datetime']) || !($post['datetime'] instanceof \DateTimeInterface)) {
        throw new \InvalidArgumentException('Post has no valid "datetime" value.');
    }

    $template = implode("\n", array(
        '---',
        'layout: post',
        'title: %s',
        'date: %s',
        'comments: false',
        'categories: %s',
        'published: false',
        '---',
        '',
    ));

    // Escape backslashes and double quotes, then wrap in double quotes, so the
    // title is always read back as one string.
    $title = isset($post['title']) ? (string) $post['title'] : '';
    $quotedTitle = '"' . strtr($title, array('\\' => '\\\\', '"' => '\\"')) . '"';

    if (!empty($post['category'])) {
        $categories = sprintf('[%s]', implode(', ', $post['category']));
    } else {
        $categories = '';
    }

    return sprintf($template, $quotedTitle, $post['datetime']->format('Y-m-d H:i'), $categories);
}
/**
 * Convert an HTML body to Markdown by running pandoc.
 *
 * The HTML is written to "body.html" inside $dir, pandoc is run from that
 * directory to produce "body.markdown", and both files are removed again
 * before returning or throwing.
 *
 * @param string $dir  Existing, writable working directory.
 * @param string $body HTML to convert.
 *
 * @return string The Markdown produced by pandoc.
 *
 * @throws \RuntimeException When a temporary file cannot be written or read,
 *                           or when pandoc exits with a non-zero status or
 *                           prints anything to standard output.
 */
function toMarkdownBody($dir, $body)
{
    $bodyHtml = 'body.html';
    $filename = 'body.markdown';
    $bodyPath = $dir . '/' . $bodyHtml;
    $bodyMarkdownPath = $dir . '/' . $filename;

    if (file_put_contents($bodyPath, $body) === false) {
        throw new \RuntimeException(sprintf('Could not write "%s".', $bodyPath));
    }

    // Escape every argument so a directory name with spaces or shell
    // metacharacters cannot alter the command. "&&" keeps pandoc from running
    // in the wrong directory when the cd fails.
    $cmd = sprintf(
        'cd %s && pandoc -f html -t markdown %s -o %s',
        escapeshellarg($dir),
        escapeshellarg($bodyHtml),
        escapeshellarg($filename)
    );

    $output = array();
    exec($cmd, $output, $returnCode);
    unlink($bodyPath);

    if ($returnCode !== 0 || !empty($output)) {
        // Do not leave a partial result behind for the next conversion.
        if (is_file($bodyMarkdownPath)) {
            unlink($bodyMarkdownPath);
        }
        throw new \RuntimeException('pandoc failure.');
    }

    $markdown = file_get_contents($bodyMarkdownPath);
    if ($markdown === false) {
        throw new \RuntimeException(sprintf('Could not read "%s".', $bodyMarkdownPath));
    }
    unlink($bodyMarkdownPath);

    return $markdown;
}
// dumper
/**
 * Write one post as a Markdown file into the given directory.
 *
 * The file is named "<Y-m-d>-<YmdHis>.markdown", both parts being formatted
 * from the post's 'datetime' value.
 *
 * @param string $dir  Target directory.
 * @param array  $post Decoded post including a 'datetime' object.
 */
function dump($dir, array $post)
{
    $document = toMarkdown($post);

    $stamp = $post['datetime'];
    $target = $dir . '/' . $stamp->format('Y-m-d') . '-' . $stamp->format('YmdHis') . '.markdown';

    file_put_contents($target, $document);
}
/**
 * Write every post in the list to the given directory, in list order.
 *
 * @param string $dir   Target directory.
 * @param array  $posts List of decoded posts.
 */
function dumpAll($dir, array $posts)
{
    array_walk($posts, function ($entry) use ($dir) {
        dump($dir, $entry);
    });
}
// run
// Convert every entry of the export file into a Markdown file under "_posts",
// creating that directory first when it does not exist.
$file = 'movable_type.txt';
$dir = '_posts';
if (is_dir($dir) === false) {
    mkdir($dir);
}
$posts = getPosts($file);
dumpAll($dir, $posts);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment