Skip to content

Instantly share code, notes, and snippets.

@zohooo
Last active December 18, 2015 01:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zohooo/5704672 to your computer and use it in GitHub Desktop.
Save zohooo/5704672 to your computer and use it in GitHub Desktop.
migrate blogs from cnblogs to markdown
var feedparser = require('feedparser'),
async = require('async'),
moment = require('moment'),
tomd = require('to-markdown').toMarkdown,
fsextra = require('fs-extra');
// Path of the feed file handed to feedparser.parseFile in the migration pipeline below.
var source = 'CNBlogs_BlogBackup.xml',
// Directory prefix under which the generated '<date>-<slug>.md' files are written.
target = 'post';
/**
 * Moves a line break that sits immediately before a closing anchor tag
 * to immediately after it, for every occurrence in the input.
 *
 * @param {string} html - HTML source to clean up.
 * @returns {string} The HTML with each '<br /></a>' rewritten as '</a><br />'.
 */
function tidyHtml(html) {
  var misplacedBreak = '<br /></a>';
  var relocatedBreak = '</a><br />';
  // Splitting on the literal and re-joining swaps every occurrence.
  return html.split(misplacedBreak).join(relocatedBreak);
}
/**
 * Builds a YAML front-matter header and prepends it to a Markdown body.
 *
 * If the body contains a line of the form '[YAML] Key: value, Key2: value2'
 * (preceded by a newline), that line is removed from the body and each
 * 'Key: value' pair is merged into `head` under the lower-cased key. Only the
 * first such line is processed, because the pattern has no global flag.
 *
 * Note: `head` is modified in place with the extracted pairs.
 *
 * @param {Object} head - Front-matter fields; mutated with any extracted pairs.
 * @param {string} mark - Markdown text, optionally containing a '[YAML]' line.
 * @returns {string} '---' delimited front matter, a blank line, then the body.
 */
function joinYaml(head, mark) {
  var body = mark.replace(/ *\n\[YAML\] ([\w\W]+?)(\n|$)/, function (m, c1) {
    // Non-breaking-space entities would otherwise defeat the ', ' / ': ' splits.
    var pairs = c1.replace(/&nbsp;/g, ' ').split(', ');
    pairs.forEach(function (item) {
      var key = item.split(': ')[0];
      // Slice instead of taking split()[1] so values may themselves contain ': '.
      var value = item.slice(key.length + 2);
      head[key.toLowerCase()] = value;
    });
    return '';
  });

  var yaml = '';
  // The loop variable is declared locally; the original assigned to an
  // undeclared `key`, which leaks a global (and throws in strict mode).
  for (var field in head) {
    yaml += field + ': ' + head[field] + '\n';
  }
  return '---\n' + yaml + '---\n\n' + body;
}
/**
 * Post-processes converted Markdown text.
 *
 * Steps, in order:
 *  1. Normalizes CRLF / CR line endings to LF.
 *  2. Rewrites '<div class="cnblogs_Highlighter"><pre class="brush:LANG;gutter:false;">'
 *     blocks as fenced code blocks tagged with LANG, decoding the
 *     &lt; / &gt; / &amp; entities inside them.
 *  3. Removes the backslash from an escaped dot that follows a digit ('1\.' -> '1.').
 *  4. Inserts a space before every newline that is directly followed by '['.
 *
 * @param {string} mark - Markdown text to clean up.
 * @returns {string} The cleaned Markdown.
 */
function tidyMark(mark) {
  var re = /<div class="cnblogs_Highlighter">\n<pre class="brush:(\w+?);gutter:false;">([\w\W]+?)\n?<\/pre>\n<\/div>/g;
  return mark
    .replace(/\r\n|\r/g, '\n')
    .replace(re, function (m, c1, c2) {
      // &amp; is decoded last so that '&amp;lt;' yields '&lt;' rather than '<'.
      var code = c2.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');
      return '```' + c1 + '\n' + code + '\n```';
    })
    // The dot must be escaped: the original pattern /(\d)\\./ matched a
    // backslash followed by ANY character and turned e.g. '2\*' into '2.'.
    .replace(/(\d)\\\./g, '$1.')
    .replace(/\n\[/g, ' \n[');
}
// Migration pipeline: parse the feed file, then write one Markdown file per post.
async.waterfall([
  // Step 1: parse `source`; the parser's results are forwarded to step 2.
  function (next) {
    feedparser.parseFile(source, next);
  },
  // Step 2: convert every post and write it under `target`.
  function (meta, posts, next) {
    async.forEach(posts, function (item, done) {
      // Slug = last path segment of the post URL, cut at its first '.'.
      var link = (item.origlink ? item.origlink : item.link).split('/').reverse()[0];
      var date = moment(item.pubdate);
      var name = target + '/' + date.format('YYYY-MM-DD') + '-' + link.split('.')[0] + '.md';
      var mark = tomd(tidyHtml(item.description));
      var head = {
        title: item.title,
        date: date.format('YYYY-MM-DD HH:mm:ss')
      };
      mark = tidyMark(joinYaml(head, mark));
      fsextra.outputFile(name, mark, done);
    }, function (err) {
      // Forward write failures; the original dropped `err` here and always
      // reported success even when a file could not be written.
      next(err, posts.length);
    });
  }
], function (err, length) {
  if (err) throw err;
  console.log('%d posts migrated.', length);
});
{
"name": "migrator-cnblogs",
"version": "0.0.1",
"description": "migrate blogs from cnblogs to markdown",
"dependencies": {
"async": "*",
"feedparser": "*",
"moment": "*",
"to-markdown": "*",
"fs-extra": "*"
}
}
@nicolastinkl
Copy link

nice

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment