Last active
August 29, 2015 14:18
-
-
Save hughgrigg/4c2449ebb15ecbf22e92 to your computer and use it in GitHub Desktop.
Jekyll WordPress import post processing gulpfile for Hugo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is a throwaway / single-use gulpfile I used to process and tidy up the
// files resulting from using Jekyll's WordPress exporter. I wanted to move from
// WordPress to Hugo with a relatively large number of posts; this gulpfile let
// me quickly process all the content files and get on with building the site.
var | |
gulp = require('gulp'), | |
replace = require('gulp-replace'), | |
rename = require('gulp-rename'), | |
entities = require('gulp-html-entities'), | |
filter = require('gulp-filter'), | |
chalk = require('chalk'), | |
through = require('through-gulp'); | |
/**
 * Return the last segment of a file object's path (its file name).
 *
 * The path is split on both "/" and "\" so that the same name is returned
 * regardless of which separator style the path uses; splitting on "/" alone
 * would hand back the entire path for a backslash-separated path.
 *
 * @param {{path: string}} file - Object whose `path` property is read.
 * @returns {string} Everything after the last separator, or the whole path
 *   when it contains no separator. Empty string if the path ends in one.
 */
function fileName(file) {
  var pathParts = file.path.split(/[\\/]/);
  return pathParts[pathParts.length - 1];
}
// The `posts` task: reads every ./_posts/*.markdown file, drops very small
// files, moves each file into a <year>/<month>/ folder with a .md extension,
// adds a `slug` entry to the front matter, tidies the front matter, decodes
// HTML entities, rewrites site URLs, converts a handful of HTML constructs to
// Markdown or Hugo shortcodes, and writes the result under ./processed/.
//
// The replacements are order-sensitive; comments below call out where a rule
// must run before another.
gulp.task('posts', function() {
  return gulp.src('./_posts/*.markdown')
    // get rid of junk files and warn about short ones
    .pipe(filter(function(file) {
      if (file.contents.length < 2000) {
        var name = fileName(file);
        if (file.contents.length < 1500) {
          console.log(chalk.red(
            'Removed junk file ' + name +' (' + file.contents.length + ' chars)'
          ));
          return false;
        } else {
          console.log(chalk.yellow(
            name + ' is quite short (' + file.contents.length + ' chars)'
          ));
        }
      }
      return true;
    }))
    // organise into year-month folders
    .pipe(rename(function(path) {
      var year, month;
      // basename is sliced as "YYYY-MM-...": chars 0-3 are the year, 5-6 the month
      year = path.basename.slice(0,4);
      month = path.basename.slice(5,7);
      path.dirname += '/' + year + '/' + month + '/';
      path.extname = '.md';
    }))
    // save slug in front matter
    .pipe(through.map(function(file) {
      var name = fileName(file),
        // skip the first 11 characters and drop the trailing 3 (".md" after
        // the rename above)
        slug = name.slice(11, name.length - 3);
      // A replacer function is used so that "$" sequences in a slug are
      // inserted literally instead of being read as replacement patterns.
      // NOTE(review): `new Buffer` is deprecated in current Node; kept because
      // the pinned gulp 3.8 toolchain targets older Node versions.
      file.contents = new Buffer(file.contents.toString().replace(
        '---\nlayout: post\n',
        function() {
          return '---\nlayout: post\nslug: ' + slug + '\n';
        }
      ));
      return file;
    }))
    // tidy up front matter
    .pipe(replace(/(wordpress_id|wordpress_url|author_login|author_email|author_url): .*?\n/g, ''))
    .pipe(replace(/\npublished: .*\n/, '\n'))
    .pipe(replace(/\nstatus: .*\n/, '\n'))
    .pipe(replace(/author:\n(.*?\n){4}/, 'author: Hugh Grigg\n'))
    .pipe(replace(/.*?: !binary \|-\n .*?\n/g, ''))
    .pipe(replace(/\n(title|excerpt):[\s!]+(.*)\n/g, '\n$1: >\n $2\n'))
    .pipe(replace(/\ntitle: >\n '(.*?)'/, '\ntitle: >\n $1'))
    .pipe(replace(/\nexcerpt:/, '\ndescription:'))
    // decode wordpress' html entity encoding
    .pipe(entities('decode'))
    // fix urls
    .pipe(replace('http://eastasiastudent.net/wp-content/uploads/', '/img/'))
    .pipe(replace('http://eastasiastudent.net/', '/'))
    // styled paragraphs -> hugo tags; these must run before the generic <p>
    // handling below, which would otherwise strip the styled <p> tags first
    // and leave these rules with nothing to match
    .pipe(replace(/<p style="font-size: smaller;">(.*?)<\/p>/g, '\n{{< note >}}$1{{< /note >}}\n'))
    .pipe(replace(/<p style="font-size: larger;">(.*?)<\/p>/g, '{{< larger >}}$1{{< /larger >}}'))
    // apply some html -> markdown replacements
    .pipe(replace(/<br ?\/?>\n/g, '\n'))
    // attribute values are matched with [^"\n]* so a capture can never run
    // past its closing quote into a following attribute (e.g. href swallowing
    // title)
    .pipe(replace(/<a href="([^"\n]*)">(.*?)<\/a>/g, '[$2]($1)'))
    .pipe(replace(/<a href="([^"\n]*)" title="([^"\n]*)">(.*?)<\/a>/g, '[$3]($1 "$2")'))
    .pipe(replace(/<a title="([^"\n]*)" href="([^"\n]*)">(.*?)<\/a>/g, '[$3]($2 "$1")'))
    .pipe(replace(/<em>(.*?)<\/em>/g, '_$1_'))
    // markdown strong emphasis is a double asterisk; a single one is <em>
    .pipe(replace(/<strong>(.*?)<\/strong>/g, '**$1**'))
    .pipe(replace(/<h([0-9])>(.*?)<\/h([0-9])>/g, '\n<h$1>$2</h$3>\n'))
    .pipe(replace(/<(ul|ol|p|h[0-9]|blockquote)/g, '\n<$1'))
    .pipe(replace(/<\/(ul|ol|p|h[0-9]|blockquote)>/g, '\n</$1>\n'))
    .pipe(replace(/<h1>([\s\S]*?)<\/h1>/g, '# $1'))
    .pipe(replace(/<h2>([\s\S]*?)<\/h2>/g, '## $1'))
    .pipe(replace(/<h3>([\s\S]*?)<\/h3>/g, '### $1'))
    .pipe(replace(/<h4>([\s\S]*?)<\/h4>/g, '#### $1'))
    .pipe(replace(/<h5>([\s\S]*?)<\/h5>/g, '##### $1'))
    .pipe(replace(/<h6>([\s\S]*?)<\/h6>/g, '###### $1'))
    .pipe(replace(/<p( style=".*?")?>([\s\S]*?)<\/p>/g, '\n$2\n'))
    .pipe(replace(/<li>(.*?)<\/li>/g, ' - $1'))
    .pipe(replace(/<\/?[ou]l( style=.*?)?>/g, ''))
    .pipe(replace(/\n\s?<\/p>\n?/g, ''))
    .pipe(replace(/\n\s+\n/g, '\n\n'))
    // apply some custom hugo tags
    .pipe(replace(/<span class="chinese"( lang="zh")?>(.*?)<\/span>/g, '{{< hanzi >}}$2{{< /hanzi >}}'))
    .pipe(replace(/<span class="hanzi"( lang="zh")?>(.*?)<\/span>/g, '{{< hanzi >}}$2{{< /hanzi >}}'))
    .pipe(replace(/<span class="reading">(.*?)<\/span>/g, '{{< reading >}}$1{{< /reading >}}'))
    .pipe(replace(/<span class="pinyin">(.*?)<\/span>/g, '{{< reading >}}$1{{< /reading >}}'))
    .pipe(replace(/<span class="gloss">(.*?)<\/span>/g, '{{< gloss >}}$1{{< /gloss >}}'))
    .pipe(replace(/<span class="note">(.*?)<\/span>/g, '{{< note >}}$1{{< /note >}}'))
    .pipe(replace(/<span style="font-size: smaller;">(.*?)<\/span>/g, '{{< note >}}$1{{< /note >}}'))
    .pipe(replace(/<span style="color: #888888;">(.*?)<\/span>/g, '{{< note >}}$1{{< /note >}}'))
    // tidy up some leftovers
    .pipe(replace(/<p style="font-size: larger;">/, ''))
    .pipe(replace(/<span style="line-height: ?(.*?);?">(.*?)<\/span>/g, '$2'))
    .pipe(replace(/ ?wp-image-[0-9]+/g, ''))
    .pipe(replace(/ (width|height)="[0-9]+"/g, ''))
    // collapse any run of three or more newlines in a single pass
    .pipe(replace(/\n{3,}/g, '\n\n'))
    .pipe(replace(/\n\n$/, '\n'))
    // write
    .pipe(gulp.dest('./processed/'));
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "post-process-jekyll-wordpress",
  "version": "0.0.0",
  "description": "",
  "main": "gulpfile.js",
  "author": "",
  "license": "BSD-2-Clause",
  "devDependencies": {
    "gulp-rename": "~1.2.2",
    "gulp-replace": "~0.5.3",
    "gulp": "~3.8.11",
    "gulp-html-entities": "0.0.3",
    "gulp-filter": "~2.0.2",
    "chalk": "~1.0.0",
    "through-gulp": "~0.3.8"
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment