Skip to content

Instantly share code, notes, and snippets.

Created October 4, 2015 18:35
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
Quick & dirty conversion of exported WordPress posts into Markdown
(ns word-parsos.core
  (:require [clojure.xml]
            [clojure.zip :as zip]
            [clojure.data.zip.xml :as zip-xml]
            [clj-time.core :as t]
            [clj-time.format :as f]
            [cuerdas.core :as str]
            [clojure.pprint :as pprint]
            [clj-commons-exec :as exec])
  (:import (java.io ByteArrayInputStream StringWriter)
           (java.util Locale)))
(def xml-file
  "Path of the WordPress export XML that -main parses."
  "dev-resources/wordpress.2015-09-23_posts.xml")

(def html-out
  "Path prefix under which write-html stores one .html file per post."
  "dev-resources/out/")

(def asc-out
  "Path prefix under which -main places the converted .md files."
  "dev-resources/md-out/posts/")
(defn parse-xml
  "Reads the whole resource `s` (anything `slurp` accepts) into memory and
  parses it with `clojure.xml/parse`, returning the parsed element tree
  (nested maps with :tag, :attrs and :content)."
  [s]
  ;; NOTE(review): the heads of the two outer calls were missing from the
  ;; source as found; `clojure.xml/parse` over a ByteArrayInputStream is a
  ;; reconstruction -- confirm against the original gist.
  ;; `slurp` decodes as UTF-8 by default, so the bytes are re-encoded as UTF-8
  ;; explicitly instead of relying on the platform default charset.
  (clojure.xml/parse
    (java.io.ByteArrayInputStream.
      (.getBytes ^String (slurp s) "UTF-8"))))
(def date-formatter
  "clj-time :rfc822 formatter bound to the \"en\" locale; used by parse-date."
  (-> (f/formatters :rfc822)
      (f/with-locale (Locale. "en"))))

(def fname-formatter
  "clj-time formatter with the pattern yyyy-MM-dd; used by create-filename."
  (f/formatter "yyyy-MM-dd"))
(defn parse-date
  "Parses `date-str` with `date-formatter` and returns the result of
  `f/parse`."
  [date-str]
  (->> date-str
       (f/parse date-formatter)))
(defn title-as-fname
  "Turns a post title into a string that is safe to use inside a file name:
  lower-cased, with every run of characters that are not letters or digits
  collapsed into a single \"-\", and without leading or trailing dashes.
  A nil title yields the empty string.

  NOTE(review): the original body was cut off after `(-> title` in the source
  as found. This is a conservative reconstruction built only on JDK string
  methods -- confirm the intended slug rules against the original gist."
  [title]
  (let [lowered (.toLowerCase ^String (str title) java.util.Locale/ROOT)
        ;; \p{L} / \p{N} keep non-ASCII letters and digits (e.g. umlauts).
        dashed  (.replaceAll lowered "[^\\p{L}\\p{N}]+" "-")]
    (.replaceAll dashed "^-+|-+$" "")))
(defn create-filename
  "Builds the base file name for a post: `date` rendered with
  `fname-formatter`, a \"-\", then the result of `title-as-fname` on `title`."
  [date title]
  (let [day-part   (f/unparse fname-formatter date)
        title-part (title-as-fname title)]
    (str day-part "-" title-part)))
(defn write-html
  "Writes the :content of `item` to the file `html-out` + (:filename item) +
  \".html\" and returns that path as a string."
  [item]
  (let [fname (str html-out (:filename item) ".html")]
    (spit fname (:content item))
    ;; Return the path rather than spit's nil: -main hands this function's
    ;; result to convert-to-md as the HTML input file.
    fname))
(defn convert-to-md
  "Runs a shell pipeline (`cat | sed | sed | pandoc`) that converts
  `html-file` to Markdown and redirects the output into `out-file`.
  Prints the command result and returns `out-file`."
  [html-file out-file]
  (println "convert " html-file " to " out-file)
  (let [;; NOTE(review): both sed expressions match the literal text
        ;; `http:///wp-content`; the host part of the URLs appears to be
        ;; missing from the source as found -- restore the real host names.
        ;; NOTE(review): the file names are spliced into the shell command
        ;; unquoted, so they must not contain spaces or shell metacharacters.
        cmd (str "cat " html-file " | "
                 "sed -e 's/http:\\/\\/\\/wp-content/\\/img/g' | "
                 "sed -e 's/http:\\/\\/\\/wp-content/\\/img/g' | "
                 "pandoc -f html -t markdown_github" " > " out-file)
        ;; exec/sh's result is dereferenced here, so the command has finished
        ;; before we continue.
        re  @(exec/sh ["sh" "-c" cmd])]
    (println "Result " (pr-str re)))
  ;; -main passes this function's result to add-header-info as the file to
  ;; amend, so the output path has to be the return value.
  out-file)
(defn add-header-info
  "Prepends an EDN metadata map to the file `filename`, in place.

  The header carries :title (from (:title item)), :tags (from
  (:category item)) and the fixed entries :layout, :banner and :hide-disqus?.
  It is followed by a blank line and the previous file content."
  [filename item]
  (let [pre (str "{\n"
                 ;; pr-str escapes quotes and backslashes, so a title that
                 ;; contains a double quote still yields readable EDN.
                 ":title " (pr-str (str (:title item))) "\n"
                 ":layout :post\n"
                 ":banner \"/img/home-bg.jpg\"\n"
                 ":hide-disqus? true\n"
                 ":tags " (pr-str (:category item)) "\n"
                 "}\n\n")
        content (slurp filename)]
    (spit filename (str pre content))))
(defn item->map
  "Extracts the fields of one feed item into a map.

  `item` is a zipper location as produced by `zip-xml/xml->` (see -main).
  Result keys:
    :title    text of the item's :title child
    :date     text of the :pubDate child, run through parse-date
    :category vector of the :nicename attributes of all :category children
    :content  text of the content:encoded child
    :filename create-filename applied to :date and :title"
  [item]
  (let [m {:title    (zip-xml/xml1-> item :title zip-xml/text)
           :date     (parse-date (zip-xml/xml1-> item :pubDate zip-xml/text))
           ;:date (Long/valueOf (zip-xml/attr item :date))
           :category (vec (zip-xml/xml-> item :category (zip-xml/attr :nicename)))
           ;; The tag name contains a colon, so the keyword is built from a
           ;; string instead of being written as a literal.
           :content  (zip-xml/xml1-> item (keyword "content:encoded") zip-xml/text)
           ;:segments (mapv segment->map
           ;                (zip-xml/xml-> item :segments :segment))
           }]
    ;; :filename depends on the already extracted :date and :title.
    (assoc m :filename (create-filename (:date m) (:title m)))))
(defn -main
  "Entry point: parses `xml-file`, turns every :channel/:item into a post
  map, and for each post writes the HTML, converts it to Markdown under
  `asc-out`, and prepends the header to the resulting file."
  []
  (let [channel-items (-> (parse-xml xml-file)
                          (zip/xml-zip)
                          (zip-xml/xml-> :channel :item))
        posts         (mapv item->map channel-items)]
    (println "Converting " (count posts) " posts.")
    (doseq [post posts]
      ;; Each step's return value is the file the next step works on.
      (-> (write-html post)
          (convert-to-md (str asc-out (:filename post) ".md"))
          (add-header-info post)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment