Skip to content

Instantly share code, notes, and snippets.

@redraiment
Created December 3, 2013 15:15
Show Gist options
  • Save redraiment/7770887 to your computer and use it in GitHub Desktop.
Save redraiment/7770887 to your computer and use it in GitHub Desktop.
iKnowledge 1.0 HTML格式自动转换成2.0 Markdown格式
#!/usr/bin/ruby
# -*- coding: utf-8 -*-
require 'cgi'
# Tiny tokenizer/parser for the XHTML-like markup of iKnowledge 1.0 articles.
#
# The result is a tree of plain hashes:
#   text node : { :tag => '#text', :text => String }
#   element   : { :tag => String, :attr => Hash, :node => Array }  (:node is
#               absent for self-closed and void elements)
module Html
  # HTML elements that never have children or a closing tag, so an
  # occurrence without the trailing "/>" must not open a new nesting level.
  VOID_ELEMENTS = %w[area base br col embed hr img input link meta param source track wbr].freeze

  # Named entities CGI.unescapeHTML does not know about.
  NAMED_ENTITIES = {
    '&ldquo;' => '“',
    '&rdquo;' => '”',
    '&nbsp;' => ' ',
    '&hellip;' => '…'
  }.freeze
  NAMED_ENTITY_PATTERN = /&(?:ldquo|rdquo|nbsp|hellip);/

  # Decodes HTML entities in +text+ exactly once.
  #
  # The extra named entities are replaced *before* CGI.unescapeHTML runs;
  # doing it afterwards would decode "&amp;nbsp;" twice (to "&nbsp;" and
  # then to a space) instead of yielding the literal text "&nbsp;".
  def self.decode(text)
    CGI.unescapeHTML(text.gsub(NAMED_ENTITY_PATTERN) { |entity| NAMED_ENTITIES[entity] })
  end

  # Parses the inside of a tag (the text between "<" and ">" or "/>").
  #
  # @param content [String] e.g. 'a href="x" title="y"'
  # @return [Hash] { :tag => name, :attr => { attribute name => value } }
  def self.tag(content)
    name, rest = content.strip.split(/\s+/, 2)
    attrs = {}
    # Accept double- or single-quoted values and names containing "-" or ":"
    # (so data-title="x" is not mistaken for title="x").
    rest.to_s.scan(/([\w:-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/) do |key, double_quoted, single_quoted|
      attrs[key] = CGI.unescapeHTML(double_quoted || single_quoted)
    end
    { :tag => name, :attr => attrs }
  end

  # Builds the node list for one nesting level, consuming tokens until the
  # closing tag of the enclosing element or the end of input.
  #
  # @param token [Enumerator] token stream produced by {lex}
  # @return [Array<Hash>] sibling nodes of the current level
  def self.yacc(token)
    doc = []
    loop do
      e = token.next # StopIteration at end of input is rescued by `loop`
      break if e.start_with?('</')

      if !e.start_with?('<')
        doc << { :tag => '#text', :text => decode(e) }
      else
        self_closed = e.end_with?('/>')
        node = tag(self_closed ? e[1..-3] : e[1..-2])
        childless = self_closed || VOID_ELEMENTS.include?(node[:tag].to_s.downcase)
        node[:node] = yacc(token) unless childless
        doc << node
      end
    end
    doc
  end

  # Splits markup into tag tokens and text tokens: a cut is made before every
  # "<" and after every ">".
  #
  # @param content [String] raw markup
  # @return [Enumerator] tokens in document order
  def self.lex(content)
    content.split(/(?:(?=<)|(?<=>))/).to_enum
  end

  # Parses +xml+ into a list of top-level nodes.
  #
  # @param xml [String] raw markup
  # @return [Array<Hash>] top-level nodes
  def self.parse(xml)
    yacc(lex(xml))
  end
end
# Renders a parsed iKnowledge 1.0 article (a node hash as produced by the
# HTML parser in this file) as a Markdown post with a YAML front matter.
#
# Expected shape of the root node, as read by the constructor:
#   html[:attr]['title']            -> post title
#   html[:node][1][:node][0][:text] -> timestamp text ("DATE TIME ...")
#   html[:node][2..-1]              -> body elements
class Markdown
  # @return [String] first whitespace-separated field of the timestamp
  attr_reader :date

  # @param html [Hash] root node of the article
  def initialize(html)
    @title = html[:attr]['title']
    @timestamp = html[:node][1][:node][0][:text]
    @date = @timestamp.split(/\s+/)[0]
    @tags = html[:node][2..-1]
  end

  # @return [String] the front matter block, terminated by a newline
  def front_matter
    [
      '---',
      'layout: article',
      "title: #{@title}",
      "date: #{@timestamp}",
      'category:',
      'excerpt:',
      '---'
    ].join("\n") + "\n"
  end

  # Converts a list of sibling nodes to Markdown.
  #
  # @param tags [Array<Hash>] nodes to render
  # @param prefix [String] list marker/indent inherited from enclosing lists;
  #   "" at the top level
  # @return [String] rendered Markdown
  #
  # Exits the process with status 1 (via Kernel#abort) on an unsupported tag.
  #
  # NOTE(review): text nodes are stripped, so whitespace between inline
  # elements is dropped ("foo <b>bar</b>" renders as "foo*bar*") and the
  # leading indentation of <pre> text is lost — confirm whether intended.
  def md(tags, prefix = "")
    tags.map do |e|
      kids = e[:node] || []   # self-closed elements carry no :node key
      attrs = e[:attr] || {}
      case e[:tag]
      when '#text'
        e[:text].strip
      when /\Ah([1-6])\z/
        level = Regexp.last_match(1).to_i
        "#{'#' * level} #{md(kids)}\n\n"
      when 'p'
        indent = prefix == "" ? '' : ' ' * 4
        "#{indent}#{md(kids)}\n\n"
      when 'a'
        link(md(kids), attrs['href'])
      when 'span'
        # A span's title attribute is used as the link target.
        link(md(kids), attrs['title'])
      when 'i', 'b', 'strong'
        # NOTE(review): bold tags are rendered with single-asterisk emphasis,
        # same as <i> — confirm this is wanted rather than "**".
        "*#{md(kids)}*"
      when 'pre'
        content = md(kids)
        # Indent as a code block only when the <pre> holds raw text first
        # (not, e.g., a fenced <code language=...> child).
        content = content.gsub(/^/, ' ' * (prefix == "" ? 4 : 8)) if leading_text?(kids)
        content + "\n\n"
      when 'blockquote'
        content = md(kids).rstrip
        content = content.gsub(/^/, '> ') if leading_text?(kids)
        content + "\n\n"
      when 'code'
        if attrs.key?('language')
          "```#{attrs['language'].sub('shell', 'bash')}\n#{md(kids)}\n```"
        else
          "`#{md(kids)}`"
        end
      when 'ol'
        md(kids, prefix + '1. ') + "\n"
      when 'ul'
        md(kids, prefix + '+ ') + "\n"
      when 'li'
        prefix + md(kids, ' ' + prefix) + "\n"
      when 'dl'
        # Definition lists have no Markdown form; emit them as HTML.
        "<dl>\n" + md(kids, ' ') + "</dl>\n\n"
      when 'dt', 'dd'
        "#{prefix}<#{e[:tag]}>#{CGI.escapeHTML(md(kids))}</#{e[:tag]}>\n"
      when 'img'
        '{% img ' + File.basename(attrs['src']) + " %}\n\n"
      when 'sub', 'sup'
        "<#{e[:tag]}>#{CGI.escapeHTML(md(kids))}</#{e[:tag]}>"
      else
        # Diagnostics belong on stderr; abort also exits with status 1.
        abort "Unknown: #{e}"
      end
    end.join
  end

  # @return [String] the complete post: front matter followed by the body
  def to_s
    front_matter + md(@tags)
  end

  private

  # Renders an inline link, or just the text when there is no target.
  def link(text, target)
    target ? "[#{text}](#{target})" : text
  end

  # True when the first child exists and is a text node; false for an
  # element with no children at all.
  def leading_text?(kids)
    first = kids[0]
    !first.nil? && first[:tag] == '#text'
  end
end
# Converts one iKnowledge 1.0 HTML article file into a Markdown post named
# "<date>-<basename>.md" under "<home>/_posts".
class IKnowledge
  # Site root used when the caller does not supply one.
  DEFAULT_HOME = File.join("/", "Users", "redraiment", "Documents", "zzp.me")

  # Reads and parses the article immediately.
  #
  # @param filename [String] path of the source HTML file
  # @param home [String] site root; posts are written to its "_posts"
  #   sub-directory
  def initialize(filename, home = DEFAULT_HOME)
    @home = home
    # The script declares UTF-8; do not depend on the locale's encoding.
    html = Html.parse(File.read(filename, :encoding => 'UTF-8'))
    # Leading whitespace in the file parses as a text node, so take the
    # first real element as the article root rather than blindly html[0].
    root = html.find { |node| node[:tag] != '#text' }
    @md = Markdown.new(root)
    # Swap only a trailing ".html" extension, not the first ".html" found
    # anywhere in the name.
    @filename = "#{@md.date}-#{File.basename(filename).sub(/\.html\z/, '.md')}"
  end

  # Writes the rendered post to "<home>/_posts/<name>" and reports the path
  # on stdout.
  def save
    fn = File.join(@home, "_posts", @filename)
    File.open(fn, 'w', :encoding => 'UTF-8') { |f| f.write(to_s) }
    puts "save to #{fn}"
  end

  # @return [String] the rendered Markdown post
  def to_s
    @md.to_s
  end
end
# Command-line entry point: convert every HTML file named on the command
# line and save each resulting Markdown post.
# Without arguments, print a usage line to stderr and exit with status 1
# instead of failing inside File.read with an opaque TypeError.
abort "usage: #{File.basename($PROGRAM_NAME)} FILE.html [FILE.html ...]" if ARGV.empty?

ARGV.each { |path| IKnowledge.new(path).save }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment