Skip to content

Instantly share code, notes, and snippets.

@thekad
Created April 5, 2011 22:55
Show Gist options
  • Save thekad/904757 to your computer and use it in GitHub Desktop.
Save thekad/904757 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import argparse
import os
import MySQLdb
import re
import sys
import unicodedata
import yaml
class Dumper(object):
    """Export published Jaws blog posts from MySQL as a YAML document.

    Posts are collected into ``self.posts``, a dict keyed by blog entry id
    whose values are dicts with the keys ``title``, ``body``, ``createtime``,
    ``author`` and (after ``get_tags``) ``tags``.

    Any database error is reported on stderr and terminates the process:
    exit status 1 for connection/setup, 2 for the posts query, 3 for a
    tags query.
    """

    # Immutable fallbacks only; the real values are assigned in __init__.
    # ``posts`` is deliberately NOT a class attribute: a class-level dict
    # would be shared (and mutated) by every Dumper instance.
    prefix = ''
    cursor = None

    # Jaws markup patterns, compiled once for the whole class.
    # [url='http://...']label[/url]  (single or double quotes)
    _URL_WITH_TEXT = re.compile(
        r'\[url=[\'\"](?P<url>.*?)[\'\"]\](?P<text>.*?)\[\/url\]', re.U)
    # [url]http://...[/url]
    _URL_BARE = re.compile(r'\[url\](?P<url>.*?)\[\/url\]', re.U)
    # [code ...]...[/code]; re.S lets the body span several lines.
    _CODE = re.compile(r'\[code.*?\](?P<code>.*?)\[\/code\]', re.U | re.S)

    def __init__(self, user='root', host='localhost', password='', port=3306, database='jaws', prefix='jaws_'):
        """Connect to MySQL, force UTF-8 on the connection and probe the schema.

        :param user: MySQL user name.
        :param host: MySQL host.
        :param password: MySQL password.
        :param port: MySQL TCP port.
        :param database: name of the database holding the Jaws tables.
        :param prefix: table-name prefix of the Jaws installation.

        Exits the process with status 1 if any step fails.
        """
        self.prefix = prefix
        self.posts = {}
        try:
            conn = MySQLdb.connect(host=host, user=user, passwd=password, db=database, port=port)
            conn.set_character_set('utf8')
            self.cursor = conn.cursor()
            for statement in ('SET NAMES utf8;',
                              'SET CHARACTER SET utf8;',
                              'SET character_set_connection=utf8;'):
                self.cursor.execute(statement)
            # Probe query: fails early if the prefixed users table is missing.
            # Its result is never read.
            self.cursor.execute('SELECT COUNT(*) FROM %(prefix)susers' % {'prefix': self.prefix})
        except Exception as e:
            self._fail(e, 1)

    def _fail(self, error, status):
        """Write *error* to stderr and exit the process with *status*."""
        # stderr, not stdout: stdout carries the YAML produced by dump().
        sys.stderr.write(str(error) + '\n')
        sys.exit(status)

    def _convert_markup(self, text):
        """Return *text* with Jaws bracket markup rewritten.

        ``[friend]`` tags are dropped, ``[term]`` becomes backticks, CRLF
        becomes LF, ``[url=...]`` becomes ``[text](url)``, a bare ``[url]``
        becomes ``(url)`` and ``[code]`` becomes ``<pre><code>``.
        """
        text = text.replace('[friend]', '').replace('[/friend]', '')
        text = text.replace('[term]', '`').replace('[/term]', '`')
        text = text.replace('\r\n', '\n')
        text = self._URL_WITH_TEXT.sub(r'[\g<text>](\g<url>)', text)
        text = self._URL_BARE.sub(r'(\g<url>)', text)
        text = self._CODE.sub(r'\n<pre>\n<code>\g<code></code>\n</pre>\n', text, count=0)
        return text

    def get_tags(self, post_id):
        """Store the lower-cased category names of *post_id* under its ``tags`` key.

        *post_id* must already be a key of ``self.posts`` (KeyError otherwise).
        Exits the process with status 3 if the query fails.
        """
        # The table prefix has to be formatted into the statement (identifiers
        # cannot be bound), but the entry id is passed as a driver parameter.
        sql = (
            'SELECT cat.name '
            'FROM {prefix}blog_category AS cat, {prefix}blog_entrycat AS entrycat '
            'WHERE entrycat.category_id = cat.id AND entrycat.entry_id = %s'
        ).format(prefix=self.prefix)
        try:
            self.cursor.execute(sql, (post_id,))
        except Exception as e:
            self._fail(e, 3)
        # Each row is a 1-tuple holding the category name.
        self.posts[post_id]['tags'] = [name.lower() for (name,) in self.cursor.fetchall()]

    def get_posts(self):
        """Load every published post into ``self.posts`` with converted markup.

        Exits the process with status 2 if the query fails.
        """
        sql = (
            'SELECT blog.id, blog.title, user.username, blog.text, blog.createtime '
            'FROM %(prefix)sblog AS blog, %(prefix)susers as user '
            'WHERE blog.user_id = user.id '
            'AND published = 1' % {'prefix': self.prefix}
        )
        try:
            self.cursor.execute(sql)
        except Exception as e:
            self._fail(e, 2)
        for post_id, title, author, text, createtime in self.cursor.fetchall():
            self.posts[post_id] = {
                'title': title,
                'body': self._convert_markup(text),
                'createtime': createtime,
                'author': author,
            }

    def dump(self):
        """Fetch all posts and their tags, then print them as one YAML document."""
        self.get_posts()
        # get_tags only mutates the per-post dicts, never the key set, so
        # iterating the dict directly is safe.
        for post_id in self.posts:
            self.get_tags(post_id)
        print(yaml.safe_dump(self.posts, default_flow_style=False, encoding='utf-8', explicit_start=True))
def main(argv):
    """Parse command-line options from *argv* and dump the blog as YAML.

    :param argv: argument list without the program name
        (i.e. ``sys.argv[1:]``).
    :returns: None, so ``sys.exit(main(...))`` exits with status 0 on success.
    """
    ap = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ap.add_argument('-H', '--host', help='MySQL host', default='localhost')
    # USER may be unset (cron jobs, Windows); fall back to Dumper's own
    # default user instead of crashing with KeyError before parsing.
    ap.add_argument('-u', '--user', help='MySQL user', default=os.environ.get('USER', 'root'))
    ap.add_argument('-p', '--password', help='MySQL password', default='')
    ap.add_argument('-P', '--port', help='MySQL port', type=int, default=3306)
    ap.add_argument('-d', '--database', help='MySQL database', default='jaws')
    ap.add_argument('-r', '--prefix', help='Jaws prefix', default='')
    a = ap.parse_args(argv)
    d = Dumper(user=a.user, password=a.password, database=a.database,
               host=a.host, port=a.port, prefix=a.prefix)
    d.dump()
# Script entry point: run main() on the arguments after the program name
# and use its return value as the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment