Created
April 5, 2011 22:55
-
-
Save thekad/904757 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse | |
import os | |
import MySQLdb | |
import re | |
import sys | |
import unicodedata | |
import yaml | |
class Dumper(object):
    """Export published Jaws blog posts from MySQL as a YAML document.

    Posts are collected into ``self.posts``, a dict keyed by blog entry id
    whose values are dicts with the keys ``title``, ``body``, ``createtime``,
    ``author`` and (after ``get_tags``) ``tags``.

    Any database failure is reported on stderr and terminates the process:
    exit status 1 for connection/setup errors, 2 for the posts query and
    3 for the tags query.
    """

    prefix = ''
    # Per-instance state; the real values are assigned in __init__ so that
    # two Dumper instances never share one posts dict.
    posts = None
    cursor = None
    conn = None

    # BBCode-style patterns, compiled once for the whole class.
    _URL_WITH_TEXT = re.compile(r'\[url=["\'](?P<url>.*?)["\']\](?P<text>.*?)\[/url\]', re.U)
    _URL_BARE = re.compile(r'\[url\](?P<url>.*?)\[/url\]', re.U)
    _CODE_BLOCK = re.compile(r'\[code.*?\](?P<code>.*?)\[/code\]', re.U | re.S)

    def __init__(self, user='root', host='localhost', password='', port=3306, database='jaws', prefix='jaws_'):
        """Connect to MySQL, force utf8 on the session and probe the users table.

        ``prefix`` is prepended to every Jaws table name. The final
        ``SELECT COUNT(*)`` only verifies that ``<prefix>users`` is reachable.
        Exits with status 1 if any step fails.
        """
        self.prefix = prefix
        self.posts = {}
        try:
            conn = MySQLdb.connect(host=host, user=user, passwd=password, db=database, port=port)
            # Keep the connection referenced for the lifetime of the dumper:
            # some MySQLdb releases give the cursor only a weak reference to
            # it, so a purely local connection may be collected too early.
            self.conn = conn
            conn.set_character_set('utf8')
            self.cursor = conn.cursor()
            for statement in (
                'SET NAMES utf8;',
                'SET CHARACTER SET utf8;',
                'SET character_set_connection=utf8;',
            ):
                self.cursor.execute(statement)
            self.cursor.execute('SELECT COUNT(*) FROM %(prefix)susers' % {'prefix': self.prefix})
        except Exception as e:
            self._die(e, 1)

    @staticmethod
    def _die(error, exit_code):
        """Write ``error`` to stderr and exit the process with ``exit_code``."""
        # stderr, not stdout: stdout carries the YAML dump itself.
        sys.stderr.write('%s\n' % (error,))
        sys.exit(exit_code)

    def _execute(self, sql, exit_code, args=None):
        """Run ``sql`` on the cursor; on any failure report it and exit with ``exit_code``."""
        try:
            if args is None:
                self.cursor.execute(sql)
            else:
                self.cursor.execute(sql, args)
        except Exception as e:
            self._die(e, exit_code)

    @classmethod
    def _bbcode_to_markdown(cls, text):
        """Return ``text`` with the Jaws markup tags rewritten.

        * ``[friend]`` / ``[/friend]`` tags are dropped, keeping their content.
        * ``[term]x[/term]`` becomes `` `x` ``.
        * ``\\r\\n`` line endings become ``\\n``.
        * ``[url='U']T[/url]`` becomes ``[T](U)``; ``[url]U[/url]`` becomes ``(U)``.
        * ``[code ...]C[/code]`` becomes a ``<pre><code>`` block on its own lines.
        """
        text = text.replace('[friend]', '').replace('[/friend]', '')
        text = text.replace('[term]', '`').replace('[/term]', '`')
        text = text.replace('\r\n', '\n')
        # Quoted-url form must run first so the bare form never sees it.
        text = cls._URL_WITH_TEXT.sub(r'[\g<text>](\g<url>)', text)
        text = cls._URL_BARE.sub(r'(\g<url>)', text)
        text = cls._CODE_BLOCK.sub(r'\n<pre>\n<code>\g<code></code>\n</pre>\n', text)
        return text

    def get_tags(self, post_id):
        """Store the lower-cased category names of ``post_id`` under its ``tags`` key.

        ``post_id`` must already be present in ``self.posts`` (otherwise a
        ``KeyError`` is raised). Exits with status 3 if the query fails.
        """
        # Only the table prefix is formatted into the SQL text; the entry id
        # is passed as a query parameter (hence the escaped ``%%s``).
        sql = (
            'SELECT cat.name '
            'FROM %(prefix)sblog_category AS cat, %(prefix)sblog_entrycat AS entrycat '
            'WHERE entrycat.category_id = cat.id AND entrycat.entry_id = %%s'
        ) % {'prefix': self.prefix}
        self._execute(sql, 3, (post_id,))
        self.posts[post_id]['tags'] = [name.lower() for name, in self.cursor.fetchall()]

    def get_posts(self):
        """Load every published post into ``self.posts`` with its markup converted.

        Exits with status 2 if the query fails.
        """
        sql = (
            'SELECT blog.id, blog.title, user.username, blog.text, blog.createtime '
            'FROM %(prefix)sblog AS blog, %(prefix)susers as user '
            'WHERE blog.user_id = user.id '
            'AND published = 1'
        ) % {'prefix': self.prefix}
        self._execute(sql, 2)
        for post_id, title, author, text, createtime in self.cursor.fetchall():
            self.posts[post_id] = {
                'title': title,
                'body': self._bbcode_to_markdown(text),
                'createtime': createtime,
                'author': author,
            }

    def dump(self):
        """Fetch all posts and their tags, then print them to stdout as one YAML document."""
        self.get_posts()
        # get_tags only mutates the per-post dicts, never the key set.
        for post_id in self.posts:
            self.get_tags(post_id)
        print(yaml.safe_dump(self.posts, default_flow_style=False, encoding='utf-8', explicit_start=True))
def main(argv):
    """Parse the command-line arguments in ``argv`` and dump the blog as YAML.

    ``argv`` is the argument list without the program name. The MySQL user
    defaults to the ``USER`` environment variable, falling back to ``root``
    when it is not set. Returns ``None``; database errors terminate the
    process from inside ``Dumper``.
    """
    ap = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ap.add_argument('-H', '--host', help='MySQL host', default='localhost')
    ap.add_argument('-P', '--port', help='MySQL port', type=int, default=3306)
    # .get() instead of [] so a missing USER (cron, Windows) does not crash
    # with KeyError before the arguments are even parsed.
    ap.add_argument('-u', '--user', help='MySQL user', default=os.environ.get('USER', 'root'))
    ap.add_argument('-p', '--password', help='MySQL password', default='')
    ap.add_argument('-d', '--database', help='MySQL database', default='jaws')
    ap.add_argument('-r', '--prefix', help='Jaws prefix', default='')
    a = ap.parse_args(argv)
    d = Dumper(
        user=a.user,
        password=a.password,
        database=a.database,
        host=a.host,
        port=a.port,
        prefix=a.prefix,
    )
    d.dump()
# Script entry point: hand the CLI arguments (minus the program name) to
# main() and use its return value as the process exit status.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    exit_status = main(cli_args)
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment