Skip to content

Instantly share code, notes, and snippets.

@lesthack
Last active January 31, 2018 23:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lesthack/cf09452a8063e21868b1380227e9cd62 to your computer and use it in GitHub Desktop.
Save lesthack/cf09452a8063e21868b1380227e9cd62 to your computer and use it in GitHub Desktop.
from BeautifulSoup import BeautifulSoup as bs
import sqlite3
import os
import sys
import json
# Export published WordPress posts from a SQLite database into per-year
# folders located next to this script.
#
# For every year that has at least one published post, a directory named after
# that year is created (if missing). Each post's content is passed through
# BeautifulSoup's prettify() and written to <year>/<post_name>.md. The title
# and date of every post that was written successfully are collected into a
# single _data.json file next to this script.

# Python 2 only: switch the interpreter-wide default codec to UTF-8
# (presumably so non-ASCII titles/content survive implicit str conversions).
reload(sys)
sys.setdefaultencoding('utf-8')

base = os.path.dirname(os.path.realpath(__file__))

# NOTE: this path is resolved against the current working directory,
# not against `base`.
cnn = sqlite3.connect('ht.sqlite')
cursor = cnn.cursor()

# Distinct publication years of all published posts, oldest first.
query_years = '''
SELECT
    strftime('%Y', post_date)
FROM wp_posts
WHERE
    post_status = 'publish'
    AND post_type = 'post'
GROUP BY strftime('%Y', post_date)
ORDER BY strftime('%Y', post_date)
'''

# Published posts of one year, oldest first. The year is bound as a query
# parameter instead of being spliced into the SQL text.
query_year = '''
SELECT
    post_date,
    post_name,
    post_title,
    post_content
FROM wp_posts
WHERE
    post_status = 'publish'
    AND post_type = 'post'
    AND strftime('%Y', post_date) = ?
ORDER BY post_date ASC
'''

# Read everything that is needed up front so the connection can be released
# before any file is written, even when a query fails.
posts_by_year = []
try:
    cursor.execute(query_years)
    data_years = cursor.fetchall()
    for (year,) in data_years:
        if year is None:
            # strftime() yields NULL for dates it cannot parse; such rows
            # cannot be mapped to a year directory.
            continue
        cursor.execute(query_year, (year,))
        posts_by_year.append((year, cursor.fetchall()))
finally:
    cnn.close()

# Metadata of every exported post, keyed by post_name.
data = {}

for year, data_posts in posts_by_year:
    year_path = os.path.join(base, year)
    if not os.path.isdir(year_path):
        os.mkdir(year_path)
        print('Create {} directory'.format(year))

    for post_date, post_name, post_title, post_content in data_posts:
        post_path = os.path.join(year_path, post_name + '.md')
        # Progress prefix; the result ([ok] / [fail]) completes the same line.
        sys.stdout.write('Creating file: {} {} '.format(year, post_name))
        try:
            # Drop literal backslash-n sequences (two characters), not real
            # newlines, before parsing the markup.
            post_html = bs(post_content.replace('\\n', '')).prettify()
            with open(post_path, 'w') as post_file:
                post_file.write(post_html)
            data[post_name] = {
                "title": post_title,
                "date": post_date[0:10],
                "year": post_date[0:4],
                "description": ""
            }
            print('[ok]')
        except Exception as e:
            # Best effort: report the failing post and continue with the rest.
            # A failed post is not recorded in `data`.
            print('[fail] =  {}'.format(e))

with open(os.path.join(base, '_data.json'), 'w') as data_file:
    data_file.write(
        json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment