Skip to content

Instantly share code, notes, and snippets.

@mkmark
Created February 10, 2021 17:27
Show Gist options
  • Save mkmark/d537af5a59236af8a316c5b37e1bc8f7 to your computer and use it in GitHub Desktop.
Save mkmark/d537af5a59236af8a316c5b37e1bc8f7 to your computer and use it in GitHub Desktop.
wordpress-markdown-exporter
#!/usr/bin/env python3
# ---------------------------------------------------------------------------
# User configuration
# ---------------------------------------------------------------------------
# Prefix shared by all WordPress tables; the default is 'wp_'.
table_prefix = "wp_"
# Absolute path of the site's wp-config.php.
wp_config_path = "/home/www/default/wp-config.php"
# Directory the markdown files are exported to. It must already exist
# (keep the trailing path separator).
export_path = "/home/www/"
import html
import os
import re

import mysql.connector
from wpconfigr import WpConfigFile
# Read the database credentials from wp-config.php and open the connection.
wp_config = WpConfigFile(wp_config_path)
_db_settings = {
    'host': wp_config.get('DB_HOST'),
    'user': wp_config.get('DB_USER'),
    'password': wp_config.get('DB_PASSWORD'),
    'database': wp_config.get('DB_NAME'),
}
con = mysql.connector.connect(**_db_settings)

# Prefixed names of the tables used by the queries in this script.
(
    wp_posts,
    wp_terms,
    wp_term_taxonomy,
    wp_term_relationships,
    wp_users,
) = (
    table_prefix + _suffix
    for _suffix in (
        'posts',
        'terms',
        'term_taxonomy',
        'term_relationships',
        'users',
    )
)
# Load every row of type 'post' whose title is not 'Auto Draft'.
# With the default table prefix the statement reads:
#
#   SELECT ID, post_author, post_date_gmt, post_content, post_title,
#          post_content_filtered, post_type, post_password, post_status,
#          comment_status
#   FROM wp_posts
#   WHERE post_type = 'post'
#   AND post_title <> 'Auto Draft';
cur = con.cursor()
posts_query = (
    "SELECT ID, post_author, post_date_gmt, post_content, post_title, "
    "post_content_filtered, post_type, post_password, post_status, "
    "comment_status "
    "FROM " + wp_posts + " "
    "WHERE post_type = 'post' "
    "AND post_title <> 'Auto Draft'; "
)
cur.execute(posts_query)

# Column names in SELECT order; zipped with each row to build a dict.
columns = tuple(col[0] for col in cur.description)
# Posts keyed by their ID (the first selected column).
postsd = {record[0]: dict(zip(columns, record)) for record in cur}
# Enrich every post with its categories, tags and author display name.
#
# Two statements are executed per post. With the default table prefix:
#
#   SELECT wp_terms.name, wp_term_taxonomy.taxonomy
#   FROM wp_posts
#   LEFT OUTER JOIN wp_term_relationships
#     ON wp_posts.ID = wp_term_relationships.object_id
#   LEFT OUTER JOIN wp_term_taxonomy
#     ON wp_term_relationships.term_taxonomy_id = wp_term_taxonomy.term_taxonomy_id
#   LEFT OUTER JOIN wp_terms
#     ON wp_term_taxonomy.term_id = wp_terms.term_id
#   WHERE wp_posts.ID = <post ID>
#
#   SELECT display_name
#   FROM wp_users
#   WHERE ID = <post_author>
#
# Table names cannot be bound as query parameters, so they are formatted in
# from the configured prefix; the ID values are bound with %s placeholders.
# Both statements are loop-invariant and therefore built once.
terms_query = (
    "SELECT {terms}.name, {taxonomy}.taxonomy "
    "FROM {posts} "
    "LEFT OUTER JOIN {relationships} "
    "ON {posts}.ID = {relationships}.object_id "
    "LEFT OUTER JOIN {taxonomy} "
    "ON {relationships}.term_taxonomy_id = {taxonomy}.term_taxonomy_id "
    "LEFT OUTER JOIN {terms} "
    "ON {taxonomy}.term_id = {terms}.term_id "
    "WHERE {posts}.ID = %s"
).format(
    posts=wp_posts,
    terms=wp_terms,
    taxonomy=wp_term_taxonomy,
    relationships=wp_term_relationships,
)
author_query = "SELECT display_name FROM {users} WHERE ID = %s".format(
    users=wp_users,
)

for post_id, post in postsd.items():
    # Sort the post's terms into categories and tags; rows with any other
    # taxonomy (or no term at all, from the outer joins) are ignored.
    categories = []
    tags = []
    cur.execute(terms_query, (post_id,))
    for term_name, taxonomy in cur:
        if taxonomy == 'category':
            categories.append(term_name)
        elif taxonomy == 'post_tag':
            tags.append(term_name)
    post['categories'] = categories
    post['tags'] = tags

    # Default to an empty name so that a post whose author row no longer
    # exists still gets an 'author' key instead of failing later on.
    post['author'] = ''
    cur.execute(author_query, (post['post_author'],))
    # Iterate the whole result so the cursor is fully consumed before the
    # next execute().
    for (display_name,) in cur:
        post['author'] = display_name
def make_title_path_valid(_str):
    r"""Turn a post title into a lower-case string usable in a file name.

    Each of the characters / \ " : * ? < > | and each whitespace character
    is replaced by a single hyphen (runs are not collapsed); the result is
    then lower-cased.
    """
    return re.sub(r'[/\\":*?<>|\s]', '-', _str).lower()
def make_title_md_valid(_str):
    """Return the title as a double-quoted YAML scalar for the front matter.

    Backslashes and double quotes inside the title are backslash-escaped
    first; without that, a title such as  say "hi"  would terminate the
    quoted scalar early and produce invalid front matter. Titles that
    contain neither character are simply wrapped in double quotes.
    """
    # Escape backslashes first so the escapes added for quotes are not doubled.
    escaped = _str.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'
# Write one markdown file per post: a YAML front-matter header followed by
# the HTML-unescaped post body. Files are named
# "<YYYY-MM-DD>-<sanitised title>.md" inside export_path.
for post in postsd.values():
    # NOTE(review): post_date_gmt is assumed to be a datetime here. If the
    # connector can hand back None for some posts (presumably drafts with a
    # zero date -- confirm), strftime() below will raise.
    created = post['post_date_gmt']
    file_name = (
        created.strftime("%Y-%m-%d-")
        + make_title_path_valid(post['post_title'])
        + '.md'
    )
    # os.path.join copes with export_path given with or without a trailing
    # separator.
    file_path = os.path.join(export_path, file_name)

    # A post counts as published only when it is public and not
    # password-protected.
    is_published = (
        post['post_status'] == 'publish' and post['post_password'] == ''
    )

    # Prefer post_content_filtered when it is non-empty, else post_content.
    if post['post_content_filtered'] != '':
        body = post['post_content_filtered']
    else:
        body = post['post_content']

    lines = [
        '---',
        'layout: post',
        'title: ' + make_title_md_valid(post['post_title']),
        'date: ' + created.strftime("%Y-%m-%d %H:%M"),
        # Fall back to an empty author when no display name was resolved,
        # instead of aborting the export with a KeyError.
        'author: ' + post.get('author', ''),
        'comments: ' + ('true' if post['comment_status'] == 'open' else 'false'),
        'categories: ' + str(post['categories']),
        'tags: ' + str(post['tags']),
        'published: ' + ('true' if is_published else 'false'),
        '---',
        html.unescape(body),
    ]

    # The content is assembled before the file is opened so that a failure
    # above does not leave an empty file behind.
    with open(file_path, 'w', encoding='utf-8', errors='ignore') as md_file:
        md_file.write('\n'.join(lines) + '\n')

con.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment