Last active
August 23, 2016 02:40
-
-
Save darkdreamingdan/f0f523cc6782252654bb7a50fa897b99 to your computer and use it in GitHub Desktop.
Fix mojibaking and convert [lua] tags for MTA forum migration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import ftfy | |
import codecs | |
# Markers of the phpBB-style Lua code tags ("[lua:<id>]" ... "[/lua:<id>]").
LUA_OPEN = "[lua:"
LUA_CLOSE = "[/lua:"

# HTML fragments rewritten inside Lua code regions, applied in this order.
HTML_SWAPS = (
    ("<div", "<xxx"),
    ("</div>", "</xxx>"),
    ("<li", "<xx"),
    ("</li>", " <br>"),
    ("<ol", "<xx"),
    ("</ol>", "</xx>"),
)

# BBCode tag renames applied to every line, in this order.
TAG_RENAMES = (
    # Lua becomes xml, which IPB converts into a generic code tag (stripping
    # the formatting). The "!LUACODE!" marker lets us revisit these later.
    ("[lua:", "!LUACODE![xml:"),
    ("[/lua:", "[/xml:"),
    ("[youtube:", "[you_tube:"),
    ("[/youtube:", "[/you_tube:"),
)


def escape_html_in_lua(text):
    """Return *text* with HTML list/div markup rewritten inside Lua tags.

    A region runs from just after a "[lua:" marker to just after the next
    "[/lua:" marker. If several "[lua:" markers precede a closer, the most
    recent one starts the region. A "[/lua:" with no pending opener is
    ignored, and text outside any region is left untouched.
    """
    if LUA_OPEN not in text:
        return text

    pieces = []
    copied_upto = 0   # everything before this index is already in pieces
    code_start = -1   # index just past the pending "[lua:", or -1 if none
    cursor = 0
    while True:
        bracket = text.find("[", cursor)
        if bracket < 0:
            break
        if text.startswith(LUA_OPEN, bracket):
            code_start = bracket + len(LUA_OPEN)
            cursor = code_start
        elif code_start > 0 and text.startswith(LUA_CLOSE, bracket):
            code_end = bracket + len(LUA_CLOSE)
            code = text[code_start:code_end]
            for old, new in HTML_SWAPS:
                code = code.replace(old, new)
            pieces.append(text[copied_upto:code_start])
            pieces.append(code)
            copied_upto = code_end
            code_start = -1
            cursor = code_end
        else:
            cursor = bracket + 1
    pieces.append(text[copied_upto:])
    return "".join(pieces)


def rewrite_tags(text):
    """Return *text* with the lua and youtube BBCode tags renamed."""
    for old, new in TAG_RENAMES:
        text = text.replace(old, new)
    return text


def convert_line(line):
    """Return one line of the SQL dump converted for the forum migration."""
    # php makes a mess of the encoding. So let's fix that.
    # Some of our earlier posts are encoded correctly...others are mojibaked.
    fixed = ftfy.fix_encoding(line)
    # We bludgeon the HTML in Lua code to make it more IPB friendly.
    return rewrite_tags(escape_html_in_lua(fixed))


def main():
    """Convert php3_db.sql line by line, appending to php3_db_out.sql.

    Prints a progress message every 50 lines.
    """
    with codecs.open('php3_db.sql', 'r', 'utf-8') as source:
        # Opened once rather than once per line. Append mode is kept, so an
        # existing output file is extended rather than overwritten.
        with codecs.open('php3_db_out.sql', 'a', 'utf-8') as out:
            for lineNo, line in enumerate(source, 1):
                out.write(convert_line(line))
                if (lineNo % 50) == 0:
                    print("Line " + str(lineNo) + " completed")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment