Skip to content

Instantly share code, notes, and snippets.

@darkdreamingdan
Last active August 23, 2016 02:40
Show Gist options
  • Save darkdreamingdan/f0f523cc6782252654bb7a50fa897b99 to your computer and use it in GitHub Desktop.
Save darkdreamingdan/f0f523cc6782252654bb7a50fa897b99 to your computer and use it in GitHub Desktop.
Fix mojibaking and convert [lua] tags for MTA forum migration
#!/usr/bin/python
# -*- coding: utf-8 -*-
import ftfy
import codecs
# Markers that delimit a Lua code section in the dump. Only the tag prefix is
# matched; whatever follows the colon (and the closing "]") is left alone.
LUA_OPEN = "[lua:"
LUA_CLOSE = "[/lua:"

# HTML fragments rewritten inside Lua sections, applied in this order.
# Every replacement is exactly as long as the text it replaces, so the
# length of a line never changes.
HTML_REPLACEMENTS = (
    ("<div", "<xxx"),
    ("</div>", "</xxx>"),
    ("<li", "<xx"),
    ("</li>", " <br>"),
    ("<ol", "<xx"),
    ("</ol>", "</xx>"),
)

# Tag renames applied to the whole line, in this order.
TAG_RENAMES = (
    ("[lua:", "!LUACODE![xml:"),
    ("[/lua:", "[/xml:"),
    ("[youtube:", "[you_tube:"),
    ("[/youtube:", "[/you_tube:"),
)


def neutralize_html_in_lua(text):
    """Return *text* with HTML tags inside Lua sections bludgeoned.

    We bludgeon our HTML in Lua code to make it more IPB friendly.

    A section runs from just after a "[lua:" marker up to and including the
    next "[/lua:" marker. When several "[lua:" markers precede one closing
    marker, only the last of them opens the section. A closing marker with
    no opener before it, and an opener that is never closed, leave the text
    untouched. Text outside sections is returned unchanged.
    """
    pieces = []
    pos = 0  # everything before this offset has already been emitted
    while True:
        close_at = text.find(LUA_CLOSE, pos)
        if close_at == -1:
            break
        section_end = close_at + len(LUA_CLOSE)
        # Pair the closer with the most recent opener since the last section.
        open_at = text.rfind(LUA_OPEN, pos, close_at)
        if open_at == -1:
            # Stray closing marker: pass it through as-is.
            pieces.append(text[pos:section_end])
        else:
            section_start = open_at + len(LUA_OPEN)
            pieces.append(text[pos:section_start])
            section = text[section_start:section_end]
            for old, new in HTML_REPLACEMENTS:
                section = section.replace(old, new)
            pieces.append(section)
        pos = section_end
    pieces.append(text[pos:])
    return "".join(pieces)


def rewrite_tags(text):
    """Return *text* with the lua and youtube tags renamed.

    We convert lua to xml. IPB converts this into a generic code tag,
    stripping the formatting. We also place a "!LUACODE!" marker in front of
    each converted opening tag so we can revisit it later.
    """
    for old, new in TAG_RENAMES:
        text = text.replace(old, new)
    return text


def convert_line(line):
    """Apply the full per-line conversion and return the result.

    php makes a mess of the encoding, so the line is first passed through
    ftfy.fix_encoding. Some of our earlier posts are encoded
    correctly...others are mojibaked.
    """
    fixed = ftfy.fix_encoding(line)
    return rewrite_tags(neutralize_html_in_lua(fixed))


def main():
    """Convert 'php3_db.sql' line by line, appending to 'php3_db_out.sql'.

    Prints a progress message after every 50th line.
    """
    line_no = 0
    # Both files are opened once and closed by the `with` block, even on
    # error. The output is opened in append mode, so output left over from a
    # previous run is kept and extended rather than overwritten.
    with codecs.open('php3_db.sql', 'r', 'utf-8') as src, \
            codecs.open('php3_db_out.sql', 'a', 'utf-8') as out:
        for line in src:
            out.write(convert_line(line))
            line_no += 1
            if (line_no % 50) == 0:
                print("Line " + str(line_no) + " completed")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment