Skip to content

Instantly share code, notes, and snippets.

@darkdreamingdan
Last active August 29, 2016 15:37
Show Gist options
  • Save darkdreamingdan/d9ec9e6ad38dcac661925842e3bea96f to your computer and use it in GitHub Desktop.
Save darkdreamingdan/d9ec9e6ad38dcac661925842e3bea96f to your computer and use it in GitHub Desktop.
IPB Post Process: Find and replace for MTA forum migration
#!/usr/bin/python
# -*- coding: utf-8 -*-
import codecs
import re
# Input file, decoded as UTF-8; it is iterated line by line by the main loop
# further down. Presumably an SQL dump of the IPB posts table - confirm.
f = codecs.open('ips_posts.sql', 'r', 'utf-8')
# Number of input lines processed so far; only used for progress output.
lineNo = 0
# The four characters replaced by bbcode_escape, and their HTML entities.
_ESCAPE_RE = re.compile('[&<>"]')
_ESCAPE_DICT = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;'}


def bbcode_escape(value):
    """Return *value* with ``&``, ``<``, ``>`` and ``"`` replaced by HTML entities.

    Every other character, including the single quote, is passed through
    unchanged.
    """
    return _ESCAPE_RE.sub(lambda match: _ESCAPE_DICT[match.group(0)], value)
def sql_escape(str):
    """Backslash-escape ``\\``, ``"`` and ``'`` in *str*.

    Backslashes are doubled first so that the backslashes added in front of
    the quotes are not escaped a second time.
    """
    # NOTE: the parameter shadows the builtin ``str``; the name is kept so
    # the function's signature stays unchanged for callers.
    return str.replace("\\", "\\\\").replace('"', '\\"').replace("'", "\\'")
def sql_unescape(str):
    """Remove the backslash escaping of ``'``, ``"`` and ``\\`` from *str*.

    Quotes are unescaped before doubled backslashes are collapsed. Other
    backslash sequences (for example ``\\n``) are left as they are.
    """
    # NOTE: the parameter shadows the builtin ``str``; the name is kept so
    # the function's signature stays unchanged for callers.
    return str.replace("\\'", "'").replace('\\"', '"').replace("\\\\", "\\")
# Raw string so the regex escapes are not (invalid) Python string escapes.
yt_re = re.compile(r"\[you_tube\](.+?)\[/you_tube\]")


def yt_repl(m):
    """Replacement callback for ``yt_re``: turn a video id into an embed.

    If the tag content contains ``http`` it is returned as-is (without the
    surrounding tags). Otherwise the content is HTML-escaped, stripped and
    placed into the ``src`` of an SQL-escaped YouTube ``<iframe>`` wrapper.
    """
    raw = m.group(1)
    if "http" in raw:
        # Maybe they put the full URL incorrectly
        return raw
    content = bbcode_escape(raw).strip()
    return sql_escape(
        '<div style="max-width:640px;margin:0 auto; padding:5px;">'
        '<div style="position: relative;padding-bottom: 56.25%; height: 0; overflow: hidden;">'
        '<iframe width="640" height="385" frameborder="0" allowfullscreen="" '
        'src="http://www.youtube.com/embed/' + content + '?showinfo=0" '
        'style="position: absolute; top: 0px; left: 0px; width: 100%; height: 100%; '
        'max-width: 640px; max-height: 385px;"></iframe></div></div>'
    )
# Raw string so the regex escapes are not (invalid) Python string escapes.
progress_re = re.compile(r"\[progress\](.+?)\[\/progress\]")


def progress_repl(m):
    """Replacement callback for ``progress_re``.

    Returns an SQL-escaped ``<progress max="100">`` element whose ``value``
    attribute is the HTML-escaped, stripped tag content.
    """
    content = bbcode_escape(m.group(1)).strip()
    return sql_escape('<progress max="100" value="' + content + '"></progress>')
# Raw string so the regex escapes are not (invalid) Python string escapes.
wiki_re = re.compile(r"\[wiki\](.+?)\[\/wiki\]")


def wiki_repl(m):
    """Replacement callback for ``wiki_re``.

    Returns an SQL-escaped link to the MTA wiki; the HTML-escaped, stripped
    tag content is used both as the page name in the URL and as link text.
    """
    content = bbcode_escape(m.group(1)).strip()
    return sql_escape(
        '<a class="postlink" href="http://wiki.multitheftauto.com/wiki/'
        + content + '">' + content + '</a>'
    )
# Raw strings so the regex escapes are not (invalid) Python string escapes.
imageleft_re = re.compile(r"\[imageleft\](.+?)\[\/imageleft\]")
url_re = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')


def imageleft_repl(m):
    """Replacement callback for ``imageleft_re``: emit a left-floated image.

    IPB may have already formatted the tag content into an HTML image or
    link, so the first URL found in the content is used as the image source.
    If no URL is found, the tag content is returned unchanged (without the
    surrounding tags).
    """
    # Convert from SQL string to HTML before searching for the URL.
    content = sql_unescape(m.group(1))
    mURL = url_re.search(content)
    if mURL is None:
        return m.group(1)
    return sql_escape(
        '<img src="' + mURL.group(0) + '" style="float:left;padding-right: 5px;" />'
    )
# Raw string so the regex escapes are not (invalid) Python string escapes.
divbox_re = re.compile(r"\[divbox=(.+?)\](.+?)\[\/divbox\]")


def divbox_repl(m):
    """Replacement callback for ``divbox_re``: wrap content in a coloured box.

    Group 1 (the colour) is HTML-escaped and stripped, then used as the
    ``background-color`` of an SQL-escaped ``<div>``. Group 2 (the content)
    is inserted verbatim between the opening and closing tags.
    """
    color = bbcode_escape(m.group(1)).strip()
    # Don't escape the content - we want to preserve IPB's SQL HTML here
    content = m.group(2)
    opening = sql_escape(
        '<div style="background-color:' + color + '; border:1px solid black; '
        'margin:10px 10px; padding-left: 5px; padding-right: 5px;">'
    )
    return opening + content + sql_escape('</div>')
# Raw string so the regex escapes are not (invalid) Python string escapes.
vid_re = re.compile(r"\[vid=(.+?)\](.+?)\[\/vid\]")


def vid_repl(m):
    """Replacement callback for ``vid_re``: emit an HTML5 ``<video>`` element.

    Group 1 is the video URL. Group 2 is split on ``:`` and its first three
    parts are used as max-width, height and the ``video/<type>`` subtype.
    If group 2 has fewer than three parts the whole match is returned
    unchanged.
    """
    # Neither HTML-escaping nor stripping adds or removes a colon, so the
    # part count can be checked on the raw text before doing any work.
    if m.group(2).count(":") < 2:
        return m.group(0)
    url = bbcode_escape(m.group(1)).strip()
    parts = bbcode_escape(m.group(2)).strip().split(":")
    max_width, height, subtype = parts[0], parts[1], parts[2]
    return sql_escape(
        '<video style="width:100%; max-width:' + max_width + 'px; height:auto;" '
        'height="' + height + '" controls=""><source src="' + url + '" '
        'type="video/' + subtype + '">Your browser does not support HTML5 or the '
        'video tag. Please upgrade to a more modern browser.</video>'
    )
_LUA_MARKER = "!LUACODE!"

# Simple, safe tag replacements: (BBCode tag, replacement text).
# [center] / [/center] are already handled by IPB, so they are not listed.
_SIMPLE_TAGS = (
    ("[table]", sql_escape('<table cellpadding="10" style="margin: 1em; table-layout:fixed; width: 100%;">')),
    ("[/table]", '</table>'),
    ("[td]", sql_escape('<td align="left" style="padding: .3em; border: 1px #ccc solid; background-color: #FFFFcc;">')),
    ("[/td]", '</td>'),
    # The border declaration used to read "1px ##575757; solid;", which is
    # not valid CSS; it now matches the form used for [td].
    ("[th]", sql_escape('<th style="color:white; padding: .3em; border: 1px #575757 solid; background: #4F4F4F;">')),
    ("[/th]", '</th>'),
    ("[tr]", '<tr>'),
    ("[/tr]", '</tr>'),
)

# Unicode stand-ins used to fix quote="" tags, and the characters they map to.
_PLACEHOLDER_CHARS = (
    (u'\u2997', u'['),
    (u'\u2998', u']'),
    (u'\uff04', u'$'),
)

# Complex tag replacements, applied in this order after the placeholders.
_COMPLEX_TAGS = (
    (yt_re, yt_repl),
    (progress_re, progress_repl),
    (wiki_re, wiki_repl),
    (imageleft_re, imageleft_repl),
    (divbox_re, divbox_repl),
    (vid_re, vid_repl),
)


def _convert_line(line):
    """Return *line* with all custom BBCode markers and tags converted."""
    outline = line

    # After every !LUACODE! marker, tag the first following "ipsCode"
    # occurrence so that code box is pretty-printed as Lua.
    pos = outline.find(_LUA_MARKER)
    while pos != -1:
        pos += len(_LUA_MARKER)  # Skip to the end of the marker
        outline = outline[:pos] + outline[pos:].replace(
            "ipsCode", "prettyprint lang-lua ipsCode", 1)
        pos = outline.find(_LUA_MARKER, pos)
    outline = outline.replace("<p>" + _LUA_MARKER + "</p>", "")  # Most commonly appears like this
    outline = outline.replace(_LUA_MARKER, "")  # Remove any remaining inline ones

    for tag, html in _SIMPLE_TAGS:
        outline = outline.replace(tag, html)
    for placeholder, char in _PLACEHOLDER_CHARS:
        outline = outline.replace(placeholder, char)
    for pattern, repl in _COMPLEX_TAGS:
        outline = pattern.sub(repl, outline)
    return outline


# Open the output once (still in append mode) instead of once per input line,
# and make sure both files are closed even if a line fails to convert.
out = codecs.open('ips_posts_out.sql', 'a', 'utf-8')
try:
    for line in f:
        out.write(_convert_line(line))
        lineNo += 1
        if (lineNo % 50) == 0:
            print("Line " + str(lineNo) + " completed")
finally:
    out.close()
    f.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment