Last active
August 29, 2016 15:37
-
-
Save darkdreamingdan/d9ec9e6ad38dcac661925842e3bea96f to your computer and use it in GitHub Desktop.
IPB Post Process: Find and replace for MTA forum migration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import codecs | |
import re | |
# Input SQL dump, opened read-only and decoded as UTF-8 text.
f = codecs.open('ips_posts.sql', mode='r', encoding='utf-8')
# Running count of input lines handled so far.
lineNo = 0
# Characters that are significant inside HTML text and attribute values.
_ESCAPE_RE = re.compile('[&<>"]')
# Each significant character maps to its HTML entity.  (An identity mapping
# here would turn bbcode_escape into a no-op and let raw markup through.)
_ESCAPE_DICT = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;'}


def bbcode_escape(value):
    """Return *value* with ``&``, ``<``, ``>`` and ``"`` replaced by HTML entities.

    All other characters are passed through unchanged.
    """
    return _ESCAPE_RE.sub(lambda match: _ESCAPE_DICT[match.group(0)], value)
def sql_escape(str):
    """Backslash-escape backslashes, double quotes and single quotes.

    Backslashes are doubled first so that the backslashes inserted in front
    of the quote characters are not themselves escaped again.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept as-is
    # so the function's signature does not change.
    escaped = str
    for raw, quoted in (("\\", "\\\\"), ('"', '\\"'), ("'", "\\'")):
        escaped = escaped.replace(raw, quoted)
    return escaped
def sql_unescape(str):
    """Undo the backslash escaping of quotes and backslashes.

    Escaped quotes are restored before doubled backslashes are collapsed,
    which is the reverse of the order used when escaping.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept as-is
    # so the function's signature does not change.
    plain = str
    for quoted, raw in (("\\'", "'"), ('\\"', '"'), ("\\\\", "\\")):
        plain = plain.replace(quoted, raw)
    return plain
# Matches [you_tube]...[/you_tube]; group 1 is the text between the tags.
# Raw string: "\[" in a normal literal is an invalid escape sequence.
yt_re = re.compile(r"\[you_tube\](.+?)\[/you_tube\]")


def yt_repl(m):
    """Replace a ``[you_tube]`` tag match with a responsive YouTube embed.

    m -- match object produced by ``yt_re``.

    If the tag body contains "http" it is returned untouched (minus the
    surrounding tags); otherwise the body is HTML-escaped, stripped, used as
    the video id in an iframe, and the resulting markup is SQL-escaped.
    """
    video = m.group(1)
    if "http" in video:  # Maybe they put the full url incorrectly
        return video
    content = bbcode_escape(video).strip()
    embed_html = (
        '<div style="max-width:640px;margin:0 auto; padding:5px;">'
        '<div style="position: relative;padding-bottom: 56.25%; height: 0; overflow: hidden;">'
        '<iframe width="640" height="385" frameborder="0" allowfullscreen="" '
        'src="http://www.youtube.com/embed/' + content + '?showinfo=0" '
        'style="position: absolute; top: 0px; left: 0px; width: 100%; height: 100%; '
        'max-width: 640px; max-height: 385px;"></iframe></div></div>'
    )
    return sql_escape(embed_html)
# Matches [progress]...[/progress]; group 1 is the text between the tags.
# Raw string: "\[" and "\/" in a normal literal are invalid escape sequences.
progress_re = re.compile(r"\[progress\](.+?)\[\/progress\]")


def progress_repl(m):
    """Replace a ``[progress]`` tag match with an HTML ``<progress>`` element.

    m -- match object produced by ``progress_re``.

    The tag body is HTML-escaped, stripped and used as the ``value``
    attribute (``max`` is fixed at 100); the markup is returned SQL-escaped.
    """
    content = bbcode_escape(m.group(1)).strip()
    return sql_escape('<progress max="100" value="' + content + '"></progress>')
# Matches [wiki]...[/wiki]; group 1 is the text between the tags.
# Raw string: "\[" and "\/" in a normal literal are invalid escape sequences.
wiki_re = re.compile(r"\[wiki\](.+?)\[\/wiki\]")


def wiki_repl(m):
    """Replace a ``[wiki]`` tag match with a link to the MTA wiki page.

    m -- match object produced by ``wiki_re``.

    The tag body is HTML-escaped and stripped, then used both as the page
    name in the link target and as the link text.  The markup is returned
    SQL-escaped.
    """
    content = bbcode_escape(m.group(1)).strip()
    link = ('<a class="postlink" href="http://wiki.multitheftauto.com/wiki/'
            + content + '">' + content + '</a>')
    return sql_escape(link)
# Matches [imageleft]...[/imageleft]; group 1 is the text between the tags.
# Raw strings: "\[", "\/", "\(" and "\)" in normal literals are invalid
# escape sequences.
imageleft_re = re.compile(r"\[imageleft\](.+?)\[\/imageleft\]")
# Finds the first http/https URL inside a piece of text.
url_re = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')


def imageleft_repl(m):
    """Replace an ``[imageleft]`` tag match with a left-floated ``<img>``.

    m -- match object produced by ``imageleft_re``.

    IPB may have already formatted the tag body into an HTML image or link,
    so the raw URL is searched for inside it.  When a URL is found, the
    SQL-escaped ``<img>`` markup is returned; otherwise the tag body is
    returned unchanged (without the surrounding tags).
    """
    body = m.group(1)
    # Convert from SQL string to HTML before searching for the URL.
    found = url_re.search(sql_unescape(body))
    if not found:
        return body
    return sql_escape('<img src="' + found.group(0) + '" style="float:left;padding-right: 5px;" />')
# Matches [divbox=COLOR]...[/divbox]; group 1 is the colour, group 2 the body.
# Raw string: "\[" and "\/" in a normal literal are invalid escape sequences.
divbox_re = re.compile(r"\[divbox=(.+?)\](.+?)\[\/divbox\]")


def divbox_repl(m):
    """Replace a ``[divbox=COLOR]`` tag match with a bordered, coloured ``<div>``.

    m -- match object produced by ``divbox_re``.

    Only the colour is HTML-escaped and stripped.  The body is inserted
    as-is, because it already holds IPB's SQL-escaped HTML and must not be
    escaped a second time.  The opening and closing tags are SQL-escaped.
    """
    color = bbcode_escape(m.group(1)).strip()
    opening = sql_escape('<div style="background-color:' + color + '; border:1px solid black; margin:10px 10px; padding-left: 5px; padding-right: 5px;">')
    # Don't escape the content - we want to preserve IPB's SQL HTML here
    return opening + m.group(2) + sql_escape('</div>')
# Matches [vid=URL]...[/vid]; group 1 is the URL, group 2 the tag body.
# Raw string: "\[" and "\/" in a normal literal are invalid escape sequences.
vid_re = re.compile(r"\[vid=(.+?)\](.+?)\[\/vid\]")


def vid_repl(m):
    """Replace a ``[vid=URL]a:b:c[/vid]`` tag match with an HTML5 ``<video>``.

    m -- match object produced by ``vid_re``.

    The tag body is HTML-escaped, stripped and split on ":".  The first
    three fields become, in order, the max-width in pixels, the ``height``
    attribute and the ``video/...`` MIME subtype.  The markup is returned
    SQL-escaped.  If the body has fewer than three fields the whole match
    is returned unchanged.
    """
    url = bbcode_escape(m.group(1)).strip()
    fields = bbcode_escape(m.group(2)).strip().split(":")
    if len(fields) < 3:
        return m.group(0)
    max_width, height, subtype = fields[0], fields[1], fields[2]
    return sql_escape('<video style="width:100%; max-width:' + max_width + 'px; height:auto;" height="' + height + '" controls=""><source src="' + url + '" type="video/' + subtype + '">Your browser does not support HTML5 or the video tag. Please upgrade to a more modern browser.</video>')
# Simple, safe tag replacements, applied in this order.
# The [th] border previously read "1px ##575757; solid;", which is not valid
# CSS; it now mirrors the form used for [td].
# [center] / [/center] are already handled by IPB, so they are not listed.
_SIMPLE_TAGS = (
    ("[table]", sql_escape('<table cellpadding="10" style="margin: 1em; table-layout:fixed; width: 100%;">')),
    ("[/table]", '</table>'),
    ("[td]", sql_escape('<td align="left" style="padding: .3em; border: 1px #ccc solid; background-color: #FFFFcc;">')),
    ("[/td]", '</td>'),
    ("[th]", sql_escape('<th style="color:white; padding: .3em; border: 1px #575757 solid; background: #4F4F4F;">')),
    ("[/th]", '</th>'),
    ("[tr]", '<tr>'),
    ("[/tr]", '</tr>'),
)

# Placeholder unicode characters used to fix quote="" tags, and the ASCII
# characters they are turned back into.
_PLACEHOLDER_CHARS = (
    (u'\u2997', u'['),
    (u'\u2998', u']'),
    (u'\uff04', u'$'),
)

# Complex tag replacements, applied in this order.
_REGEX_TAGS = (
    (yt_re, yt_repl),
    (progress_re, progress_repl),
    (wiki_re, wiki_repl),
    (imageleft_re, imageleft_repl),
    (divbox_re, divbox_repl),
    (vid_re, vid_repl),
)


def _tag_lua_code(outline):
    """Mark the code block after each !LUACODE! marker as Lua, then drop the markers."""
    marker = "!LUACODE!"
    pos = outline.find(marker)
    while pos != -1:
        # Skip to the end of the marker
        pos += len(marker)
        # Replace the first occurrence of ipsCode after !LUACODE!
        head, tail = outline[:pos], outline[pos:]
        outline = head + tail.replace("ipsCode", "prettyprint lang-lua ipsCode", 1)
        pos = outline.find(marker, pos)
    outline = outline.replace("<p>!LUACODE!</p>", "")  # Most commonly appears like this
    return outline.replace(marker, "")  # Remove any remaining inline ones


def _convert_line(line):
    """Apply every post-processing step to one line of the dump and return the result."""
    outline = _tag_lua_code(line)
    for tag, html in _SIMPLE_TAGS:
        outline = outline.replace(tag, html)
    for placeholder, char in _PLACEHOLDER_CHARS:
        outline = outline.replace(placeholder, char)
    for pattern, repl in _REGEX_TAGS:
        outline = pattern.sub(repl, outline)
    return outline


try:
    # Open the output once instead of reopening it for every input line.
    # Append mode is kept, so anything already in the file is preserved.
    with codecs.open('ips_posts_out.sql', 'a', 'utf-8') as out:
        for line in f:
            out.write(_convert_line(line))
            lineNo += 1
            if lineNo % 50 == 0:
                print("Line " + str(lineNo) + " completed")
finally:
    # Release the input handle even if processing fails part-way through.
    f.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment