Skip to content

Instantly share code, notes, and snippets.

@rht
Created February 15, 2013 03:22
Show Gist options
  • Save rht/4958340 to your computer and use it in GitHub Desktop.
Save rht/4958340 to your computer and use it in GitHub Desktop.
import json
import re
import sys
# Dataset statistics noted by the original author for aaronsw.json:
#ntotal = 761
#nempty = 384
#wpcomment = 5
#nyan = 7 # by yan

# Messages containing any of these substrings are private comments or spam
# and are left out of the published output.
_EXCLUDED_MARKERS = (
    "gh4kyiet@expressemail.org",
    "yan@mit.edu",
    "zyan/www",
    "A new pingback on the post",
)

# Petition whose personal-message placeholder was left untouched.
_EMPTY_COMMENT = re.compile(
    r"\[Optional: Insert a personal message to the MIT administration here.\].*\n.*\n--")
# The placeholder line itself, removed before extracting comment and signer.
_PLACEHOLDER_LINE = re.compile(
    r".*Insert a personal message to the MIT administration here.*\n")

# Fields of a WordPress "A new comment" notification mail.
_WP_EMAIL = re.compile(r'E-mail : .*\n')
_WP_AUTHOR = re.compile(r'Author : .*\(')
_WP_COMMENT = re.compile(r'Comment: (.*\n)*$')
# Moderation links and the "Comment:" header line stripped from the comment.
_WP_BOILERPLATE = re.compile(
    r'(Approve it:.*\n|Trash it:.*\n|Spam it:.*\n|Currently.*comments are waiting.*\n|http://open.scripts.mit.edu/blog/wp-admin/edit-comments.php.*\n|Comment:.*\n)')

# Everything up to the last "--" that starts a line: the personal message.
_COMMENT_BEFORE_SIGNATURE = re.compile(r'(.*\n)*--')
# From the first "--" to the end of the text: the signature block.
_SIGNATURE_BLOCK = re.compile(r'--.*\n(.*\n)*')
_SIGNATURE_DELIMITER_LINE = re.compile(r'--.*\n')


def _first_match(pattern, text):
    """Return the text matched by *pattern* in *text*, or '' if there is none."""
    found = pattern.search(text)
    return found.group(0) if found else ''


def _format_sender(signature):
    """Collapse a multi-line signature into one comma-separated label.

    The second line of the signature is omitted from the result (presumably
    the signer's e-mail address -- confirm against the data).  Signatures
    with fewer than two lines are kept whole instead of raising IndexError,
    and an empty signature is labelled 'Anonymous'.
    """
    parts = signature.strip().split('\n')
    kept = [part.strip() for part in parts[:1] + parts[2:]]
    return ', '.join(kept) or 'Anonymous'


def _format_wp_comment(message):
    """Format a WordPress comment notification; return (entry, comment length).

    Missing author, e-mail or comment fields become empty strings rather
    than aborting the whole run.
    """
    email = _first_match(_WP_EMAIL, message).replace('E-mail : ', '').strip()
    author = _first_match(_WP_AUTHOR, message)
    author = author.replace('(', '').replace('Author : ', '').strip()
    comment = _WP_BOILERPLATE.sub('', _first_match(_WP_COMMENT, message)).strip()
    return "**%s(%s)**\n%s\n" % (author, email, comment), len(comment)


def _format_signature_only(message):
    """Format a petition with no personal message; return (entry, 0)."""
    signature = _PLACEHOLDER_LINE.sub('', message).replace('--', '')
    return "**%s**\n" % _format_sender(signature), 0


def _format_petition(message):
    """Format a petition carrying a personal message; return (entry, length).

    A message without any "--" signature delimiter is published in full
    under the 'Anonymous' label with a recorded length of 0.
    """
    body = _PLACEHOLDER_LINE.sub('', message)
    found = _COMMENT_BEFORE_SIGNATURE.search(body)
    if found is None:
        return "**Anonymous**\n%s\n" % body, 0
    signature = _SIGNATURE_DELIMITER_LINE.sub('', _first_match(_SIGNATURE_BLOCK, body))
    comment = found.group().replace('--', '').strip()
    return "**%s**\n%s\n" % (_format_sender(signature), comment), len(comment)


def sanitize_petitions(petitions):
    """Turn raw petition records into publishable Markdown entries.

    Each record is indexed as ``record[1][0][1]`` to obtain the message text
    (labelled "rfc822.text" in the original author's notes).

    Returns a ``(entries, lengths)`` tuple.  ``entries`` holds one formatted
    string per published message.  ``lengths`` holds exactly one comment
    length per input record, in input order, with 0 for filtered-out,
    comment-less and anonymous messages, so it stays index-aligned with
    *petitions*.
    """
    entries = []
    lengths = []
    for record in petitions:
        message = record[1][0][1]
        if any(marker in message for marker in _EXCLUDED_MARKERS):
            # Private comment or spam: nothing is published.
            lengths.append(0)
            continue
        if _EMPTY_COMMENT.search(message):
            entry, size = _format_signature_only(message)
        elif "A new comment" in message:
            entry, size = _format_wp_comment(message)
        else:
            entry, size = _format_petition(message)
        entries.append(entry)
        lengths.append(size)
    return entries, lengths


def _write_utf8(text):
    """Write *text* and a newline to stdout as UTF-8 on Python 2 and 3."""
    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout accepts byte strings directly.
    stream = getattr(sys.stdout, 'buffer', sys.stdout)
    stream.write(text.encode('utf-8') + b'\n')


if __name__ == "__main__":
    # Binary mode lets json decode the UTF-8 itself, independent of locale,
    # and the context manager closes the file handle.
    with open('aaronsw.json', 'rb') as source:
        petitions = json.load(source)
    sanitizedpetitions, msglength = sanitize_petitions(petitions)
    for entry in sanitizedpetitions:
        _write_utf8(entry)
    # Debugging aid kept from the original: locate the longest comment.
    #maxlength = max(msglength)
    #maxindex = [i for i, j in enumerate(msglength) if j == maxlength][0]
    #print maxindex
    #print maxlength
    #print petitions[maxindex][1][0][1]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment