Skip to content

Instantly share code, notes, and snippets.

@abhisheksoni27
Last active July 15, 2018 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abhisheksoni27/e7b79a7374c867c70d3733785babc79f to your computer and use it in GitHub Desktop.
Save abhisheksoni27/e7b79a7374c867c70d3733785babc79f to your computer and use it in GitHub Desktop.
import re
# Literal "<Media omitted>" placeholder; it is removed from the chat text
# because it carries no useful content.
mediaPattern = r"(\<Media omitted\>)"
regexMedia = re.compile(mediaPattern, re.M)

# Message header made of date, time, AM/PM marker and sender name,
# e.g. "01/09/17, 11:34 PM - Amfa:".
dateAndTimepattern = r"(\d+\/\d+\/\d+)(,)(\s)(\d+:\d+)(\s)(\w+)(\s)(-)(\s\w+)*(:)"
regexDate = re.compile(dateAndTimepattern, re.M)
def cleanText(filename):
    """Read a chat export and return its cleaned, non-empty lines.

    Every match of ``regexMedia`` and ``regexDate`` is replaced with an
    empty string, then each remaining line is stripped of surrounding
    whitespace and dropped if nothing is left.

    Args:
        filename: Path of the text file to read. It is opened in text
            mode with the platform default encoding.

    Returns:
        A list of stripped, non-empty strings in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even if read() raises.
    with open(filename) as chat:
        chatText = chat.read()

    # Remove the matches by replacing them with an empty string.
    # Example of a header matched by regexDate: "01/09/17, 11:34 PM - Amfa:"
    chatText = regexMedia.sub("", chatText)
    chatText = regexDate.sub("", chatText)

    # Strip once per line and keep only lines that still have content.
    # Truthiness replaces the original identity check (`is not ""`), which
    # depended on interpreter string interning rather than on equality.
    stripped = (line.strip() for line in chatText.splitlines())
    return [line for line in stripped if line]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment