Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created June 6, 2021 12:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amankharwal/3d19b1fddb94c43f4ebeb1c9232f5721 to your computer and use it in GitHub Desktop.
Save amankharwal/3d19b1fddb94c43f4ebeb1c9232f5721 to your computer and use it in GitHub Desktop.
import re
import pandas as pd
import numpy as np
import emoji
from collections import Counter
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
# Check whether a line starts with a date and time stamp
def date_time(s: str) -> bool:
    """Return True if *s* begins with a chat-export date/time stamp.

    The expected prefix is ``<n>/<n>/<n>, <h>:<m>`` optionally followed by
    a space and an AM/PM marker, then `` -``; for example
    ``12/05/21, 10:30 - `` or ``1/2/2021, 9:05 PM - ``.

    Args:
        s: One line of text from the exported chat.

    Returns:
        True when the line starts with such a stamp, False otherwise.
    """
    # Raw string: the original non-raw literal relied on the invalid escape
    # sequence "\/", which newer Python versions warn about.
    pattern = r'^([0-9]+)(\/)([0-9]+)(\/)([0-9]+), ([0-9]+):([0-9]+)[ ]?(AM|PM|am|pm)? -'
    return re.match(pattern, s) is not None
# Check whether a message starts with an author (contact) prefix
def find_author(s: str) -> bool:
    """Return True if *s* looks like ``"<author>: <message>"``.

    Only the first ``": "`` is treated as the author separator, so colons
    later in the text (times such as ``10:30``, URLs, etc.) do not prevent
    an author from being detected. Counting every colon, as the previous
    implementation did, rejected any such message.

    Args:
        s: The part of a chat line that follows the date/time stamp.

    Returns:
        True when a non-empty author precedes a ``": "`` separator,
        False otherwise (e.g. text without that separator).
    """
    author, separator, _ = s.partition(": ")
    # Require both the separator and some text in front of it.
    return bool(separator) and bool(author)
# Split a chat line into date, time, author and message
def getDatapoint(line: str):
    """Split one chat line into ``(date, time, author, message)``.

    The line is expected to look like
    ``"<date>, <time> - <author>: <message>"``. Lines for which
    ``find_author`` reports no author (or that lack a ``": "`` separator)
    yield ``author = None`` and keep everything after ``" - "`` as the
    message.

    Args:
        line: A chat line that starts with a date/time stamp.

    Returns:
        A ``(date, time, author, message)`` tuple of strings, where
        ``author`` is ``None`` when no author could be identified.

    Raises:
        ValueError: If the text before ``" - "`` does not contain exactly
            one ``", "`` separating date and time.
    """
    # Split on the FIRST " - " only, so a message that itself contains
    # " - " is kept verbatim instead of having the dash replaced by a space.
    stamp, _, message = line.partition(' - ')
    date, time = stamp.split(", ")

    author = None
    if find_author(message):
        # Likewise split on the first ": " only, so later ": " sequences
        # stay in the message text.
        name, separator, text = message.partition(": ")
        if separator:
            author, message = name, text
    return date, time, author, message
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment