This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the first ten (word, count) pairs of `most_common_me` into two
# parallel lists: one holding the words, the other holding their counts.
myself_common, common_count_me = [], []
for word, count in most_common_me[:10]:
    myself_common.append(word)
    common_count_me.append(count)
mylove_common = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter

# Most common words: (word, count) pairs ordered from most to least frequent.
most_common_me = Counter(myself).most_common()
most_common_her = Counter(mylove).most_common()

# Every distinct word appears exactly once in a most_common() list, so the
# list length is the unique-word count. Reuse the lists computed above rather
# than building each Counter a second time.
print("Unique words used:")
print("Unique words She used: ", len(most_common_her))
print("Unique words I used: ", len(most_common_me))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
mylove = [] | |
myself = [] | |
for i in range(len(df)): | |
if "My Wife, I LOVE YOU" in df.at[i,"Sender"]: | |
message = df.at[i,"Message"] | |
word_list = message.strip().lower().translate(str.maketrans('', '', string.punctuation)).split(" ") | |
for word in word_list: | |
if word not in (("a","and","the")): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
daily_counts = gk["Message"]

# The highest number of messages in a day (all rows tied for it are kept)
maximum = gk[daily_counts == daily_counts.max()]
print(maximum)

# The lowest number of messages in a day (all rows tied for it are kept)
minimum = gk[daily_counts == daily_counts.min()]
print(minimum)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Trend values: a centred rolling mean over 50 rows of gk["Message"].
# min_periods=3 lets the window produce a value near the edges as soon as
# at least three observations are available.
trend_window = 50
mv_average = (
    gk["Message"]
    .rolling(window=trend_window, center=True, min_periods=3)
    .mean()
)

# Set up the figure and draw the raw per-day series; the y-axis is pinned
# to the 0-600 range.
plt.figure(figsize=(12, 10))
plt.title("Number of Messages Per Day (With trend)")
plt.axis("on")
plt.ylim(bottom=0, top=600)
plt.plot(time, number_of_messages, label="Messages per day")
# plot vertical line for the time I travelled: Feb 2, 2020 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Messages per day: group the rows by Date and count the non-null entries
# of the Message column, keeping the result as a one-column DataFrame.
gk = df.groupby("Date")["Message"].count().to_frame()

# Plot the per-day totals as a line chart with the y-axis pinned to 0-600.
time = list(gk.index)
number_of_messages = list(gk["Message"])

plt.figure(figsize=(12, 10))
plt.axis("on")
plt.ylim(bottom=0, top=600)
plt.plot(time, number_of_messages)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Morning messaging: count, per Sender, how often they sent the first
# message of a date. Rows are ordered by Date then Time, so the single row
# drop_duplicates keeps for each Date is its earliest one.
chronological = df.sort_values(by=["Date", "Time"], ascending=True)
first = chronological.drop_duplicates(subset=["Date"])["Sender"].value_counts()
print(first)

plt.figure(figsize=(6, 6))
plt.title("Morning Messages Count")
plt.bar(first.index, first)
plt.show()

# Late-night messaging: count, per Sender, how often they sent the last
# message of a date. Ordering by Time descending puts each Date's latest row
# ahead of its other rows, and that first row is the one drop_duplicates keeps.
# NOTE(review): assumes Time values sort chronologically (e.g. 24-hour
# strings or time objects) - confirm against the CSV.
latest_first = df.sort_values(by=["Time"], ascending=False)
last = latest_first.drop_duplicates(subset=["Date"])["Sender"].value_counts()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count the occurrences of each Sender
print("Total Messages: ", len(df))
messages_count = pd.DataFrame(df["Sender"].value_counts())
print(messages_count)

# plot
sender = list(messages_count.index)
# Select the counts by position rather than by name: value_counts() names
# its result "Sender" on pandas < 2.0 but "count" on pandas >= 2.0, where
# `messages_count.Sender` would raise AttributeError.
data = messages_count.iloc[:, 0]
plt.figure(figsize=(6, 6))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse the Date column into pandas datetime values.
df["Date"] = pd.to_datetime(df["Date"])
dates = df["Date"]

# Active days: the number of distinct values in the Date column.
print("Active days: ", len(pd.unique(dates)))

# First and last day, read from the first and last rows of the data.
# NOTE(review): this relies on the rows being in chronological order - confirm.
start = dates.iloc[0].date()
print("First Day:", start)
end = dates.iloc[-1].date()
print("Last Day: ", end)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import datetime | |
from matplotlib import pyplot as plt | |
# Load the message log from the CSV file in the working directory.
CSV_PATH = "messages.csv"
df = pd.read_csv(CSV_PATH)
# Preview the first rows; the value is only displayed when this runs as the
# last expression of a notebook cell, otherwise it is discarded.
df.head()