Last active
July 20, 2017 11:38
-
-
Save tomires/8a40ec1fa738144f1562ef1ced410779 to your computer and use it in GitHub Desktop.
FB messages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import json | |
import os | |
import datetime | |
import csv | |
#import numpy as np | |
#import pandas as pd | |
#import matplotlib.pyplot as plt | |
from collections import Counter | |
# config
# Senders need more than this many messages to appear in the printed ranking.
MIN_MESSAGES = 100
# When True, threads with more than two participants are skipped.
IGNORE_GROUP_CONVERSATIONS = True
# When True, sender IDs that have no entry in names.csv are listed at the end.
DEBUG = True

# Lookup table: Facebook ID (without the '@facebook.com' suffix) -> display name.
names = {}
# Binary mode is what the Python 2 csv module expects.
with open('names.csv', 'rb') as names_csv:
    names_file = csv.reader(names_csv, delimiter=',', quotechar='|')
    for row in names_file:
        # Blank or single-column rows cannot supply an id/name pair; skip them
        # instead of dying with IndexError on row[1].
        if len(row) < 2:
            continue
        names[row[0].replace('@facebook.com', '')] = row[1]

# The context manager closes the export file as soon as it has been parsed.
with open('messages.json') as json_data:
    threads = json.load(json_data)['threads']
# Tally messages per sender, resolving Facebook IDs to names where possible.
sender_pop = Counter()
messages = []
unknown_ids = []
# Set mirror of unknown_ids for O(1) duplicate checks; the list itself keeps
# the order in which unresolved IDs were first seen.
seen_unknown_ids = set()

for thread in threads:
    if IGNORE_GROUP_CONVERSATIONS and len(thread['participants']) > 2:
        continue
    for message in thread['messages']:
        sender = message['sender'].encode('utf-8')
        if '@' in sender:
            try:
                sender = names[sender.replace('@facebook.com', '')]
            except KeyError:
                # No name on file: the message is counted under the raw ID and
                # the ID is remembered for the DEBUG listing.
                if sender not in seen_unknown_ids:
                    seen_unknown_ids.add(sender)
                    unknown_ids.append(sender)
        sender_pop[sender] += 1
# Rank senders by message count, busiest first (ties keep dict order).
ranking = sorted(sender_pop.items(), key=lambda entry: entry[1], reverse=True)
total_messages = sum(count for _, count in ranking)

for sender_name, count in ranking:
    if count > MIN_MESSAGES:
        print("{0}: {1}".format(sender_name, count))

# The busiest sender is taken to be the account owner. Reading it straight
# from the head of the ranking keeps owner_messages defined even when nobody
# exceeds MIN_MESSAGES, and avoids using '' as a "not found yet" sentinel.
if ranking:
    current_user, owner_messages = ranking[0]
else:
    current_user, owner_messages = '', 0

print('--------------------------')
print('TOTAL MESSAGES: ' + str(total_messages))
print('TOTAL USERS: ' + str(len(ranking)))
if ranking:
    print('Considering ' + current_user + ' as your account name. Hope it\'s correct!')
    # total_messages is at least 1 here because ranking is non-empty.
    print(
        'Judging by the stats above, your messages make up for '
        + str(int(owner_messages * 100 / total_messages))
        + '% of total count.'
    )
else:
    print('No messages were counted, so the account owner could not be determined.')
# Collect the text of every message written by the account owner.
for thread in threads:
    # Apply the same group-chat filter that was in effect when owner_messages
    # was counted, so the text and the message count cover the same threads.
    if IGNORE_GROUP_CONVERSATIONS and len(thread['participants']) > 2:
        continue
    for message in thread['messages']:
        sender = message['sender'].encode('utf-8')
        if '@' in sender:
            # Unresolved IDs stay as they are, matching how they were counted.
            sender = names.get(sender.replace('@facebook.com', ''), sender)
        if sender == current_user:
            messages.append(message['message'].encode('utf-8'))

combo = ' '.join(messages)
# Count whitespace-separated words; len(combo) would be the byte length of
# the joined text, not a word count.
word_count = len(combo.split())
# Guard against a zero divisor when no owner messages were counted.
word_average = int(word_count / owner_messages) if owner_messages else 0
print(
    'You have written ' + str(word_count)
    + ' words across ' + str(owner_messages)
    + ' messages. Your average message contains ' + str(word_average)
    + ' words.'
)
# Optional diagnostics: list the sender IDs collected in unknown_ids so that
# names can be supplied for them.
if DEBUG:
    debug_lines = [
        '> DEBUG MODE ON',
        '> PLEASE SUPPLY NAMES FOR THE FOLLOWING IDS:',
    ]
    debug_lines.extend('> ' + unknown_id for unknown_id in unknown_ids)
    for line in debug_lines:
        print(line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment