|
import pandas |
|
import time |
|
import random |
|
import networkx as nx |
|
import matplotlib.pyplot as plt |
|
|
|
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB |
|
from sklearn.preprocessing import MultiLabelBinarizer |
|
|
|
# ---------------------------------------------------------------------------
# Spoofed attendance data.
#
# Each of the 500 rows records one "join" event: `member` is the user who
# joined and `present` is the list of other users in the room at that moment.
# Rows are indexed by a strictly increasing integer timestamp.
# ---------------------------------------------------------------------------
user_ids = ["user{}".format(n) for n in range(10)]

# Pairs that are biased to show up together so the classifier has a signal to
# find: user0 <-> user1 and user5 <-> user9.
friend_of = {}
for left, right in ((user_ids[0], user_ids[1]), (user_ids[5], user_ids[9])):
    friend_of[left] = right
    friend_of[right] = left

starttime = int(time.time())

# Rows are accumulated in plain lists and turned into a DataFrame once at the
# end. `DataFrame.append` was removed in pandas 2.0, and calling it per row
# copied the whole frame on every iteration.
rows = []
timestamps = []
for _ in range(500):
    member = random.choice(user_ids)
    others = [uid for uid in user_ids if uid != member]
    present = random.sample(others, random.randint(1, len(others)))

    # With 99% probability, force the member's friend into the room when the
    # random sample did not already include them.
    friend = friend_of.get(member)
    if friend is not None and friend not in present and random.random() < .99:
        present.append(friend)

    rows.append({"member": member, "present": present})
    timestamps.append(starttime)
    # Advance the clock by 1..1000 seconds so timestamps stay unique.
    starttime += random.randint(1, 1000)

df = pandas.DataFrame(
    rows,
    columns=["member", "present"],
    index=pandas.Index(timestamps, name="timestamp"),
)
|
|
|
# Just to validate formatting |
|
# df.to_csv(r"C:\Users\SawyerPC\Pictures\Saved Pictures\Data.csv") |
|
|
|
# Encode every "present" list as a binary indicator row (one column per user)
# with MultiLabelBinarizer, and print the resulting table for inspection.
enc = MultiLabelBinarizer()
presence_matrix = enc.fit_transform(df["present"])
presence_frame = pandas.DataFrame(
    presence_matrix,
    columns=enc.classes_,
    index=df.index,
)
print(presence_frame)
|
|
# Disabled experiment, kept as a bare string so it never executes.
# NOTE(review): the snippet never calls clf.fit(...), so it would need a fit
# step added before it could be re-enabled.
"""
# Try different classifiers
classifiers = [GaussianNB(), MultinomialNB(), BernoulliNB()]
for clf in classifiers:
    print(type(clf).__name__+"\n--------")

    # Given that only user0 member is in the room, who is most likely to join? (user1)
    print(clf.predict([[1,0,0,0,0,0,0,0,0,0]]))
    prob_map = [(enc.classes_[n], clf.predict_proba([[1,0,0,0,0,0,0,0,0,0]])[0][n]) for n in range(len(enc.classes_))]
    print(sorted(prob_map, key=lambda i: i[1], reverse=True))
    print("")
"""

# GaussianNB is probably best.
# Features: binary presence vectors (the encoder is re-fitted here).
# Labels: the member who joined for each row.
presence_features = enc.fit_transform(df["present"])
joined_members = df["member"].tolist()
clf = GaussianNB()
clf.fit(presence_features, joined_members)
|
|
|
# Build a directed graph over all users. The edge u -> o is weighted by the
# classifier's estimate of P(member == u | only o is present), i.e. how likely
# u is to be the one joining when o is alone in the room.
social_graph = nx.DiGraph()
social_graph.add_nodes_from(user_ids)

# The feature vector depends only on `o`, so predict once for every user in a
# single batch instead of re-running predict_proba for each (u, o, class).
# Row i of `solo_probs` corresponds to "only user_ids[i] is present".
solo_vectors = enc.transform([[uid] for uid in user_ids])
solo_probs = clf.predict_proba(solo_vectors)

# predict_proba columns are ordered by clf.classes_ (the member labels seen
# during fit), not by enc.classes_ (the labels seen in "present" lists).
member_column = {label: col for col, label in enumerate(clf.classes_)}

for u in user_ids:
    col = member_column.get(u)
    for row, o in enumerate(user_ids):
        if o == u:
            continue
        # A user never observed as a member has no column; the fitted model
        # assigns them no probability mass, so use 0.0.
        weight = float(solo_probs[row, col]) if col is not None else 0.0
        social_graph.add_edge(u, o, weight=weight)
|
|
|
# Draw the social graph in the left cell of a 1x2 subplot grid and display it.
# Edges are rendered without arrowheads even though the graph is directed.
graph_axes = plt.subplot(1, 2, 1)
# pos = nx.spring_layout(social_graph)
nx.draw(
    social_graph,
    ax=graph_axes,
    with_labels=True,
    arrows=False,
    font_weight='bold',
)
# nx.draw_networkx_edge_labels(social_graph, pos)
plt.show()