samclane/SocialAnalysisTest.py

## SocialAnalysisTest.py
import pandas
import time
import random
import networkx as nx
import matplotlib.pyplot as plt

from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.preprocessing import MultiLabelBinarizer

# Synthetic presence log. Each row is one "join" event:
#   index   -- "timestamp", a strictly increasing Unix time (seconds)
#   member  -- the user who joined
#   present -- list of the other users already in the room at that moment
user_ids = ["user{}".format(n) for n in range(10)]

# Users that are biased to show up together. The mapping is symmetric so the
# bias applies whichever of the two is the joining member.
friend_of = {
    user_ids[0]: user_ids[1],
    user_ids[1]: user_ids[0],
    user_ids[5]: user_ids[9],
    user_ids[9]: user_ids[5],
}

starttime = int(time.time())
# spoof data
# Rows are accumulated in plain lists and turned into a DataFrame once at the
# end: DataFrame.append() no longer exists in pandas 2.x, and calling it per
# row copied the whole frame on every iteration.
timestamps, members, presents = [], [], []
for _ in range(500):
    member = random.choice(user_ids)
    others = [uid for uid in user_ids if uid != member]
    # At least one other user is always present; up to all of them.
    present = random.sample(others, random.randint(1, len(others)))

    # If the member has a designated friend who was not sampled, add that
    # friend 99% of the time.
    friend = friend_of.get(member)
    if friend is not None and friend not in present and random.random() < .99:
        present.append(friend)

    timestamps.append(starttime)
    members.append(member)
    presents.append(present)
    # Advance by at least one second so timestamps stay unique.
    starttime += random.randint(1, 1000)

df = pandas.DataFrame(
    {"member": members, "present": presents},
    index=pandas.Index(timestamps, name="timestamp"),
)

# Just to validate formatting
# df.to_csv(r"C:\Users\SawyerPC\Pictures\Saved Pictures\Data.csv")

# Use MLB to create binary vectors for each "present" list: one column per
# user label found in the lists, 1 where that user was present.
enc = MultiLabelBinarizer()
# Fit and transform once; the same matrix is printed and used for training.
present_matrix = enc.fit_transform(df["present"])
print(pandas.DataFrame(present_matrix, columns=enc.classes_, index=df.index))

# Disabled experiment kept for reference: compare the naive Bayes variants.
# NOTE: as written it never calls clf.fit(), so each classifier would have to
# be fitted on the encoded data before predict()/predict_proba() can run.
#
# classifiers = [GaussianNB(), MultinomialNB(), BernoulliNB()]
# for clf in classifiers:
#     print(type(clf).__name__+"\n--------")
#
#     # Given that only user0 member is in the room, who is most likely to join? (user1)
#     print(clf.predict([[1,0,0,0,0,0,0,0,0,0]]))
#     prob_map = [(enc.classes_[n], clf.predict_proba([[1,0,0,0,0,0,0,0,0,0]])[0][n]) for n in range(len(enc.classes_))]
#     print(sorted(prob_map, key=lambda i: i[1], reverse=True))
#     print("")

# GaussianNB is probably best
# Features: who is present. Target: which member joined.
clf = GaussianNB()
clf.fit(present_matrix, list(df["member"]))

# Directed, weighted graph over all users. The edge u -> o carries the
# classifier's probability that u is the joining member when o is the only
# user present.
social_graph = nx.DiGraph()
social_graph.add_nodes_from(user_ids)

# One batched prediction instead of one per (u, o, class) combination:
# row i is the distribution over joining members given that only user_ids[i]
# is present.
solo_rooms = enc.transform([[uid] for uid in user_ids])
solo_proba = clf.predict_proba(solo_rooms)
row_of = {uid: i for i, uid in enumerate(user_ids)}
# predict_proba columns are ordered like clf.classes_ (the member labels the
# classifier was trained on), which is not guaranteed to match enc.classes_.
col_of = {str(label): j for j, label in enumerate(clf.classes_)}

for u in user_ids:
    for o in user_ids:
        if o == u:
            continue
        # A user never observed as a joining member has no probability
        # column; treat that as probability zero instead of failing.
        if u in col_of:
            weight = float(solo_proba[row_of[o], col_of[u]])
        else:
            weight = 0.0
        social_graph.add_edge(u, o, weight=weight)

# Draw into the left cell of a 1x2 subplot grid.
plt.subplot(121)
# pos = nx.spring_layout(social_graph)
nx.draw(social_graph, with_labels=True, arrows=False, font_weight='bold')
# nx.draw_networkx_edge_labels(social_graph, pos)
plt.show()
	import pandas
	import time
	import random
	import networkx as nx
	import matplotlib.pyplot as plt

	from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
	from sklearn.preprocessing import MultiLabelBinarizer

	# Synthetic presence log. Each row is one "join" event:
	#   index   -- "timestamp", a strictly increasing Unix time (seconds)
	#   member  -- the user who joined
	#   present -- list of the other users already in the room at that moment
	user_ids = ["user{}".format(n) for n in range(10)]

	# Users that are biased to show up together. The mapping is symmetric so
	# the bias applies whichever of the two is the joining member.
	friend_of = {
		user_ids[0]: user_ids[1],
		user_ids[1]: user_ids[0],
		user_ids[5]: user_ids[9],
		user_ids[9]: user_ids[5],
	}

	starttime = int(time.time())
	# spoof data
	# Rows are accumulated in plain lists and turned into a DataFrame once at
	# the end: DataFrame.append() no longer exists in pandas 2.x, and calling
	# it per row copied the whole frame on every iteration.
	timestamps, members, presents = [], [], []
	for _ in range(500):
		member = random.choice(user_ids)
		others = [uid for uid in user_ids if uid != member]
		# At least one other user is always present; up to all of them.
		present = random.sample(others, random.randint(1, len(others)))

		# If the member has a designated friend who was not sampled, add
		# that friend 99% of the time.
		friend = friend_of.get(member)
		if friend is not None and friend not in present and random.random() < .99:
			present.append(friend)

		timestamps.append(starttime)
		members.append(member)
		presents.append(present)
		# Advance by at least one second so timestamps stay unique.
		starttime += random.randint(1, 1000)

	df = pandas.DataFrame(
		{"member": members, "present": presents},
		index=pandas.Index(timestamps, name="timestamp"),
	)

	# Just to validate formatting
	# df.to_csv(r"C:\Users\SawyerPC\Pictures\Saved Pictures\Data.csv")

	# Use MLB to create binary vectors for each "present" list: one column per
	# user label found in the lists, 1 where that user was present.
	enc = MultiLabelBinarizer()
	# Fit and transform once; the same matrix is printed and used for training.
	present_matrix = enc.fit_transform(df["present"])
	print(pandas.DataFrame(present_matrix, columns=enc.classes_, index=df.index))

	# Disabled experiment kept for reference: compare the naive Bayes variants.
	# NOTE: as written it never calls clf.fit(), so each classifier would have
	# to be fitted on the encoded data before predict()/predict_proba() can run.
	#
	# classifiers = [GaussianNB(), MultinomialNB(), BernoulliNB()]
	# for clf in classifiers:
	#     print(type(clf).__name__+"\n--------")
	#
	#     # Given that only user0 member is in the room, who is most likely to join? (user1)
	#     print(clf.predict([[1,0,0,0,0,0,0,0,0,0]]))
	#     prob_map = [(enc.classes_[n], clf.predict_proba([[1,0,0,0,0,0,0,0,0,0]])[0][n]) for n in range(len(enc.classes_))]
	#     print(sorted(prob_map, key=lambda i: i[1], reverse=True))
	#     print("")

	# GaussianNB is probably best
	# Features: who is present. Target: which member joined.
	clf = GaussianNB()
	clf.fit(present_matrix, list(df["member"]))

	# Directed, weighted graph over all users. The edge u -> o carries the
	# classifier's probability that u is the joining member when o is the
	# only user present.
	social_graph = nx.DiGraph()
	social_graph.add_nodes_from(user_ids)

	# One batched prediction instead of one per (u, o, class) combination:
	# row i is the distribution over joining members given that only
	# user_ids[i] is present.
	solo_rooms = enc.transform([[uid] for uid in user_ids])
	solo_proba = clf.predict_proba(solo_rooms)
	row_of = {uid: i for i, uid in enumerate(user_ids)}
	# predict_proba columns are ordered like clf.classes_ (the member labels
	# the classifier was trained on), which is not guaranteed to match
	# enc.classes_.
	col_of = {str(label): j for j, label in enumerate(clf.classes_)}

	for u in user_ids:
		for o in user_ids:
			if o == u:
				continue
			# A user never observed as a joining member has no probability
			# column; treat that as probability zero instead of failing.
			if u in col_of:
				weight = float(solo_proba[row_of[o], col_of[u]])
			else:
				weight = 0.0
			social_graph.add_edge(u, o, weight=weight)

	# Draw into the left cell of a 1x2 subplot grid.
	plt.subplot(121)
	# pos = nx.spring_layout(social_graph)
	nx.draw(social_graph, with_labels=True, arrows=False, font_weight='bold')
	# nx.draw_networkx_edge_labels(social_graph, pos)
	plt.show()