Last active
February 8, 2018 08:47
-
-
Save cjue25/da4aa5c5ccfd1f92838440d898598506 to your computer and use it in GitHub Desktop.
Using Python for Research_Case Study 6
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Basics of NetworkX: build a small undirected graph, then edit it.
import networkx as nx

G = nx.Graph()

# Add a single node.
G.add_node(1)
# Add several nodes in one call.
G.add_nodes_from((2, 3, "u", "v"))
# print(G.nodes())

# Add edges one at a time.
G.add_edge(1, 2)
G.add_edge("u", "v")
# Add several edges in one call.
G.add_edges_from(((1, 3), (1, 4), (1, 5), (1, 6)))
# Endpoints that are not in the graph yet are added automatically.
G.add_edge("u", "w")
# print(G.edges())

# Remove nodes and edges, singly or in bulk.
G.remove_node(2)
G.remove_nodes_from((4, 5))
G.remove_edge(1, 3)
G.remove_edges_from(((1, 2), ("u", "v")))

# Count what is left.
G.number_of_nodes()
G.number_of_edges()
## Draw a graph
import matplotlib.pyplot as plt

G = nx.karate_club_graph()
nx.draw(
    G,
    with_labels=True,
    node_color="lightblue",
    edge_color="gray",
)
plt.savefig("karate_graph.png")

# G.degree() is a DegreeView: dict-like, with nodes as keys and the number
# of edges at each node as values.
G.degree()
# A single node's degree can be looked up in two ways.
G.degree(33)
G.degree()[33]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.stats import bernoulli | |
## Erdos-Renyi graph
# p is a probability between 0 and 1.  Each bernoulli.rvs(p=p) draw is 0 or 1,
# and the larger p is, the more likely the draw is 1.
def er_graph(N, p):
    """Build a random graph on nodes 0 .. N-1 with edge probability p.

    Every unordered pair of distinct nodes is visited exactly once (lower
    label first); an edge is added when the Bernoulli draw for that pair
    comes out as 1.

    Args:
        N: number of nodes.
        p: probability passed to bernoulli.rvs for each pair.

    Returns:
        The generated nx.Graph.
    """
    graph = nx.Graph()
    graph.add_nodes_from(range(N))
    for low in range(N):
        # Only pair `low` with larger labels so no pair is drawn twice.
        for high in range(low + 1, N):
            if bernoulli.rvs(p=p):  # a draw of 1 is truthy
                graph.add_edge(low, high)
    return graph
# Open a new figure first: nx.draw paints on the current pyplot figure, which
# would otherwise still hold whatever was drawn earlier in the session.
plt.figure()
nx.draw(er_graph(50, 0.08), node_size=40, node_color="gray")
plt.savefig('er_graph1.png')
def plot_degree_distribution(G):
    """Plot a step histogram of the node degrees of G with pyplot.

    Args:
        G: graph whose degree() yields (node, degree) pairs.
    """
    degrees = [degree for _node, degree in G.degree()]
    plt.hist(degrees, histtype="step")
    plt.title("Degree distribution")
    plt.xlabel("Degree $k$")
    plt.ylabel("$P(k)$")
# Overlay the degree distributions of three independent ER graphs.
# A new figure keeps the histograms off the axes used by the earlier
# network drawing, so hist_3.png contains only the three histograms.
plt.figure()
G1 = er_graph(500, 0.08)
plot_degree_distribution(G1)
G2 = er_graph(500, 0.08)
plot_degree_distribution(G2)
G3 = er_graph(500, 0.08)
plot_degree_distribution(G3)
plt.savefig('hist_3.png')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
# Load the two village relationship matrices from comma-separated files
# (presumably adjacency matrices, going by the file names).
A1 = np.loadtxt(
    "adj_allVillageRelationships_vilno_1.csv",
    delimiter=",",
)
A2 = np.loadtxt(
    "adj_allVillageRelationships_vilno_2.csv",
    delimiter=",",
)

# Convert each matrix into a NetworkX graph.
G1, G2 = (nx.to_networkx_graph(matrix) for matrix in (A1, A2))
def basic_net_stats(G):
    """Print the node count, edge count and average degree of G.

    Args:
        G: graph exposing number_of_nodes(), number_of_edges() and
            degree(), the latter yielding (node, degree) pairs.

    Returns:
        None; the three statistics are written to standard output.

    A graph without nodes reports an average degree of 0.00 rather than
    triggering NumPy's empty-mean warning and printing nan.
    """
    print("Number of nodes: %d" % G.number_of_nodes())
    print("Number of edges: %d" % G.number_of_edges())
    degree_sequence = [d for _, d in G.degree()]
    # np.mean([]) warns and yields nan, so handle the empty graph explicitly.
    average_degree = np.mean(degree_sequence) if degree_sequence else 0.0
    print("Average degree: %.2f" % average_degree)
basic_net_stats(G1)
basic_net_stats(G2)

# Plot both village degree distributions together, on a new figure so that
# village_hist.png does not also contain plots made earlier in the session.
plt.figure()
plot_degree_distribution(G1)
plot_degree_distribution(G2)
plt.savefig("village_hist.png")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## nx.connected_components(G) yields one set of nodes per connected component,
## so max(..., key=len) selects the node set of the largest component.
## (nx.connected_component_subgraphs was removed in NetworkX 2.4; taking
## G.subgraph(nodes).copy() gives the same independent subgraph.)
G1_LCC = G1.subgraph(max(nx.connected_components(G1), key=len)).copy()
G2_LCC = G2.subgraph(max(nx.connected_components(G2), key=len)).copy()

# Share of each village's nodes that belong to its largest component.
fraction_G1 = G1_LCC.number_of_nodes() / G1.number_of_nodes()
fraction_G2 = G2_LCC.number_of_nodes() / G2.number_of_nodes()

# Draw each largest component on its own figure.
plt.figure()
nx.draw(G1_LCC, node_color='red', edge_color='gray', node_size=20)
plt.savefig('village1.png')

plt.figure()
nx.draw(G2_LCC, node_color='green', edge_color='gray', node_size=20)
plt.savefig('village2.png')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Base URL prepended to the dataset file names read further down.
# NOTE(review): this assignment used to sit inside a string literal, leaving
# `data_filepath` undefined unless the environment provided it - confirm that
# defining it here matches the intended setup.
data_filepath = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_974/datasets/'
from collections import Counter | |
def frequency(chars):
    """Count how often each characteristic value occurs.

    Args:
        chars: dict mapping an individual to a characteristic value.

    Returns:
        A Counter mapping each distinct value to its number of occurrences.
    """
    # Count the values of the argument itself, not of a module-level dict.
    return Counter(chars.values())


def chance_homophily(chars):
    """Return the chance that two individuals share a characteristic value.

    Computed as the sum over all values of (relative frequency) squared.

    Args:
        chars: dict mapping an individual to a characteristic value.

    Returns:
        The sum of squared relative frequencies; 0 when chars is empty.
    """
    counts = frequency(chars)
    # The population size does not change inside the loop; compute it once.
    population = sum(counts.values())
    return sum((count / population) ** 2 for count in counts.values())
# Toy example: three people and their favorite colors.
favorite_colors = {"ankit": "red", "xiaoyu": "blue", "mary": "blue"}

# Chance that two of them share a favorite color.
color_homophily = chance_homophily(favorite_colors)
print(color_homophily)
import pandas as pd | |
df = pd.read_stata(data_filepath + "individual_characteristics.dta")

# One frame per village.
df1 = df[df["village"] == 1]
df2 = df[df["village"] == 2]
df1.head()

# Approach 1: pair every pid with the characteristic through zip().
sex1, caste1, religion1 = (
    dict(zip(df1.pid.values, df1[column]))
    for column in ("resp_gend", "caste", "religion")
)
# Same for village 2.
sex2, caste2, religion2 = (
    dict(zip(df2.pid.values, df2[column]))
    for column in ("resp_gend", "caste", "religion")
)

### Approach 2 (overwrites the six dicts built above): index by pid, then
### convert the chosen column to a dict.
sex1, caste1, religion1 = (
    df1.set_index("pid")[column].to_dict()
    for column in ("resp_gend", "caste", "religion")
)
sex2, caste2, religion2 = (
    df2.set_index("pid")[column].to_dict()
    for column in ("resp_gend", "caste", "religion")
)

# Chance homophily of each characteristic, village by village.
print("Village 1 chance of same sex:", chance_homophily(sex1))
print("Village 1 chance of same caste:", chance_homophily(caste1))
print("Village 1 chance of same religion:", chance_homophily(religion1))

print("Village 2 chance of same sex:", chance_homophily(sex2))
print("Village 2 chance of same caste:", chance_homophily(caste2))
print("Village 2 chance of same religion:", chance_homophily(religion2))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def homophily(G, chars, IDs):
    """
    Given a network G, a dict of characteristics chars for node IDs,
    and dict of node IDs for each node in the network,
    find the homophily of the network.

    Args:
        G: graph whose edges() yields (node, node) pairs.
        chars: mapping from an individual's ID to a characteristic value.
        IDs: mapping from a node of G to that individual's ID.

    Returns:
        The fraction of ties whose two endpoints have the same
        characteristic, counting only ties where both endpoints appear in
        chars.  Returns 0.0 when no tie qualifies, instead of dividing by
        zero.
    """
    num_same_ties = 0
    num_ties = 0
    # Every pair yielded by G.edges() is a tie already, so no has_edge check
    # is needed.
    for n1, n2 in G.edges():
        id1 = IDs[n1]
        if id1 not in chars:
            continue
        id2 = IDs[n2]
        if id2 not in chars:
            continue
        num_ties += 1
        if chars[id1] == chars[id2]:
            num_same_ties += 1
    if num_ties == 0:
        return 0.0
    return num_same_ties / num_ties
# The key files have no header row, so they are read with header=None.
# read_csv returns a DataFrame, and homophily() looks nodes up as IDs[node];
# on a DataFrame that would select a column, so keep only the first column
# (a Series indexed by row number).
# NOTE(review): assumes one pid per row in the first column - confirm against
# the key files.
pid1 = pd.read_csv(data_filepath + "key_vilno_1.csv", header=None).iloc[:, 0]
pid2 = pd.read_csv(data_filepath + "key_vilno_2.csv", header=None).iloc[:, 0]

print("Village 1 observed proportion of same sex:", homophily(G1, sex1, pid1))
print("Village 1 observed proportion of same caste:", homophily(G1, caste1, pid1))
print("Village 1 observed proportion of same religion:", homophily(G1, religion1, pid1))

print("Village 2 observed proportion of same sex:", homophily(G2, sex2, pid2))
print("Village 2 observed proportion of same caste:", homophily(G2, caste2, pid2))
print("Village 2 observed proportion of same religion:", homophily(G2, religion2, pid2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment