Skip to content

Instantly share code, notes, and snippets.

@dray89
Last active June 20, 2022 06:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dray89/f6195ce9369257c79d89fd7bb9ed5287 to your computer and use it in GitHub Desktop.
Save dray89/f6195ce9369257c79d89fd7bb9ed5287 to your computer and use it in GitHub Desktop.
Social Graphs - Python
#pip install EIA-python
#pip install networkx
import numpy as np
import pandas as pd
import eia
import networkx as nx
import matplotlib.pyplot as plt
# Get an API key from the EIA website and pass it to eia.API().
# The key is taken from the EIA_API_KEY environment variable when that is set
# (and non-empty), so each user can supply their own key without editing this
# file; otherwise the key previously embedded in this script is used.
# NOTE(review): a key committed to source control should be treated as public;
# consider rotating it and dropping the fallback.
import os

apiKey = os.environ.get("EIA_API_KEY") or "5f54b3e66477e22ec068066b1de8026d"
api = eia.API(apiKey)
# One EIA series ID per country (the three-letter country code is embedded in
# the ID). The order matters: column names are assigned positionally later on.
series_id_list = [
    "INTL.57-1-DZA-TBPD.M",
    "INTL.57-1-AGO-TBPD.M",
    "INTL.57-1-COG-TBPD.M",
    "INTL.57-1-COD-TBPD.M",
    "INTL.57-1-ECU-TBPD.M",
    "INTL.57-1-GNQ-TBPD.M",
    "INTL.57-1-GAB-TBPD.M",
    "INTL.57-1-IRN-TBPD.M",
    "INTL.57-1-IRQ-TBPD.M",
    "INTL.57-1-KWT-TBPD.M",
    "INTL.57-1-LBY-TBPD.M",
    "INTL.57-1-NGA-TBPD.M",
    "INTL.57-1-QAT-TBPD.M",
    "INTL.57-1-RUS-TBPD.M",
    "INTL.57-1-SAU-TBPD.M",
    "INTL.57-1-ARE-TBPD.M",
    "INTL.57-1-VEN-TBPD.M",
    "INTL.57-1-USA-TBPD.M",
]
# Call api.data_by_series() for each series and wrap every result in its own
# pandas DataFrame, then join them side by side (one column per series).
df_list = [pd.DataFrame(api.data_by_series(series)) for series in series_id_list]
oil_data = pd.concat(df_list, axis=1)
# Drop NaN values.
# Missing observations arrive as the string "--". Coerce every column to a
# numeric dtype explicitly instead of relying on replace() to infer one:
# "--" (and any other non-numeric entry) becomes NaN, and the columns are
# guaranteed to be numeric for the mean() and corr() calls that follow.
oil_data = oil_data.apply(pd.to_numeric, errors="coerce")
# Keep only the rows for which every country has a value.
oil_data_reduced = oil_data.dropna()
# Bare expression: displays the frame when run as a notebook cell.
oil_data_reduced
# Rename columns: replace the existing column labels with readable country
# names. The assignment is positional, so there must be exactly one name per
# column, in column order.
country_names = [
    "Algeria",
    "Angola",
    "Congo-Brazzaville",
    "Congo-Kinshasa",
    "Ecuador",
    "Equatorial Guinea",
    "Gabon",
    "Iran",
    "Iraq",
    "Kuwait",
    "Libya",
    "Nigeria",
    "Qatar",
    "Russia",
    "Saudi Arabia",
    "United Arab Emirates",
    "Venezuela",
    "USA",
]
oil_data_reduced.columns = country_names
# Check out the highest oil producing countries by average volume:
# per-column mean, largest first (displayed when run as a notebook cell).
average_volume = oil_data_reduced.mean(axis=0)
average_volume.sort_values(ascending=False)
# Pairwise correlation matrix between the country columns.
corrs = oil_data_reduced.corr()
# Turn the square matrix into a long "links" data frame with three columns:
# stack() moves the column labels into a second index level, and
# reset_index() turns both index levels into ordinary columns.
stacked_corrs = corrs.stack()
links = stacked_corrs.reset_index()
# Bare expression: displays the frame when run as a notebook cell.
links
links.columns = ['country A', 'country B', 'value']
# Positive Correlations
# Keep only correlations at or above the threshold and remove self
# correlation (cor(A,A)=1).
positive_correlations = links.loc[
    (links['value'] >= 0.5) & (links['country A'] != links['country B'])
]
# Build a graph with one edge per remaining (country A, country B) pair.
positive_links = nx.from_pandas_edgelist(positive_correlations, 'country A', 'country B')
pos = nx.circular_layout(positive_links)
# Every node is labelled with its own name.
labels = {country: country for country in pos}
# Label positions: shift each label vertically away from the horizontal axis
# of the circle so the text does not sit on top of its node.
pos_higher = {}
for country, value in pos.items():
    if value[1] > 0:
        pos_higher[country] = (value[0], value[1] + 0.06)
    else:
        pos_higher[country] = (value[0], value[1] - 0.06)
# Hand-tuned (dx, dy) nudges for individual labels. A country only appears in
# the graph when it has at least one correlation above the threshold, so each
# nudge is applied only if the country is actually present; indexing
# unconditionally would raise KeyError for an absent country.
positive_label_nudges = {
    'Congo-Kinshasa': (.06, 0),
    'Algeria': (.05, .1),
    'Saudi Arabia': (-.04, 0),
    'Qatar': (-.03, 0),
    'Kuwait': (-.03, 0),
    'Nigeria': (.01, 0),
    'Angola': (.01, 0),
}
for country, (dx, dy) in positive_label_nudges.items():
    if country in pos_higher:
        x, y = pos_higher[country]
        pos_higher[country] = (x + dx, y + dy)
fig, ax = plt.subplots(figsize=(20, 20))
margin = .05
# Arguments are left, bottom, right, top: leave the same margin on every side.
fig.subplots_adjust(margin, margin, 1. - margin, 1. - margin)
ax.axis('equal')
# Draw onto the axes created above rather than whatever the current axes are.
nx.draw(positive_links, pos=pos, edge_color='black', ax=ax)
nx.draw_networkx_labels(positive_links, pos_higher, labels, ax=ax)
plt.savefig("positive_correlation.jpg")
# Negative Correlations
# Keep only correlations at or below the negative threshold; the inequality
# on the country names mirrors the positive case and removes self pairs.
negative_correlations = links.loc[
    (links['value'] <= -0.5) & (links['country A'] != links['country B'])
]
# Build a graph with one edge per remaining (country A, country B) pair.
negative_links = nx.from_pandas_edgelist(negative_correlations, 'country A', 'country B')
neg = nx.circular_layout(negative_links)
# Every node is labelled with its own name.
labels = {country: country for country in neg}
# Label positions: shift each label vertically away from the horizontal axis
# of the circle so the text does not sit on top of its node.
neg_higher = {}
for country, value in neg.items():
    if value[1] > 0:
        neg_higher[country] = (value[0], value[1] + 0.06)
    else:
        neg_higher[country] = (value[0], value[1] - 0.06)
# Hand-tuned (dx, dy) nudges for individual labels. A country only appears in
# the graph when it has at least one correlation below the threshold, so each
# nudge is applied only if the country is actually present; indexing
# unconditionally would raise KeyError for an absent country.
negative_label_nudges = {
    'Congo-Kinshasa': (.05, 0),
    'Algeria': (.05, .1),
}
for country, (dx, dy) in negative_label_nudges.items():
    if country in neg_higher:
        x, y = neg_higher[country]
        neg_higher[country] = (x + dx, y + dy)
fig, ax = plt.subplots(figsize=(20, 20))
margin = .05
# Arguments are left, bottom, right, top: leave the same margin on every side.
fig.subplots_adjust(margin, margin, 1. - margin, 1. - margin)
ax.axis('equal')
# Draw onto the axes created above rather than whatever the current axes are.
nx.draw(negative_links, pos=neg, edge_color='black', ax=ax)
nx.draw_networkx_labels(negative_links, neg_higher, labels, ax=ax)
plt.savefig("negative_correlation.jpg")
# Calculating Social Graph Measurements
# The bare expressions below only display their results when run as notebook
# cells; in a plain script they are evaluated and discarded.

# Positive Correlations: only shortest_path_length is calculated because the
# positive graph is disconnected. Eccentricity and diameter would throw
# errors on a disconnected graph.
list(nx.shortest_path_length(positive_links))
# Number of above-threshold links per country. The frame is named
# positive_correlations (plural); the singular spelling used previously was
# undefined and raised NameError.
positive_correlations['country A'].value_counts()

# Negative Correlations
# NOTE(review): eccentricity and diameter assume the negative graph is
# connected -- confirm for the data in use.
eccentricity = nx.eccentricity(negative_links)
ecc_df = pd.DataFrame.from_dict(
    eccentricity, orient='index', columns=['Eccentricity']
).rename_axis('Country', axis='index')
ecc_df.sort_values(by='Eccentricity', ascending=False)
# Reuse the eccentricities computed above instead of recomputing them.
nx.diameter(negative_links, e=eccentricity)
list(nx.shortest_path_length(negative_links))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment