Last active
July 26, 2018 12:29
-
-
Save mikk-c/9284854b46553b3360c53582c349a0f1 to your computer and use it in GitHub Desktop.
Network Analysis Practice Session #01
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Importing the stuff we need\n", | |
"# NOTE: This notebook uses Networkx 2 on Python 3\n", | |
"import networkx as nx" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Edgelist format: one line per edge\n", | |
"# NodeID<delimiter>NodeID\n", | |
"\n", | |
"# https://homes.cs.washington.edu/~fire/datasets/TheMarkerAnonymized.zip\n", | |
"graph_uri = \"TheMarkerAnonymized.csv\"\n", | |
"# https://homes.cs.washington.edu/~fire/datasets/anybeatAnonymized.zip\n", | |
"dgraph_uri = \"anybeatAnonymized.csv\"\n", | |
"\n", | |
"# Networkx's default delimiter is the whitespace,\n", | |
"# in this case we need to change it\n", | |
"G = nx.read_edgelist(graph_uri, delimiter = \",\")\n", | |
"# Networkx's default graph is undirected (nx.Graph).\n", | |
"#In this case, we need to change it to a directed graph (nx.DiGraph)\n", | |
"dG = nx.read_edgelist(dgraph_uri, delimiter = \",\", create_using = nx.DiGraph()) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"69413 69413\n", | |
"1644849 1644849\n" | |
] | |
} | |
], | |
"source": [ | |
"# Helper function, returns the number of nodes...\n", | |
"num_nodes = G.number_of_nodes()\n", | |
"# ...but we can calculate the length of the node view.\n", | |
"num_nodes_2 = len(G.nodes)\n", | |
"\n", | |
"# Helper function, returns the number of edges...\n", | |
"num_edges = G.number_of_edges()\n", | |
"# ...but we can calculate the length of the edge view.\n", | |
"num_edges_2 = len(G.edges)\n", | |
"\n", | |
"# Checking that they return the same number...\n", | |
"print(num_nodes, num_nodes_2)\n", | |
"print(num_edges, num_edges_2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"# of selfloops: 6\n", | |
"Nodes with self loops:\n", | |
"61988\n", | |
"16566\n", | |
"31029\n", | |
"3524\n", | |
"23676\n", | |
"14421\n" | |
] | |
} | |
], | |
"source": [ | |
"# Helper function, returns the number of selfloops\n", | |
"print(\"# of selfloops: %d\" % G.number_of_selfloops())\n", | |
"\n", | |
"# Networkx can tell us which nodes have a selfloop,\n", | |
"# so we can easily list them\n", | |
"print(\"Nodes with self loops:\")\n", | |
"for n in G.nodes_with_selfloops():\n", | |
" print(n)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Cycle in G:\n", | |
"[('134', '30'), ('30', '51850'), ('51850', '302'), ('302', '113'), ('113', '48861'), ('48861', '134')]\n", | |
"G is a DAG: False\n", | |
"G is a tree: False\n", | |
"Cycle in dG:\n", | |
"[('1', '4152'), ('4152', '1')]\n", | |
"dG is a DAG: False\n", | |
"dG is a tree: False\n" | |
] | |
} | |
], | |
"source": [ | |
"# Does this network have a cycle?\n", | |
"# If it does, nx.find_cycle() will return a random one:\n", | |
"print(\"Cycle in G:\")\n", | |
"print(nx.find_cycle(G))\n", | |
"\n", | |
"# Note that, if there is no cycle in the graph,\n", | |
"# nx.find_cycle() will raise an exception\n", | |
"# But G isn't a DAG, as correctly reported by nx.is_directed_acyclic_graph(G)\n", | |
"print(\"G is a DAG: %s\" % nx.is_directed_acyclic_graph(G))\n", | |
"print(\"G is a tree: %s\" % nx.is_tree(G))\n", | |
"\n", | |
"print(\"Cycle in dG:\")\n", | |
"print(nx.find_cycle(dG))\n", | |
"print(\"dG is a DAG: %s\" % nx.is_directed_acyclic_graph(dG))\n", | |
"print(\"dG is a tree: %s\" % nx.is_tree(dG))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Degree of node 2 in G: 3\n", | |
"Degree of node 2 in dG: 3\n", | |
"In-degree of node 2 in dG: 2\n", | |
"Out-degree of node 2 in dG: 1\n" | |
] | |
} | |
], | |
"source": [ | |
"# If we want to know node 2's degree:\n", | |
"print(\"Degree of node 2 in G: %d\" % G.degree(\"2\"))\n", | |
"\n", | |
"# Works also for directed graphs:\n", | |
"print(\"Degree of node 2 in dG: %d\" % dG.degree(\"2\"))\n", | |
"# But here you should distinguish between in-degree and out-degree:\n", | |
"print(\"In-degree of node 2 in dG: %d\" % dG.in_degree(\"2\"))\n", | |
"print(\"Out-degree of node 2 in dG: %d\" % dG.out_degree(\"2\"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"G's density: 0.07%\n", | |
"dG's density: 0.04%\n" | |
] | |
} | |
], | |
"source": [ | |
"# How dense are our graphs?\n", | |
"# Remember: density = number_of_edges / max_theoretical_number_of_edges\n", | |
"# Here I transform density into a percentage\n", | |
"print(\"G's density: %1.2f%%\" % (100 * nx.density(G)))\n", | |
"print(\"dG's density: %1.2f%%\" % (100 * nx.density(dG)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"dG's reciprocity: 53.45%\n", | |
"Node-centric reciprocities:\n", | |
"{'1': 1.0, '2': 0.6666666666666666, '3': 1.0}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Reciprocity is the share of connections going both ways in a directed graph...\n", | |
"# ... also known as cycles of length 2\n", | |
"print(\"dG's reciprocity: %1.2f%%\" % (100 * nx.reciprocity(dG)))\n", | |
"\n", | |
"# We can also ask the reciprocities of a specific set of nodes\n", | |
"print(\"Node-centric reciprocities:\")\n", | |
"print(nx.reciprocity(dG, nodes = (\"1\", \"2\", \"3\")))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "AttributeError", | |
"evalue": "'Graph' object has no attribute 'predecessors'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-9-9c7504568477>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Does it make any sense to calculate the reciprocity in an undirected graph?\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# Not really...\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreciprocity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnodes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"1\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"2\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"3\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m<decorator-gen-376>\u001b[0m in \u001b[0;36mreciprocity\u001b[0;34m(G, nodes)\u001b[0m\n", | |
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/networkx/utils/decorators.py\u001b[0m in \u001b[0;36m_not_implemented_for\u001b[0;34m(not_implement_for_func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNetworkXNotImplemented\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnot_implement_for_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_not_implemented_for\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/networkx/algorithms/reciprocity.py\u001b[0m in \u001b[0;36mreciprocity\u001b[0;34m(G, nodes)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;31m# Otherwise, `nodes` represents an iterable of nodes, so return a\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;31m# dictionary mapping node to its reciprocity.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_reciprocity_iter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnodes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/networkx/algorithms/reciprocity.py\u001b[0m in \u001b[0;36m_reciprocity_iter\u001b[0;34m(G, nodes)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnbunch_iter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mnode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 71\u001b[0;31m \u001b[0mpred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredecessors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 72\u001b[0m \u001b[0msucc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msuccessors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0moverlap\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpred\u001b[0m \u001b[0;34m&\u001b[0m \u001b[0msucc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mAttributeError\u001b[0m: 'Graph' object has no attribute 'predecessors'" | |
] | |
} | |
], | |
"source": [ | |
"# Does it make any sense to calculate the reciprocity in an undirected graph?\n", | |
"# Not really...\n", | |
"nx.reciprocity(G, nodes = (\"1\", \"2\", \"3\"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"G is a connected graph: False\n", | |
"dG is a strongly connected graph: False\n", | |
"dG is a weakly connected graph: True\n", | |
"# of G's connected components: 48\n", | |
"# of dG's strong components: 3956\n" | |
] | |
} | |
], | |
"source": [ | |
"# Are our networks connected?\n", | |
"print(\"G is a connected graph: %s\" % nx.is_connected(G))\n", | |
"# Note that for directed networks we have to specify\n", | |
"# if we mean strong or weak connectivity\n", | |
"print(\"dG is a strongly connected graph: %s\" % nx.is_strongly_connected(dG))\n", | |
"print(\"dG is a weakly connected graph: %s\" % nx.is_weakly_connected(dG))\n", | |
"\n", | |
"# Alright, not connected. So... how many components?\n", | |
"print(\"# of G's connected components: %d\" % nx.number_connected_components(G))\n", | |
"print(\"# of dG's strong components: %d\" % nx.number_strongly_connected_components(dG))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"C1's relative size (nodes): 99.86%\n", | |
"C1's relative size (edges): 100.00%\n", | |
"GC's relative size (nodes): 99.86%\n", | |
"GC's relative size (edges): 100.00%\n" | |
] | |
} | |
], | |
"source": [ | |
"# Can we access the components of the network?\n", | |
"# Yes, this will return a generator:\n", | |
"G_comps = nx.connected_components(G)\n", | |
"\n", | |
"# With a generator, we can pull out a component.\n", | |
"# This returns the set of node IDs in the first component...\n", | |
"G_comp_nodes = next(G_comps)\n", | |
"\n", | |
"# ... which we can use to induce the component's graph:\n", | |
"G_comp = G.subgraph(G_comp_nodes)\n", | |
"\n", | |
"# Now G_component_1_graph is a networkx graph and all functions seen so far apply to it.\n", | |
"# How big is G_component_1_graph relatively to G?\n", | |
"print(\"C1's relative size (nodes): %1.2f%%\" % (100 * len(G_comp.nodes) / len(G.nodes)))\n", | |
"print(\"C1's relative size (edges): %1.2f%%\" % (100 * len(G_comp.edges) / len(G.edges)))\n", | |
"\n", | |
"# C1 is G's giant component!\n", | |
"# Note, networkx won't necessarily return the giant component as the first one,\n", | |
"# so you have to do it yourself:\n", | |
"GC = G.subgraph(max(nx.connected_components(G), key = len))\n", | |
"print(\"GC's relative size (nodes): %1.2f%%\" % (100 * len(GC.nodes) / len(G.nodes)))\n", | |
"print(\"GC's relative size (edges): %1.2f%%\" % (100 * len(GC.edges) / len(G.edges)))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment