Skip to content

Instantly share code, notes, and snippets.

@mikk-c
Last active July 26, 2018 12:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikk-c/9284854b46553b3360c53582c349a0f1 to your computer and use it in GitHub Desktop.
Save mikk-c/9284854b46553b3360c53582c349a0f1 to your computer and use it in GitHub Desktop.
Network Analysis Practice Session #01
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Importing the stuff we need\n",
"# NOTE: This notebook uses Networkx 2 on Python 3\n",
"import networkx as nx"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Edgelist format: one line per edge\n",
"# NodeID<delimiter>NodeID\n",
"\n",
"# https://homes.cs.washington.edu/~fire/datasets/TheMarkerAnonymized.zip\n",
"graph_uri = \"TheMarkerAnonymized.csv\"\n",
"# https://homes.cs.washington.edu/~fire/datasets/anybeatAnonymized.zip\n",
"dgraph_uri = \"anybeatAnonymized.csv\"\n",
"\n",
"# Networkx's default delimiter is the whitespace,\n",
"# in this case we need to change it\n",
"G = nx.read_edgelist(graph_uri, delimiter = \",\")\n",
"# Networkx's default graph is undirected (nx.Graph).\n",
"#In this case, we need to change it to a directed graph (nx.DiGraph)\n",
"dG = nx.read_edgelist(dgraph_uri, delimiter = \",\", create_using = nx.DiGraph()) "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"69413 69413\n",
"1644849 1644849\n"
]
}
],
"source": [
"# Helper function, returns the number of nodes...\n",
"num_nodes = G.number_of_nodes()\n",
"# ...but we can calculate the length of the node view.\n",
"num_nodes_2 = len(G.nodes)\n",
"\n",
"# Helper function, returns the number of edges...\n",
"num_edges = G.number_of_edges()\n",
"# ...but we can calculate the length of the edge view.\n",
"num_edges_2 = len(G.edges)\n",
"\n",
"# Checking that they return the same number...\n",
"print(num_nodes, num_nodes_2)\n",
"print(num_edges, num_edges_2)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"# of selfloops: 6\n",
"Nodes with self loops:\n",
"61988\n",
"16566\n",
"31029\n",
"3524\n",
"23676\n",
"14421\n"
]
}
],
"source": [
"# Helper function, returns the number of selfloops\n",
"print(\"# of selfloops: %d\" % G.number_of_selfloops())\n",
"\n",
"# Networkx can tell us which nodes have a selfloop,\n",
"# so we can easily list them\n",
"print(\"Nodes with self loops:\")\n",
"for n in G.nodes_with_selfloops():\n",
" print(n)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cycle in G:\n",
"[('134', '30'), ('30', '51850'), ('51850', '302'), ('302', '113'), ('113', '48861'), ('48861', '134')]\n",
"G is a DAG: False\n",
"G is a tree: False\n",
"Cycle in dG:\n",
"[('1', '4152'), ('4152', '1')]\n",
"dG is a DAG: False\n",
"dG is a tree: False\n"
]
}
],
"source": [
"# Does this network have a cycle?\n",
"# If it does, nx.find_cycle() will return a random one:\n",
"print(\"Cycle in G:\")\n",
"print(nx.find_cycle(G))\n",
"\n",
"# Note that, if there is no cycle in the graph,\n",
"# nx.find_cycle() will raise an exception\n",
"# But G isn't a DAG, as correctly reported by nx.is_directed_acyclic_graph(G)\n",
"print(\"G is a DAG: %s\" % nx.is_directed_acyclic_graph(G))\n",
"print(\"G is a tree: %s\" % nx.is_tree(G))\n",
"\n",
"print(\"Cycle in dG:\")\n",
"print(nx.find_cycle(dG))\n",
"print(\"dG is a DAG: %s\" % nx.is_directed_acyclic_graph(dG))\n",
"print(\"dG is a tree: %s\" % nx.is_tree(dG))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Degree of node 2 in G: 3\n",
"Degree of node 2 in dG: 3\n",
"In-degree of node 2 in dG: 2\n",
"Out-degree of node 2 in dG: 1\n"
]
}
],
"source": [
"# If we want to know node 2's degree:\n",
"print(\"Degree of node 2 in G: %d\" % G.degree(\"2\"))\n",
"\n",
"# Works also for directed graphs:\n",
"print(\"Degree of node 2 in dG: %d\" % dG.degree(\"2\"))\n",
"# But here you should distinguish between in-degree and out-degree:\n",
"print(\"In-degree of node 2 in dG: %d\" % dG.in_degree(\"2\"))\n",
"print(\"Out-degree of node 2 in dG: %d\" % dG.out_degree(\"2\"))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"G's density: 0.07%\n",
"dG's density: 0.04%\n"
]
}
],
"source": [
"# How dense are our graphs?\n",
"# Remember: density = number_of_edges / max_theoretical_number_of_edges\n",
"# Here I transform density into a percentage\n",
"print(\"G's density: %1.2f%%\" % (100 * nx.density(G)))\n",
"print(\"dG's density: %1.2f%%\" % (100 * nx.density(dG)))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dG's reciprocity: 53.45%\n",
"Node-centric reciprocities:\n",
"{'1': 1.0, '2': 0.6666666666666666, '3': 1.0}\n"
]
}
],
"source": [
"# Reciprocity is the share of connections going both ways in a directed graph...\n",
"# ... also known as cycles of length 2\n",
"print(\"dG's reciprocity: %1.2f%%\" % (100 * nx.reciprocity(dG)))\n",
"\n",
"# We can also ask the reciprocities of a specific set of nodes\n",
"print(\"Node-centric reciprocities:\")\n",
"print(nx.reciprocity(dG, nodes = (\"1\", \"2\", \"3\")))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'Graph' object has no attribute 'predecessors'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-9-9c7504568477>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Does it make any sense to calculate the reciprocity in an undirected graph?\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# Not really...\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreciprocity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnodes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"1\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"2\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"3\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<decorator-gen-376>\u001b[0m in \u001b[0;36mreciprocity\u001b[0;34m(G, nodes)\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/networkx/utils/decorators.py\u001b[0m in \u001b[0;36m_not_implemented_for\u001b[0;34m(not_implement_for_func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNetworkXNotImplemented\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnot_implement_for_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_not_implemented_for\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/networkx/algorithms/reciprocity.py\u001b[0m in \u001b[0;36mreciprocity\u001b[0;34m(G, nodes)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;31m# Otherwise, `nodes` represents an iterable of nodes, so return a\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;31m# dictionary mapping node to its reciprocity.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_reciprocity_iter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnodes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/networkx/algorithms/reciprocity.py\u001b[0m in \u001b[0;36m_reciprocity_iter\u001b[0;34m(G, nodes)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnbunch_iter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mnode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 71\u001b[0;31m \u001b[0mpred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredecessors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 72\u001b[0m \u001b[0msucc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msuccessors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0moverlap\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpred\u001b[0m \u001b[0;34m&\u001b[0m \u001b[0msucc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'Graph' object has no attribute 'predecessors'"
]
}
],
"source": [
"# Does it make any sense to calculate the reciprocity in an undirected graph?\n",
"# Not really...\n",
"nx.reciprocity(G, nodes = (\"1\", \"2\", \"3\"))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"G is a connected graph: False\n",
"dG is a strongly connected graph: False\n",
"dG is a weakly connected graph: True\n",
"# of G's connected components: 48\n",
"# of dG's strong components: 3956\n"
]
}
],
"source": [
"# Are our networks connected?\n",
"print(\"G is a connected graph: %s\" % nx.is_connected(G))\n",
"# Note that for directed networks we have to specify\n",
"# if we mean strong or weak connectivity\n",
"print(\"dG is a strongly connected graph: %s\" % nx.is_strongly_connected(dG))\n",
"print(\"dG is a weakly connected graph: %s\" % nx.is_weakly_connected(dG))\n",
"\n",
"# Alright, not connected. So... how many components?\n",
"print(\"# of G's connected components: %d\" % nx.number_connected_components(G))\n",
"print(\"# of dG's strong components: %d\" % nx.number_strongly_connected_components(dG))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"C1's relative size (nodes): 99.86%\n",
"C1's relative size (edges): 100.00%\n",
"GC's relative size (nodes): 99.86%\n",
"GC's relative size (edges): 100.00%\n"
]
}
],
"source": [
"# Can we access the components of the network?\n",
"# Yes, this will return a generator:\n",
"G_comps = nx.connected_components(G)\n",
"\n",
"# With a generator, we can pull out a component.\n",
"# This returns the set of node IDs in the first component...\n",
"G_comp_nodes = next(G_comps)\n",
"\n",
"# ... which we can use to induce the component's graph:\n",
"G_comp = G.subgraph(G_comp_nodes)\n",
"\n",
"# Now G_component_1_graph is a networkx graph and all functions seen so far apply to it.\n",
"# How big is G_component_1_graph relatively to G?\n",
"print(\"C1's relative size (nodes): %1.2f%%\" % (100 * len(G_comp.nodes) / len(G.nodes)))\n",
"print(\"C1's relative size (edges): %1.2f%%\" % (100 * len(G_comp.edges) / len(G.edges)))\n",
"\n",
"# C1 is G's giant component!\n",
"# Note, networkx won't necessarily return the giant component as the first one,\n",
"# so you have to do it yourself:\n",
"GC = G.subgraph(max(nx.connected_components(G), key = len))\n",
"print(\"GC's relative size (nodes): %1.2f%%\" % (100 * len(GC.nodes) / len(G.nodes)))\n",
"print(\"GC's relative size (edges): %1.2f%%\" % (100 * len(GC.edges) / len(G.edges)))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment