Created
May 22, 2019 11:43
-
-
Save willcritchlow/814d2d04e5a204b0736ab7c3c49939bd to your computer and use it in GitHub Desktop.
Run PageRank on a small example site to show the PR of an orphaned page -- one with no inbound links
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Build a small dummy site with some arbitrary links: a home page, five interlinked section pages (each with one sub-page), and terms and contact pages" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import networkx as nx\n", | |
"site = nx.DiGraph()\n", | |
"\n", | |
"def add_one_level_down(name):\n", | |
" site.add_edges_from([('home',name)])\n", | |
" site.add_edges_from([(name,'home')])\n", | |
"\n", | |
"add_one_level_down(\"one\")\n", | |
"add_one_level_down(\"two\")\n", | |
"add_one_level_down(\"three\")\n", | |
"add_one_level_down(\"four\")\n", | |
"add_one_level_down(\"five\")\n", | |
"\n", | |
"\n", | |
"def interlink(pages):\n", | |
" for page in pages:\n", | |
" for page2 in pages:\n", | |
" if page != page2:\n", | |
" site.add_edges_from([(page,page2)])\n", | |
"\n", | |
"interlink([\"one\", \"two\", \"three\", \"four\", \"five\"])\n", | |
"\n", | |
"def add_sub_page(page, sub_page):\n", | |
" site.add_edges_from([(page,sub_page),(sub_page,'home')])\n", | |
"\n", | |
"add_sub_page(\"one\", \"one1\")\n", | |
"add_sub_page(\"two\", \"two1\")\n", | |
"add_sub_page(\"three\", \"three1\")\n", | |
"add_sub_page(\"four\", \"four1\")\n", | |
"add_sub_page(\"five\", \"five1\")\n", | |
"\n", | |
"add_one_level_down(\"terms\")\n", | |
"add_one_level_down(\"contact\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Add an orphaned page -- one that links to page \"one\" but has no inbound links -- and run the PageRank algorithm" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"site.add_edges_from([(\"orphan\",\"one\")])\n", | |
"orphan_pr = nx.pagerank(site)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Name: \n", | |
"Type: DiGraph\n", | |
"Number of nodes: 14\n", | |
"Number of edges: 45\n", | |
"Average in degree: 3.2143\n", | |
"Average out degree: 3.2143\n" | |
] | |
} | |
], | |
"source": [ | |
"print(nx.info(site))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
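"Show that the PR of the orphaned page is what we'd expect from our naive calculation: with no inbound links it only receives the random-jump share, 0.15 / N, where N is the number of pages and 0.15 is 1 minus the 0.85 damping factor" | |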
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.0107142857\n" | |
] | |
} | |
], | |
"source": [ | |
"print(round(orphan_pr[\"orphan\"], 10))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.0107142857\n" | |
] | |
} | |
], | |
"source": [ | |
"print(round(1/len(nx.nodes(site))*.15, 10))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment