Created
September 15, 2014 05:46
-
-
Save keiono/6ab079851873c0639c04 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:8b0845806cde3e665feef3e99c35faf9ce3f34a71a35ad9774e06ad0958c5a31" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## GitHub API\u3092\u4f7f\u3063\u3066Data Visualization\u306e\u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u3092\u89b3\u5bdf\u3059\u308b\n", | |
"\n", | |
"GitHub\u3067\u30bd\u30fc\u30b9\u30b3\u30fc\u30c9\u304c\u30db\u30b9\u30c8\u3055\u308c\u3066\u3044\u308b\u3001\u8457\u540d\u306a\u53ef\u8996\u5316\u95a2\u9023\u30c4\u30fc\u30eb\u306e\u30b3\u30df\u30c3\u30bf\u30fc\u304b\u3089\u8fbf\u308b\u958b\u767a\u8005\u306e\u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u3092\u53ef\u8996\u5316\u3002\u305d\u3053\u304b\u3089\u30d5\u30a9\u30ed\u30fc\u3059\u3079\u304d\u4eba\u3084\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u304c\u898b\u3048\u308b\u3060\u308d\u3046\u304b\uff1f\n", | |
"\n", | |
"\u8a73\u7d30\u306f\u3053\u3061\u3089\u306e[\u8a18\u4e8b]()\u3067\u3002" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from github import Github\n", | |
"import networkx as nx\n", | |
"\n", | |
"# Replace this to your own GitHub API key\n", | |
"TOKEN = ''\n", | |
"\n", | |
"######## \u5b9a\u6570\u306e\u5b9a\u7fa9 ##########\n", | |
"# Edge Types (interaction)\n", | |
"COMMIT_TO = 'commit_to'\n", | |
"OWNER_OF = 'owner_of'\n", | |
"\n", | |
"# Node Types\n", | |
"USER = 'user'\n", | |
"REPO = 'repo'\n", | |
"SEED = 'seed'\n", | |
"SEED_COMMITTER = 'seed_committer'\n", | |
"\n", | |
"# \u30ec\u30dd\u30b8\u30c8\u30ea\u3092\u8ffd\u52a0\u3059\u308b\u304b\u3069\u3046\u304b\u5224\u65ad\u3059\u308b\u305f\u3081\u306e\u95be\u5024\n", | |
"STAR_TH = 100\n", | |
"\n", | |
"# \u30b0\u30e9\u30d5\u306eSeed\u3068\u306a\u308b\u306e\u30ec\u30dd\u30b8\u30c8\u30ea\u60c5\u5831\n", | |
"SEEDS = [\n", | |
" 'mbostock/d3',\n", | |
" 'nnnick/Chart.js',\n", | |
" 'trifacta/vega',\n", | |
" 'misoproject/d3.chart',\n", | |
" 'novus/nvd3',\n", | |
" 'simplegeo/polymaps',\n", | |
" 'lmccart/p5.js'\n", | |
"# 'ContinuumIO/bokeh',\n", | |
"# 'matplotlib/matplotlib',\n", | |
"# 'DmitryBaranovskiy/raphael/',\n", | |
"# 'processing/processing'\n", | |
"]\n", | |
"\n", | |
"client = Github(TOKEN, per_page=100)\n", | |
"\n", | |
"# \u6700\u7d42\u7684\u306b\u53ef\u8996\u5316\u3059\u308b\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u30c7\u30fc\u30bf\n", | |
"g = nx.MultiDiGraph(name='Github Universe')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u5404\u7a2e\u30d8\u30eb\u30d1\u30fc\u95a2\u6570\u306e\u5b9a\u7fa9" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Extract committers of the project\n", | |
"def extract_committers(repo, repo_node, expand_user_repos, user_type):\n", | |
" committers = repo.get_contributors()\n", | |
" for committer in committers:\n", | |
" login = committer.login\n", | |
" g.add_node(login, type = user_type)\n", | |
" extract_user_info(g, login, committer)\n", | |
" \n", | |
" g.add_edge(login, repo_node, interaction = COMMIT_TO)\n", | |
" \n", | |
" if expand_user_repos:\n", | |
" committer_repos = committer.get_repos()\n", | |
" add_user_repos(login, committer_repos)\n", | |
" \n", | |
"\n", | |
"# Filter major projects\n", | |
"def add_user_repos(user, repos):\n", | |
" for repo in repos:\n", | |
" # Pick only highly starred projects\n", | |
" stargazers = repo.stargazers_count\n", | |
" if stargazers < STAR_TH:\n", | |
" continue\n", | |
" \n", | |
" repo_name = repo.full_name\n", | |
" if g.has_node(repo_name) == False:\n", | |
" g.add_node(repo_name, type=REPO)\n", | |
" extract_repo_info(g, repo_name, repo)\n", | |
"\n", | |
" g.add_edge(user, repo_name, interaction = OWNER_OF)\n", | |
" extract_committers(repo, repo_name, False, USER)\n", | |
"\n", | |
"\n", | |
"# Extract user information\n", | |
"def extract_user_info(graph, user_id, user):\n", | |
" node = graph.node[user_id]\n", | |
" name = user.name\n", | |
" if name is None:\n", | |
" name = user_id\n", | |
" \n", | |
" node['login'] = user_id \n", | |
" node['name'] = name\n", | |
" node['followers'] = user.followers\n", | |
" node['location'] = user.location\n", | |
" node['bio'] = user.bio\n", | |
" node['score'] = node['followers']\n", | |
" \n", | |
"# Extract repository information\n", | |
"def extract_repo_info(graph, repo_id, repo):\n", | |
" node = graph.node[repo_id]\n", | |
" \n", | |
" node['name'] = repo.name\n", | |
" node['description'] = repo.description\n", | |
" node['homepage'] = repo.homepage\n", | |
" node['stargazers'] = repo.stargazers_count\n", | |
" node['watchers'] = repo.watchers_count\n", | |
" node['fork_count'] = repo.forks_count\n", | |
" node['score'] = node['watchers'] + node['stargazers']\n", | |
"\n", | |
" node['language'] = repo.language" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u5b9f\u969b\u306bGithHub\u306eAPI\u3092\u547c\u3073\u51fa\u3057\u3066\u30b0\u30e9\u30d5\u3092\u69cb\u7bc9\u3059\u308b\n", | |
"\u3068\u3066\u3082\u6642\u9593\u304c\u304b\u304b\u308a\u307e\u3059..." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"for seed in SEEDS:\n", | |
" repo = client.get_repo(seed)\n", | |
" repo_name = repo.full_name\n", | |
" g.add_node(repo_name, type=SEED)\n", | |
" extract_repo_info(g, repo_name, repo)\n", | |
" extract_committers(repo, repo_name, True, SEED_COMMITTER)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u30c7\u30fc\u30bf\u306e\u78ba\u8a8d\u3068\u4fee\u6b63\n", | |
"\u73fe\u5728\u306eNetworkX\u306eGraphML\u66f8\u304d\u51fa\u3057\u95a2\u6570\u306fNone\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u3066\u3044\u306a\u3044\u306e\u3067\u3001\u7a7a\u306e\u6587\u5b57\u5217\u306b\u7f6e\u304d\u63db\u3048\u307e\u3059\u3002" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print(g.number_of_nodes())\n", | |
"print(g.number_of_edges())\n", | |
"\n", | |
"# Replace None to empty string\n", | |
"nodes = g.nodes()\n", | |
"for node_id in nodes:\n", | |
" node = g.node[node_id]\n", | |
" keys = node.keys()\n", | |
" for key in keys:\n", | |
" if node[key] is None:\n", | |
" node[key] = ''" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"1423\n", | |
"1963\n" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u30b0\u30e9\u30d5\u30d5\u30a1\u30a4\u30eb\u306e\u66f8\u304d\u51fa\u3057\n", | |
"Cytoscape\u3067\u306e\u6271\u3044\u304c\u697d\u306a\u306e\u3067\u3001\u3053\u3053\u3067\u306fGraphML\u5f62\u5f0f\u3067\u66f8\u304d\u51fa\u3057\u307e\u3059\u3002" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"nx.write_graphml(g, 'github_universe.graphml')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment