Skip to content

Instantly share code, notes, and snippets.

@keiono
Created September 15, 2014 05:46
Show Gist options
  • Save keiono/6ab079851873c0639c04 to your computer and use it in GitHub Desktop.
Save keiono/6ab079851873c0639c04 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:8b0845806cde3e665feef3e99c35faf9ce3f34a71a35ad9774e06ad0958c5a31"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## GitHub API\u3092\u4f7f\u3063\u3066Data Visualization\u306e\u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u3092\u89b3\u5bdf\u3059\u308b\n",
"\n",
"GitHub\u3067\u30bd\u30fc\u30b9\u30b3\u30fc\u30c9\u304c\u30db\u30b9\u30c8\u3055\u308c\u3066\u3044\u308b\u3001\u8457\u540d\u306a\u53ef\u8996\u5316\u95a2\u9023\u30c4\u30fc\u30eb\u306e\u30b3\u30df\u30c3\u30bf\u30fc\u304b\u3089\u8fbf\u308b\u958b\u767a\u8005\u306e\u30b3\u30df\u30e5\u30cb\u30c6\u30a3\u3092\u53ef\u8996\u5316\u3002\u305d\u3053\u304b\u3089\u30d5\u30a9\u30ed\u30fc\u3059\u3079\u304d\u4eba\u3084\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u304c\u898b\u3048\u308b\u3060\u308d\u3046\u304b\uff1f\n",
"\n",
"\u8a73\u7d30\u306f\u3053\u3061\u3089\u306e[\u8a18\u4e8b]()\u3067\u3002"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from github import Github\n",
"import networkx as nx\n",
"\n",
"# Replace this to your own GitHub API key\n",
"TOKEN = ''\n",
"\n",
"######## \u5b9a\u6570\u306e\u5b9a\u7fa9 ##########\n",
"# Edge Types (interaction)\n",
"COMMIT_TO = 'commit_to'\n",
"OWNER_OF = 'owner_of'\n",
"\n",
"# Node Types\n",
"USER = 'user'\n",
"REPO = 'repo'\n",
"SEED = 'seed'\n",
"SEED_COMMITTER = 'seed_committer'\n",
"\n",
"# \u30ec\u30dd\u30b8\u30c8\u30ea\u3092\u8ffd\u52a0\u3059\u308b\u304b\u3069\u3046\u304b\u5224\u65ad\u3059\u308b\u305f\u3081\u306e\u95be\u5024\n",
"STAR_TH = 100\n",
"\n",
"# \u30b0\u30e9\u30d5\u306eSeed\u3068\u306a\u308b\u306e\u30ec\u30dd\u30b8\u30c8\u30ea\u60c5\u5831\n",
"SEEDS = [\n",
" 'mbostock/d3',\n",
" 'nnnick/Chart.js',\n",
" 'trifacta/vega',\n",
" 'misoproject/d3.chart',\n",
" 'novus/nvd3',\n",
" 'simplegeo/polymaps',\n",
" 'lmccart/p5.js'\n",
"# 'ContinuumIO/bokeh',\n",
"# 'matplotlib/matplotlib',\n",
"# 'DmitryBaranovskiy/raphael/',\n",
"# 'processing/processing'\n",
"]\n",
"\n",
"client = Github(TOKEN, per_page=100)\n",
"\n",
"# \u6700\u7d42\u7684\u306b\u53ef\u8996\u5316\u3059\u308b\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u30c7\u30fc\u30bf\n",
"g = nx.MultiDiGraph(name='Github Universe')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u5404\u7a2e\u30d8\u30eb\u30d1\u30fc\u95a2\u6570\u306e\u5b9a\u7fa9"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Extract committers of the project\n",
"def extract_committers(repo, repo_node, expand_user_repos, user_type):\n",
" committers = repo.get_contributors()\n",
" for committer in committers:\n",
" login = committer.login\n",
" g.add_node(login, type = user_type)\n",
" extract_user_info(g, login, committer)\n",
" \n",
" g.add_edge(login, repo_node, interaction = COMMIT_TO)\n",
" \n",
" if expand_user_repos:\n",
" committer_repos = committer.get_repos()\n",
" add_user_repos(login, committer_repos)\n",
" \n",
"\n",
"# Filter major projects\n",
"def add_user_repos(user, repos):\n",
" for repo in repos:\n",
" # Pick only highly starred projects\n",
" stargazers = repo.stargazers_count\n",
" if stargazers < STAR_TH:\n",
" continue\n",
" \n",
" repo_name = repo.full_name\n",
" if g.has_node(repo_name) == False:\n",
" g.add_node(repo_name, type=REPO)\n",
" extract_repo_info(g, repo_name, repo)\n",
"\n",
" g.add_edge(user, repo_name, interaction = OWNER_OF)\n",
" extract_committers(repo, repo_name, False, USER)\n",
"\n",
"\n",
"# Extract user information\n",
"def extract_user_info(graph, user_id, user):\n",
" node = graph.node[user_id]\n",
" name = user.name\n",
" if name is None:\n",
" name = user_id\n",
" \n",
" node['login'] = user_id \n",
" node['name'] = name\n",
" node['followers'] = user.followers\n",
" node['location'] = user.location\n",
" node['bio'] = user.bio\n",
" node['score'] = node['followers']\n",
" \n",
"# Extract repository information\n",
"def extract_repo_info(graph, repo_id, repo):\n",
" node = graph.node[repo_id]\n",
" \n",
" node['name'] = repo.name\n",
" node['description'] = repo.description\n",
" node['homepage'] = repo.homepage\n",
" node['stargazers'] = repo.stargazers_count\n",
" node['watchers'] = repo.watchers_count\n",
" node['fork_count'] = repo.forks_count\n",
" node['score'] = node['watchers'] + node['stargazers']\n",
"\n",
" node['language'] = repo.language"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u5b9f\u969b\u306bGithHub\u306eAPI\u3092\u547c\u3073\u51fa\u3057\u3066\u30b0\u30e9\u30d5\u3092\u69cb\u7bc9\u3059\u308b\n",
"\u3068\u3066\u3082\u6642\u9593\u304c\u304b\u304b\u308a\u307e\u3059..."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for seed in SEEDS:\n",
" repo = client.get_repo(seed)\n",
" repo_name = repo.full_name\n",
" g.add_node(repo_name, type=SEED)\n",
" extract_repo_info(g, repo_name, repo)\n",
" extract_committers(repo, repo_name, True, SEED_COMMITTER)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u30c7\u30fc\u30bf\u306e\u78ba\u8a8d\u3068\u4fee\u6b63\n",
"\u73fe\u5728\u306eNetworkX\u306eGraphML\u66f8\u304d\u51fa\u3057\u95a2\u6570\u306fNone\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u3066\u3044\u306a\u3044\u306e\u3067\u3001\u7a7a\u306e\u6587\u5b57\u5217\u306b\u7f6e\u304d\u63db\u3048\u307e\u3059\u3002"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print(g.number_of_nodes())\n",
"print(g.number_of_edges())\n",
"\n",
"# Replace None to empty string\n",
"nodes = g.nodes()\n",
"for node_id in nodes:\n",
" node = g.node[node_id]\n",
" keys = node.keys()\n",
" for key in keys:\n",
" if node[key] is None:\n",
" node[key] = ''"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1423\n",
"1963\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u30b0\u30e9\u30d5\u30d5\u30a1\u30a4\u30eb\u306e\u66f8\u304d\u51fa\u3057\n",
"Cytoscape\u3067\u306e\u6271\u3044\u304c\u697d\u306a\u306e\u3067\u3001\u3053\u3053\u3067\u306fGraphML\u5f62\u5f0f\u3067\u66f8\u304d\u51fa\u3057\u307e\u3059\u3002"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"nx.write_graphml(g, 'github_universe.graphml')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment