Skip to content

Instantly share code, notes, and snippets.

@k1ochiai
Created February 11, 2019 07:38
Show Gist options
  • Save k1ochiai/d9c66fc50bf3f7181f9337753c68b80a to your computer and use it in GitHub Desktop.
Save k1ochiai/d9c66fc50bf3f7181f9337753c68b80a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from networkx.readwrite import json_graph\n",
"import json\n",
"import numpy as np\n",
"import networkx as nx\n",
"from collections import defaultdict"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Make the train_prefix-G.json graph file for the Cora dataset (saved here as data-G.json)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"G = nx.read_edgelist('cora/cora.cites', nodetype=int)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"np.random.seed(1)\n",
"num_nodes = 2708\n",
"rand_indices = np.random.permutation(num_nodes)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"nodes_key_list = list(G.nodes())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# The test/val split sizes (1000 test, 500 val) are the same as in the PyTorch implementation:\n",
"# https://github.com/williamleif/graphsage-simple\n",
"for i, n in enumerate(nodes_key_list):\n",
"    if i<1000:\n",
"        G.node[n][\"test\"] = True\n",
"        G.node[n][\"val\"] = False\n",
"    if 1000<=i<1500:\n",
"        G.node[n][\"test\"] = False\n",
"        G.node[n][\"val\"] = True\n",
"    if i>=1500:\n",
"        G.node[n][\"test\"] = False\n",
"        G.node[n][\"val\"] = False"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = json_graph.node_link_data(G)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with open(\"data-G.json\", mode=\"w\") as f:\n",
"    f.write(json.dumps(data))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# This function is used in pytorch implementation\n",
"def load_cora():\n",
"    num_nodes = 2708\n",
"    num_feats = 1433\n",
"    feat_data = np.zeros((num_nodes, num_feats))\n",
"    labels = np.empty((num_nodes,1), dtype=np.int64)\n",
"    node_map = {}\n",
"    label_map = {}\n",
"    with open(\"cora/cora.content\") as fp:\n",
"        for i,line in enumerate(fp):\n",
"            info = line.strip().split()\n",
"            feat_data[i,:] = list(map(float, info[1:-1]))\n",
"            node_map[info[0]] = i\n",
"            if not info[-1] in label_map:\n",
"                label_map[info[-1]] = len(label_map)\n",
"            labels[i] = label_map[info[-1]]\n",
"\n",
"    adj_lists = defaultdict(set)\n",
"    with open(\"cora/cora.cites\") as fp:\n",
"        for i,line in enumerate(fp):\n",
"            info = line.strip().split()\n",
"            paper1 = node_map[info[0]]\n",
"            paper2 = node_map[info[1]]\n",
"            adj_lists[paper1].add(paper2)\n",
"            adj_lists[paper2].add(paper1)\n",
"    return feat_data, labels, adj_lists, node_map"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"feat_data, labels, adj_lists, nodes = load_cora()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with open(\"data-id_map.json\", mode=\"w\") as f:\n",
"    f.write(json.dumps(nodes))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"labels_reshape = labels.flatten()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"n_labels = len(np.unique(labels_reshape))\n",
"labels_one_hot = np.eye(n_labels,dtype=int)[labels_reshape]\n",
"class_map = {k: list(labels_one_hot[i]) for k, i in nodes.items()}"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with open(\"data-class_map.json\", mode=\"w\") as f:\n",
"    f.write(json.dumps(class_map,default=str))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"np.save('data-feats.npy', feat_data)"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda env:anaconda3-4.2.0]",
"language": "python",
"name": "conda-env-anaconda3-4.2.0-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
@danyang-liu
Copy link

great job! Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment