Skip to content

Instantly share code, notes, and snippets.

@christopherkullenberg
Last active November 16, 2017 21:47
Show Gist options
  • Save christopherkullenberg/a6cda0fa7deaa39a86b8e0a43e23c103 to your computer and use it in GitHub Desktop.
Save christopherkullenberg/a6cda0fa7deaa39a86b8e0a43e23c103 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import json\n",
"import pandas as pd\n",
"import networkx as nx\n",
"import re"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In case the json won't parse, try:\n",
"```python\n",
"try:\n",
" jsonobject = json.load(jsonfile)\n",
"except json.JSONDecodeError:\n",
" print(\"error\") # there is just one error in the beginning of file\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"sourcefile = \"fotografiska.json\"\n",
"jsonfile = open(sourcefile)\n",
"jsonobject = json.load(jsonfile)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def commentstodataframe(jsonobject):\n",
" '''This function makes a dataframe of selected content\n",
" in the json data structure.'''\n",
" commentcounter = 0\n",
" thedata = []\n",
" for j in jsonobject:\n",
" commentdata = {}\n",
" postid = j['shortcode']\n",
" posturl = 'https://www.instagram.com/p/' + postid\n",
" commentdata[\"id\"] = postid\n",
" commentdata[\"url\"] = posturl \n",
" commentlist = []\n",
" for k in j['comments']['data']:\n",
" \n",
" commentlist.append((k['owner']['username'], k['text']))\n",
" commentcounter += 1 \n",
" commentdata[\"comments\"] = commentlist\n",
" thedata.append(commentdata)\n",
" print(\"Number of comments added to dataframe: \" + str(commentcounter))\n",
" df = pd.DataFrame(thedata, columns=['id','url','comments'])\n",
" return(df)\n",
" \n",
"df = commentstodataframe(jsonobject)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for comment in df.iterrows():\n",
" print(\"\\n\")\n",
" print(comment[1][1])\n",
" for c in comment[1][2]: \n",
" print(c[0], c[1])\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" def makepostdirectednetwork(df):\n",
" '''Makes a directed network from a user to a\n",
" post. Can be visualised as an in- or outdegree\n",
" network depending on your question.'''\n",
" G = nx.DiGraph()\n",
" postcounter = 0\n",
" interactionscounter = 0\n",
" userlist = []\n",
" for row in df.iterrows():\n",
" postid = row[1][0]\n",
" postcounter += 1\n",
" for c in row[1][2]: \n",
" username = c[0]\n",
" interactionscounter += 1\n",
" userlist.append(username)\n",
" G.add_edge(username, postid) # direction of graph, from user to post\n",
" nx.write_gexf(G, sourcefile + \"postdirected.gexf\")\n",
" print(\"Posts: \" + str(postcounter))\n",
" print(\"Interactions (incl. multiple interactions with same post: \" \n",
" + str(interactionscounter))\n",
" print(\"Unique users: \" + str(len(set(userlist))))\n",
"\n",
"makepostdirectednetwork(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def makeuserdirectednetwork(df):\n",
" '''Makes a directe network when users\n",
" ping each other using @'''\n",
" G = nx.MultiDiGraph()\n",
" interactionscounter = 0\n",
" userlist = []\n",
" for row in df.iterrows():\n",
" for c in row[1][2]: \n",
" match = re.findall(\"(?<=@).*?(?=[\\s])\", c[1], re.IGNORECASE)\n",
" if match:\n",
" source = c[0]\n",
" #G.add_node(source)\n",
" #print(\"\\nSource: \" + source)\n",
" for m in match:\n",
" #G.add_node(m)\n",
" interactionscounter += 1\n",
" #print(\"Target: \" + m)\n",
" G.add_edge(source, m)\n",
" userlist.append(source)\n",
" userlist.append(m)\n",
" \n",
" nx.write_gexf(G, sourcefile + \"userdirected.gexf\")\n",
" print(\"Number of interactions: \" + str(interactionscounter))\n",
" print(\"Number of unique users: \" + str(len(set(userlist))))\n",
" print(nx.info(G))\n",
" \n",
" \n",
"makeuserdirectednetwork(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment