Skip to content

Instantly share code, notes, and snippets.

@amirziai
Created February 22, 2015 04:03
Show Gist options
  • Save amirziai/2d67f266cfcc67bf103e to your computer and use it in GitHub Desktop.
Save amirziai/2d67f266cfcc67bf103e to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "",
"signature": "sha256:a0fd75809a68efa56163142b5a328a1c67861e4ee0c8ac6824be3147d71022ba"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Data about a twitter handle"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's look into the content of twitter handle @jboitnott"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"twitter_handle = 'jboitnott'\n",
"x = api.get_user(twitter_handle)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 55
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The fields we get back:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for k in x._json:\n",
" print k"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"follow_request_sent\n",
"profile_use_background_image\n",
"profile_text_color\n",
"default_profile_image\n",
"id\n",
"profile_background_image_url_https\n",
"verified\n",
"profile_location\n",
"profile_image_url_https\n",
"profile_sidebar_fill_color\n",
"entities\n",
"followers_count\n",
"profile_sidebar_border_color\n",
"id_str\n",
"profile_background_color\n",
"listed_count\n",
"status\n",
"is_translation_enabled\n",
"utc_offset\n",
"statuses_count\n",
"description\n",
"friends_count\n",
"location\n",
"profile_link_color\n",
"profile_image_url\n",
"following\n",
"geo_enabled\n",
"profile_banner_url\n",
"profile_background_image_url\n",
"name\n",
"lang\n",
"profile_background_tile\n",
"favourites_count\n",
"screen_name\n",
"notifications\n",
"url\n",
"created_at\n",
"contributors_enabled\n",
"time_zone\n",
"protected\n",
"default_profile\n",
"is_translator\n"
]
}
],
"prompt_number": 56
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Example"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is how it's structured"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x._json"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 57,
"text": [
"{u'contributors_enabled': False,\n",
" u'created_at': u'Wed Apr 23 06:57:53 +0000 2008',\n",
" u'default_profile': False,\n",
" u'default_profile_image': False,\n",
" u'description': u'Writer @Inc @Entrepreneur @usatodaytech @Biz_Innovations - Partner @DealFlowSF - Advisor @startupgrind - Tweets are my own. https://t.co/N4si3yAwzr',\n",
" u'entities': {u'description': {u'urls': [{u'display_url': u'umano.me/johnboitnott',\n",
" u'expanded_url': u'https://umano.me/johnboitnott',\n",
" u'indices': [124, 147],\n",
" u'url': u'https://t.co/N4si3yAwzr'}]},\n",
" u'url': {u'urls': [{u'display_url': u'about.me/boitnott',\n",
" u'expanded_url': u'http://about.me/boitnott',\n",
" u'indices': [0, 22],\n",
" u'url': u'http://t.co/UwrWWzKKdg'}]}},\n",
" u'favourites_count': 2909,\n",
" u'follow_request_sent': False,\n",
" u'followers_count': 79787,\n",
" u'following': False,\n",
" u'friends_count': 60287,\n",
" u'geo_enabled': True,\n",
" u'id': 14486811,\n",
" u'id_str': u'14486811',\n",
" u'is_translation_enabled': False,\n",
" u'is_translator': False,\n",
" u'lang': u'en',\n",
" u'listed_count': 1851,\n",
" u'location': u'San Francisco Bay Area',\n",
" u'name': u'John Boitnott',\n",
" u'notifications': False,\n",
" u'profile_background_color': u'8E1C2B',\n",
" u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/638670230/04m802qx6y524lvg7hfs.png',\n",
" u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/638670230/04m802qx6y524lvg7hfs.png',\n",
" u'profile_background_tile': False,\n",
" u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/14486811/1417034936',\n",
" u'profile_image_url': u'http://pbs.twimg.com/profile_images/436572430618734592/y1GLkfBJ_normal.png',\n",
" u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/436572430618734592/y1GLkfBJ_normal.png',\n",
" u'profile_link_color': u'0000FF',\n",
" u'profile_location': None,\n",
" u'profile_sidebar_border_color': u'87BC44',\n",
" u'profile_sidebar_fill_color': u'E0FF92',\n",
" u'profile_text_color': u'000000',\n",
" u'profile_use_background_image': True,\n",
" u'protected': False,\n",
" u'screen_name': u'jboitnott',\n",
" u'status': {u'contributors': None,\n",
" u'coordinates': None,\n",
" u'created_at': u'Sat Feb 21 17:21:35 +0000 2015',\n",
" u'entities': {u'hashtags': [],\n",
" u'symbols': [],\n",
" u'urls': [{u'display_url': u'onforb.es/1B0TNro',\n",
" u'expanded_url': u'http://onforb.es/1B0TNro',\n",
" u'indices': [79, 101],\n",
" u'url': u'http://t.co/kemCR4Aq3D'}],\n",
" u'user_mentions': []},\n",
" u'favorite_count': 2,\n",
" u'favorited': False,\n",
" u'geo': None,\n",
" u'id': 569185175315349506L,\n",
" u'id_str': u'569185175315349506',\n",
" u'in_reply_to_screen_name': None,\n",
" u'in_reply_to_status_id': None,\n",
" u'in_reply_to_status_id_str': None,\n",
" u'in_reply_to_user_id': None,\n",
" u'in_reply_to_user_id_str': None,\n",
" u'lang': u'en',\n",
" u'place': None,\n",
" u'possibly_sensitive': False,\n",
" u'retweet_count': 3,\n",
" u'retweeted': False,\n",
" u'source': u'<a href=\"https://bitly.com/\" rel=\"nofollow\">Bitly</a>',\n",
" u'text': u\"It's like someone at the FDA woke up. 23andMe &amp; Genetic Tests To Consumers http://t.co/kemCR4Aq3D\",\n",
" u'truncated': False},\n",
" u'statuses_count': 23464,\n",
" u'time_zone': u'Pacific Time (US & Canada)',\n",
" u'url': u'http://t.co/UwrWWzKKdg',\n",
" u'utc_offset': -28800,\n",
" u'verified': False}"
]
}
],
"prompt_number": 57
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Things we want to extract"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A sample of what we can extract.\n",
"Text is cleaned up."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class TwitterUser():\n",
" def __init__(self, json):\n",
" self.name = clean_text(json['name'])\n",
" self.description = clean_text(json['description'])\n",
" self.handles = extract_handles(json['description'])\n",
" self.location = clean_text(json['location'])\n",
" self.profile_image_url = json['profile_image_url']\n",
" self.followers_count = json['followers_count']\n",
" self.url = json['url']\n",
" \n",
" def show(self):\n",
" for k,v in self.__dict__.items():\n",
" print k,':',v\n",
"\n",
"jboitnott = TwitterUser(x._json)\n",
"jboitnott.show()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"name : ['john', 'boitnott']\n",
"url : http://t.co/UwrWWzKKdg\n",
"handles : [u'@Inc', u'@Entrepreneur', u'@usatodaytech', u'@Biz_Innovations', u'@DealFlowSF', u'@startupgrind']\n",
"profile_image_url : http://pbs.twimg.com/profile_images/436572430618734592/y1GLkfBJ_normal.png\n",
"followers_count : 79787\n",
"location : ['san', 'francisco', 'bay', 'area']\n",
"description : ['writer', 'partner', 'advisor', 'tweets']\n"
]
}
],
"prompt_number": 67
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Now a little more streamlined"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"handle = 'socialmktgfella' # enter a twitter handle here\n",
"user = TwitterUser(api.get_user(handle)._json)\n",
"user.show()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"name : ['andre', 'f', 'bourque']\n",
"url : http://t.co/6OZ0ublZLV\n",
"handles : [u'@Technorati', u'@HuffingtonPost', u\"@RoxyCertified's\"]\n",
"profile_image_url : http://pbs.twimg.com/profile_images/423316853876731904/Ecu3oGZH_normal.jpeg\n",
"followers_count : 72778\n",
"location : ['san', 'francisco', 'ca']\n",
"description : ['editor', 'emeritus', 'writer', 'pr', 'inbound', 'mktg', 'consultant', 'dad']\n"
]
}
],
"prompt_number": 78
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment