Created
February 22, 2015 04:03
-
-
Save amirziai/2d67f266cfcc67bf103e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:a0fd75809a68efa56163142b5a328a1c67861e4ee0c8ac6824be3147d71022ba" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Data about a Twitter handle" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's look at the profile data returned for the Twitter handle @jboitnott." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# NOTE(review): `api` is not defined anywhere in this notebook; presumably an\n", | |
"# authenticated Twitter API client created beforehand -- TODO confirm.\n", | |
"twitter_handle = 'jboitnott'\n", | |
"# `x` is the returned user object; later cells read its `_json` attribute.\n", | |
"x = api.get_user(twitter_handle)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 55 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The fields we get back:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"for k in x._json:\n", | |
" print k" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"follow_request_sent\n", | |
"profile_use_background_image\n", | |
"profile_text_color\n", | |
"default_profile_image\n", | |
"id\n", | |
"profile_background_image_url_https\n", | |
"verified\n", | |
"profile_location\n", | |
"profile_image_url_https\n", | |
"profile_sidebar_fill_color\n", | |
"entities\n", | |
"followers_count\n", | |
"profile_sidebar_border_color\n", | |
"id_str\n", | |
"profile_background_color\n", | |
"listed_count\n", | |
"status\n", | |
"is_translation_enabled\n", | |
"utc_offset\n", | |
"statuses_count\n", | |
"description\n", | |
"friends_count\n", | |
"location\n", | |
"profile_link_color\n", | |
"profile_image_url\n", | |
"following\n", | |
"geo_enabled\n", | |
"profile_banner_url\n", | |
"profile_background_image_url\n", | |
"name\n", | |
"lang\n", | |
"profile_background_tile\n", | |
"favourites_count\n", | |
"screen_name\n", | |
"notifications\n", | |
"url\n", | |
"created_at\n", | |
"contributors_enabled\n", | |
"time_zone\n", | |
"protected\n", | |
"default_profile\n", | |
"is_translator\n" | |
] | |
} | |
], | |
"prompt_number": 56 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Example" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This is how the returned user data is structured:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Bare last expression: the notebook displays the full user payload dict.\n", | |
"x._json" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 57, | |
"text": [ | |
"{u'contributors_enabled': False,\n", | |
" u'created_at': u'Wed Apr 23 06:57:53 +0000 2008',\n", | |
" u'default_profile': False,\n", | |
" u'default_profile_image': False,\n", | |
" u'description': u'Writer @Inc @Entrepreneur @usatodaytech @Biz_Innovations - Partner @DealFlowSF - Advisor @startupgrind - Tweets are my own. https://t.co/N4si3yAwzr',\n", | |
" u'entities': {u'description': {u'urls': [{u'display_url': u'umano.me/johnboitnott',\n", | |
" u'expanded_url': u'https://umano.me/johnboitnott',\n", | |
" u'indices': [124, 147],\n", | |
" u'url': u'https://t.co/N4si3yAwzr'}]},\n", | |
" u'url': {u'urls': [{u'display_url': u'about.me/boitnott',\n", | |
" u'expanded_url': u'http://about.me/boitnott',\n", | |
" u'indices': [0, 22],\n", | |
" u'url': u'http://t.co/UwrWWzKKdg'}]}},\n", | |
" u'favourites_count': 2909,\n", | |
" u'follow_request_sent': False,\n", | |
" u'followers_count': 79787,\n", | |
" u'following': False,\n", | |
" u'friends_count': 60287,\n", | |
" u'geo_enabled': True,\n", | |
" u'id': 14486811,\n", | |
" u'id_str': u'14486811',\n", | |
" u'is_translation_enabled': False,\n", | |
" u'is_translator': False,\n", | |
" u'lang': u'en',\n", | |
" u'listed_count': 1851,\n", | |
" u'location': u'San Francisco Bay Area',\n", | |
" u'name': u'John Boitnott',\n", | |
" u'notifications': False,\n", | |
" u'profile_background_color': u'8E1C2B',\n", | |
" u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/638670230/04m802qx6y524lvg7hfs.png',\n", | |
" u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/638670230/04m802qx6y524lvg7hfs.png',\n", | |
" u'profile_background_tile': False,\n", | |
" u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/14486811/1417034936',\n", | |
" u'profile_image_url': u'http://pbs.twimg.com/profile_images/436572430618734592/y1GLkfBJ_normal.png',\n", | |
" u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/436572430618734592/y1GLkfBJ_normal.png',\n", | |
" u'profile_link_color': u'0000FF',\n", | |
" u'profile_location': None,\n", | |
" u'profile_sidebar_border_color': u'87BC44',\n", | |
" u'profile_sidebar_fill_color': u'E0FF92',\n", | |
" u'profile_text_color': u'000000',\n", | |
" u'profile_use_background_image': True,\n", | |
" u'protected': False,\n", | |
" u'screen_name': u'jboitnott',\n", | |
" u'status': {u'contributors': None,\n", | |
" u'coordinates': None,\n", | |
" u'created_at': u'Sat Feb 21 17:21:35 +0000 2015',\n", | |
" u'entities': {u'hashtags': [],\n", | |
" u'symbols': [],\n", | |
" u'urls': [{u'display_url': u'onforb.es/1B0TNro',\n", | |
" u'expanded_url': u'http://onforb.es/1B0TNro',\n", | |
" u'indices': [79, 101],\n", | |
" u'url': u'http://t.co/kemCR4Aq3D'}],\n", | |
" u'user_mentions': []},\n", | |
" u'favorite_count': 2,\n", | |
" u'favorited': False,\n", | |
" u'geo': None,\n", | |
" u'id': 569185175315349506L,\n", | |
" u'id_str': u'569185175315349506',\n", | |
" u'in_reply_to_screen_name': None,\n", | |
" u'in_reply_to_status_id': None,\n", | |
" u'in_reply_to_status_id_str': None,\n", | |
" u'in_reply_to_user_id': None,\n", | |
" u'in_reply_to_user_id_str': None,\n", | |
" u'lang': u'en',\n", | |
" u'place': None,\n", | |
" u'possibly_sensitive': False,\n", | |
" u'retweet_count': 3,\n", | |
" u'retweeted': False,\n", | |
" u'source': u'<a href=\"https://bitly.com/\" rel=\"nofollow\">Bitly</a>',\n", | |
" u'text': u\"It's like someone at the FDA woke up. 23andMe & Genetic Tests To Consumers http://t.co/kemCR4Aq3D\",\n", | |
" u'truncated': False},\n", | |
" u'statuses_count': 23464,\n", | |
" u'time_zone': u'Pacific Time (US & Canada)',\n", | |
" u'url': u'http://t.co/UwrWWzKKdg',\n", | |
" u'utc_offset': -28800,\n", | |
" u'verified': False}" | |
] | |
} | |
], | |
"prompt_number": 57 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Things we want to extract" | |
] | |
}, | |
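{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"A sample of what we can extract.\n", | |
"Text fields (name, description, location) are cleaned up into lists of lowercase words, and the @handles mentioned in the description are collected separately." | |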
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"class TwitterUser():\n", | |
" def __init__(self, json):\n", | |
" self.name = clean_text(json['name'])\n", | |
" self.description = clean_text(json['description'])\n", | |
" self.handles = extract_handles(json['description'])\n", | |
" self.location = clean_text(json['location'])\n", | |
" self.profile_image_url = json['profile_image_url']\n", | |
" self.followers_count = json['followers_count']\n", | |
" self.url = json['url']\n", | |
" \n", | |
" def show(self):\n", | |
" for k,v in self.__dict__.items():\n", | |
" print k,':',v\n", | |
"\n", | |
"jboitnott = TwitterUser(x._json)\n", | |
"jboitnott.show()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"name : ['john', 'boitnott']\n", | |
"url : http://t.co/UwrWWzKKdg\n", | |
"handles : [u'@Inc', u'@Entrepreneur', u'@usatodaytech', u'@Biz_Innovations', u'@DealFlowSF', u'@startupgrind']\n", | |
"profile_image_url : http://pbs.twimg.com/profile_images/436572430618734592/y1GLkfBJ_normal.png\n", | |
"followers_count : 79787\n", | |
"location : ['san', 'francisco', 'bay', 'area']\n", | |
"description : ['writer', 'partner', 'advisor', 'tweets']\n" | |
] | |
} | |
], | |
"prompt_number": 67 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Now a little more streamlined" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Fetch the profile, wrap it in TwitterUser and print the extracted fields.\n", | |
"# NOTE(review): needs `api` and `TwitterUser` from earlier in the session.\n", | |
"handle = 'socialmktgfella' # enter a twitter handle here\n", | |
"user = TwitterUser(api.get_user(handle)._json)\n", | |
"user.show()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"name : ['andre', 'f', 'bourque']\n", | |
"url : http://t.co/6OZ0ublZLV\n", | |
"handles : [u'@Technorati', u'@HuffingtonPost', u\"@RoxyCertified's\"]\n", | |
"profile_image_url : http://pbs.twimg.com/profile_images/423316853876731904/Ecu3oGZH_normal.jpeg\n", | |
"followers_count : 72778\n", | |
"location : ['san', 'francisco', 'ca']\n", | |
"description : ['editor', 'emeritus', 'writer', 'pr', 'inbound', 'mktg', 'consultant', 'dad']\n" | |
] | |
} | |
], | |
"prompt_number": 78 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment