Skip to content

Instantly share code, notes, and snippets.

@ShivangiM
Created April 8, 2017 07:33
Show Gist options
  • Save ShivangiM/626e74cc0190cf467f35c0eec7cbd759 to your computer and use it in GitHub Desktop.
Save ShivangiM/626e74cc0190cf467f35c0eec7cbd759 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"How stress controls hemoglobin levels in blood https://t.co/D4ffkErZ4b\n",
"Trump will very likely fire either his grandchildren's father, or Steve Bannon. Who will it be?\n",
"Hospitals put your data at risk, study finds https://t.co/l4Q0AFz4he https://t.co/GChWceFnDw\n",
".@Boeing and ULA demo their Emergency Egress System for crewed space launches https://t.co/V0XwdPNpQK https://t.co/stGN6yfQjB\n",
"Engrams and circuits crucial for systems consolidation of a memory | If you have money to access Science ... https://t.co/Q4CUt8AyLc\n",
"A NASA infrared look at the Southern Indian Ocean’s 15th tropical cyclone https://t.co/uqXIWmVlki https://t.co/DYgUC5JaNB\n",
"Weekly Roundup: Apple to reset the Mac Pro, Tesla most valuable U.S. automaker https://t.co/O8XissmJvI\n",
"Instead of doubling its data center footprint, Google built its own computer chip for running deep neural networks https://t.co/oJyjVdXzik\n",
"RT @tejasdkulkarni: Some foundational conceptual frameworks for AI are (1) Horde by Sutton et al, (2) algorithmic complexity (3) core knowl…\n",
"RT @CMU_Robotics: RI Seminar: Sergey Levine : Deep Robotic Learning: https://t.co/a0A2tlesfP via @YouTube\n"
]
}
],
"source": [
"import tweepy\n",
"from tweepy import OAuthHandler\n",
"\n",
"#The Twitter API credentials\n",
"\n",
"consumer_key = ''\n",
"consumer_secret = ''\n",
"access_token = ''\n",
"access_secret = ''\n",
"\n",
" \n",
"auth = OAuthHandler(consumer_key, consumer_secret)\n",
"auth.set_access_token(access_token, access_secret)\n",
" \n",
"api = tweepy.API(auth)\n",
"\n",
"for status in tweepy.Cursor(api.home_timeline).items(10):\n",
" # Process a single status\n",
" print(status.text) "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'created_at': 'Sat Apr 08 07:30:28 +0000 2017', 'id': 850611788693549056, 'id_str': '850611788693549056', 'text': \"#Sirens used for weather emergencies in #Dallas are going off. It's a malfunction, no emergency. Crews working to fix. Don't call 911.\", 'truncated': False, 'entities': {'hashtags': [{'text': 'Sirens', 'indices': [0, 7]}, {'text': 'Dallas', 'indices': [40, 47]}], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 824126001936474113, 'id_str': '824126001936474113', 'name': 'Alternative NOAA', 'screen_name': 'altNOAA', 'location': 'Silver Spring, MD', 'description': 'The Unofficial \"Resistance\" team of the NOAA. Account not tax payer subsidized. The NOAA studies the oceans, and the atmosphere to understand our planet. 
#MASA', 'url': 'https://t.co/DrD566fhpI', 'entities': {'url': {'urls': [{'url': 'https://t.co/DrD566fhpI', 'expanded_url': 'http://noaa.gov', 'display_url': 'noaa.gov', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 149265, 'friends_count': 250, 'listed_count': 1775, 'created_at': 'Wed Jan 25 05:25:25 +0000 2017', 'favourites_count': 785, 'utc_offset': -14400, 'time_zone': 'Eastern Time (US & Canada)', 'geo_enabled': False, 'verified': False, 'statuses_count': 2247, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'F5F8FA', 'profile_background_image_url': None, 'profile_background_image_url_https': None, 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/824692083180986368/ghs8RK-P_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/824692083180986368/ghs8RK-P_normal.jpg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/824126001936474113/1485456978', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 0, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'lang': 'en'}\n",
"{'created_at': 'Sat Apr 08 07:26:37 +0000 2017', 'id': 850610820832776196, 'id_str': '850610820832776196', 'text': 'How stress controls hemoglobin levels in blood https://t.co/D4ffkErZ4b', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/D4ffkErZ4b', 'expanded_url': 'https://scienmag.com/?p=1527016', 'display_url': 'scienmag.com/?p=1527016', 'indices': [47, 70]}]}, 'source': '<a href=\"http://scienmag.com/\" rel=\"nofollow\">scienmag_bioeng</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 2782211491, 'id_str': '2782211491', 'name': 'Science', 'screen_name': 'scienmag', 'location': 'London, England', 'description': '#Science #Magazine - #Biology #Chemistry #Physics #Space #Nature #Bioengineering #sciencepressrelease', 'url': 'https://t.co/FFkjqIzULu', 'entities': {'url': {'urls': [{'url': 'https://t.co/FFkjqIzULu', 'expanded_url': 'http://scienmag.com/', 'display_url': 'scienmag.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 129987, 'friends_count': 54957, 'listed_count': 1695, 'created_at': 'Sun Aug 31 12:03:37 +0000 2014', 'favourites_count': 663, 'utc_offset': -14400, 'time_zone': 'Eastern Time (US & Canada)', 'geo_enabled': True, 'verified': True, 'statuses_count': 92129, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_image_url_https': 
'https://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/2782211491/1409486781', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 2, 'favorite_count': 2, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n",
"{'created_at': 'Sat Apr 08 07:26:18 +0000 2017', 'id': 850610738863497217, 'id_str': '850610738863497217', 'text': \"Trump will very likely fire either his grandchildren's father, or Steve Bannon. Who will it be?\", 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 824126001936474113, 'id_str': '824126001936474113', 'name': 'Alternative NOAA', 'screen_name': 'altNOAA', 'location': 'Silver Spring, MD', 'description': 'The Unofficial \"Resistance\" team of the NOAA. Account not tax payer subsidized. The NOAA studies the oceans, and the atmosphere to understand our planet. #MASA', 'url': 'https://t.co/DrD566fhpI', 'entities': {'url': {'urls': [{'url': 'https://t.co/DrD566fhpI', 'expanded_url': 'http://noaa.gov', 'display_url': 'noaa.gov', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 149265, 'friends_count': 250, 'listed_count': 1775, 'created_at': 'Wed Jan 25 05:25:25 +0000 2017', 'favourites_count': 785, 'utc_offset': -14400, 'time_zone': 'Eastern Time (US & Canada)', 'geo_enabled': False, 'verified': False, 'statuses_count': 2247, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'F5F8FA', 'profile_background_image_url': None, 'profile_background_image_url_https': None, 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/824692083180986368/ghs8RK-P_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/824692083180986368/ghs8RK-P_normal.jpg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/824126001936474113/1485456978', 'profile_link_color': '1DA1F2', 
'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 2, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'lang': 'en'}\n",
"{'created_at': 'Sat Apr 08 07:23:44 +0000 2017', 'id': 850610094379331586, 'id_str': '850610094379331586', 'text': 'Hospitals put your data at risk, study finds https://t.co/l4Q0AFz4he https://t.co/GChWceFnDw', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/l4Q0AFz4he', 'expanded_url': 'https://scienmag.com/?p=1527733', 'display_url': 'scienmag.com/?p=1527733', 'indices': [45, 68]}], 'media': [{'id': 850610091539779584, 'id_str': '850610091539779584', 'indices': [69, 92], 'media_url': 'http://pbs.twimg.com/media/C835XxyXUAAAsA-.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C835XxyXUAAAsA-.jpg', 'url': 'https://t.co/GChWceFnDw', 'display_url': 'pic.twitter.com/GChWceFnDw', 'expanded_url': 'https://twitter.com/scienmag/status/850610094379331586/photo/1', 'type': 'photo', 'sizes': {'medium': {'w': 720, 'h': 999, 'resize': 'fit'}, 'small': {'w': 490, 'h': 680, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}, 'large': {'w': 720, 'h': 999, 'resize': 'fit'}}}]}, 'extended_entities': {'media': [{'id': 850610091539779584, 'id_str': '850610091539779584', 'indices': [69, 92], 'media_url': 'http://pbs.twimg.com/media/C835XxyXUAAAsA-.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C835XxyXUAAAsA-.jpg', 'url': 'https://t.co/GChWceFnDw', 'display_url': 'pic.twitter.com/GChWceFnDw', 'expanded_url': 'https://twitter.com/scienmag/status/850610094379331586/photo/1', 'type': 'photo', 'sizes': {'medium': {'w': 720, 'h': 999, 'resize': 'fit'}, 'small': {'w': 490, 'h': 680, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}, 'large': {'w': 720, 'h': 999, 'resize': 'fit'}}}]}, 'source': '<a href=\"http://scienmag.com/\" rel=\"nofollow\">scienmag_bioeng</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 2782211491, 'id_str': '2782211491', 'name': 
'Science', 'screen_name': 'scienmag', 'location': 'London, England', 'description': '#Science #Magazine - #Biology #Chemistry #Physics #Space #Nature #Bioengineering #sciencepressrelease', 'url': 'https://t.co/FFkjqIzULu', 'entities': {'url': {'urls': [{'url': 'https://t.co/FFkjqIzULu', 'expanded_url': 'http://scienmag.com/', 'display_url': 'scienmag.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 129987, 'friends_count': 54957, 'listed_count': 1695, 'created_at': 'Sun Aug 31 12:03:37 +0000 2014', 'favourites_count': 663, 'utc_offset': -14400, 'time_zone': 'Eastern Time (US & Canada)', 'geo_enabled': True, 'verified': True, 'statuses_count': 92129, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/2782211491/1409486781', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 0, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n",
"{'created_at': 'Sat Apr 08 07:15:52 +0000 2017', 'id': 850608112096509952, 'id_str': '850608112096509952', 'text': '.@Boeing and ULA demo their Emergency Egress System for crewed space launches https://t.co/V0XwdPNpQK https://t.co/stGN6yfQjB', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'Boeing', 'name': 'The Boeing Company', 'id': 25103967, 'id_str': '25103967', 'indices': [1, 8]}], 'urls': [{'url': 'https://t.co/V0XwdPNpQK', 'expanded_url': 'http://tcrn.ch/2nxQ5S1', 'display_url': 'tcrn.ch/2nxQ5S1', 'indices': [78, 101]}], 'media': [{'id': 850607935507972096, 'id_str': '850607935507972096', 'indices': [102, 125], 'media_url': 'http://pbs.twimg.com/media/C833kNGUAAAOTKS.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C833kNGUAAAOTKS.jpg', 'url': 'https://t.co/stGN6yfQjB', 'display_url': 'pic.twitter.com/stGN6yfQjB', 'expanded_url': 'https://twitter.com/TechCrunch/status/850608112096509952/video/1', 'type': 'photo', 'sizes': {'small': {'w': 680, 'h': 383, 'resize': 'fit'}, 'medium': {'w': 1200, 'h': 675, 'resize': 'fit'}, 'large': {'w': 1280, 'h': 720, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}}}]}, 'extended_entities': {'media': [{'id': 850607935507972096, 'id_str': '850607935507972096', 'indices': [102, 125], 'media_url': 'http://pbs.twimg.com/media/C833kNGUAAAOTKS.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C833kNGUAAAOTKS.jpg', 'url': 'https://t.co/stGN6yfQjB', 'display_url': 'pic.twitter.com/stGN6yfQjB', 'expanded_url': 'https://twitter.com/TechCrunch/status/850608112096509952/video/1', 'type': 'video', 'sizes': {'small': {'w': 680, 'h': 383, 'resize': 'fit'}, 'medium': {'w': 1200, 'h': 675, 'resize': 'fit'}, 'large': {'w': 1280, 'h': 720, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}}, 'video_info': {'aspect_ratio': [16, 9], 'duration_millis': 43377, 'variants': [{'bitrate': 832000, 'content_type': 'video/mp4', 'url': 
'https://video.twimg.com/amplify_video/850607935507972096/vid/640x360/LM3uDnS4VK0_AXpQ.mp4'}, {'content_type': 'application/x-mpegURL', 'url': 'https://video.twimg.com/amplify_video/850607935507972096/pl/qbcsnyEVjyxpCvF0.m3u8'}, {'bitrate': 2176000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/amplify_video/850607935507972096/vid/1280x720/MfsjvH_gIjdnTA6A.mp4'}, {'bitrate': 320000, 'content_type': 'video/mp4', 'url': 'https://video.twimg.com/amplify_video/850607935507972096/vid/320x180/6b7lqyx2aE-0R_WA.mp4'}]}, 'additional_media_info': {'title': 'ULA demo its Emergency Egress System', 'description': 'Follow @TechCrunch', 'call_to_actions': {'visit_site': {'url': 'http://www.techcrunch.com/video'}}, 'embeddable': True, 'monetizable': True}}]}, 'source': '<a href=\"http://snappytv.com\" rel=\"nofollow\">SnappyTV.com</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 816653, 'id_str': '816653', 'name': 'TechCrunch', 'screen_name': 'TechCrunch', 'location': 'San Francisco, CA', 'description': 'Breaking technology news, analysis, and opinions from TechCrunch. The number one guide for all things tech. Got a tip? 
Let us know tips@techcrunch.com', 'url': 'https://t.co/b5Oyx12qGG', 'entities': {'url': {'urls': [{'url': 'https://t.co/b5Oyx12qGG', 'expanded_url': 'http://techcrunch.com', 'display_url': 'techcrunch.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 8800086, 'friends_count': 898, 'listed_count': 108905, 'created_at': 'Wed Mar 07 01:27:09 +0000 2007', 'favourites_count': 1906, 'utc_offset': -25200, 'time_zone': 'Pacific Time (US & Canada)', 'geo_enabled': True, 'verified': True, 'statuses_count': 153517, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': True, 'profile_background_color': '149500', 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/621096023751004161/BAKy7hCT.png', 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/621096023751004161/BAKy7hCT.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/615392662233808896/EtxjSSKk_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/615392662233808896/EtxjSSKk_normal.jpg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/816653/1490894597', 'profile_link_color': '097000', 'profile_sidebar_border_color': 'FFFFFF', 'profile_sidebar_fill_color': 'DDFFCC', 'profile_text_color': '222222', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 37, 'favorite_count': 10, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n",
"{'created_at': 'Sat Apr 08 07:15:02 +0000 2017', 'id': 850607904730296320, 'id_str': '850607904730296320', 'text': 'Engrams and circuits crucial for systems consolidation of a memory | If you have money to access Science ... https://t.co/Q4CUt8AyLc', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/Q4CUt8AyLc', 'expanded_url': 'http://science.sciencemag.org/content/356/6333/73', 'display_url': 'science.sciencemag.org/content/356/63…', 'indices': [109, 132]}]}, 'source': '<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 29843511, 'id_str': '29843511', 'name': 'Nando de Freitas', 'screen_name': 'NandoDF', 'location': 'London, England', 'description': 'Researching intelligence to understand what we are and to find ways to harness it wisely.', 'url': 'https://t.co/KJYJNVKZE9', 'entities': {'url': {'urls': [{'url': 'https://t.co/KJYJNVKZE9', 'expanded_url': 'http://www.cs.ox.ac.uk/people/nando.defreitas/', 'display_url': 'cs.ox.ac.uk/people/nando.d…', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 22713, 'friends_count': 186, 'listed_count': 623, 'created_at': 'Wed Apr 08 22:41:09 +0000 2009', 'favourites_count': 2265, 'utc_offset': -25200, 'time_zone': 'Pacific Time (US & Canada)', 'geo_enabled': False, 'verified': False, 'statuses_count': 2683, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': '022330', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme15/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme15/bg.png', 'profile_background_tile': False, 'profile_image_url': 
'http://pbs.twimg.com/profile_images/1532482636/DSC_0549_037_normal.JPG', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/1532482636/DSC_0549_037_normal.JPG', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/29843511/1457616654', 'profile_link_color': '0084B4', 'profile_sidebar_border_color': 'A8C7F7', 'profile_sidebar_fill_color': 'C0DFEC', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 1, 'favorite_count': 1, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n",
"{'created_at': 'Sat Apr 08 07:12:06 +0000 2017', 'id': 850607166495051776, 'id_str': '850607166495051776', 'text': 'A NASA infrared look at the Southern Indian Ocean’s 15th tropical cyclone https://t.co/uqXIWmVlki https://t.co/DYgUC5JaNB', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/uqXIWmVlki', 'expanded_url': 'https://scienmag.com/?p=1527728', 'display_url': 'scienmag.com/?p=1527728', 'indices': [74, 97]}], 'media': [{'id': 850607163332603905, 'id_str': '850607163332603905', 'indices': [98, 121], 'media_url': 'http://pbs.twimg.com/media/C832tVXXsAEg3TR.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C832tVXXsAEg3TR.jpg', 'url': 'https://t.co/DYgUC5JaNB', 'display_url': 'pic.twitter.com/DYgUC5JaNB', 'expanded_url': 'https://twitter.com/scienmag/status/850607166495051776/photo/1', 'type': 'photo', 'sizes': {'medium': {'w': 720, 'h': 556, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}, 'small': {'w': 680, 'h': 525, 'resize': 'fit'}, 'large': {'w': 720, 'h': 556, 'resize': 'fit'}}}]}, 'extended_entities': {'media': [{'id': 850607163332603905, 'id_str': '850607163332603905', 'indices': [98, 121], 'media_url': 'http://pbs.twimg.com/media/C832tVXXsAEg3TR.jpg', 'media_url_https': 'https://pbs.twimg.com/media/C832tVXXsAEg3TR.jpg', 'url': 'https://t.co/DYgUC5JaNB', 'display_url': 'pic.twitter.com/DYgUC5JaNB', 'expanded_url': 'https://twitter.com/scienmag/status/850607166495051776/photo/1', 'type': 'photo', 'sizes': {'medium': {'w': 720, 'h': 556, 'resize': 'fit'}, 'thumb': {'w': 150, 'h': 150, 'resize': 'crop'}, 'small': {'w': 680, 'h': 525, 'resize': 'fit'}, 'large': {'w': 720, 'h': 556, 'resize': 'fit'}}}]}, 'source': '<a href=\"http://scienmag.com/\" rel=\"nofollow\">scienmag_bioeng</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 2782211491, 
'id_str': '2782211491', 'name': 'Science', 'screen_name': 'scienmag', 'location': 'London, England', 'description': '#Science #Magazine - #Biology #Chemistry #Physics #Space #Nature #Bioengineering #sciencepressrelease', 'url': 'https://t.co/FFkjqIzULu', 'entities': {'url': {'urls': [{'url': 'https://t.co/FFkjqIzULu', 'expanded_url': 'http://scienmag.com/', 'display_url': 'scienmag.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 129987, 'friends_count': 54957, 'listed_count': 1695, 'created_at': 'Sun Aug 31 12:03:37 +0000 2014', 'favourites_count': 663, 'utc_offset': -14400, 'time_zone': 'Eastern Time (US & Canada)', 'geo_enabled': True, 'verified': True, 'statuses_count': 92129, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/521409417292431360/EGg0LOAK_normal.jpeg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/2782211491/1409486781', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': True, 'default_profile': True, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 0, 'favorite_count': 6, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 
'lang': 'en'}\n",
"{'created_at': 'Sat Apr 08 07:12:03 +0000 2017', 'id': 850607151462711297, 'id_str': '850607151462711297', 'text': 'Weekly Roundup: Apple to reset the Mac Pro, Tesla most valuable U.S. automaker https://t.co/O8XissmJvI', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/O8XissmJvI', 'expanded_url': 'http://tcrn.ch/2nTd2jA', 'display_url': 'tcrn.ch/2nTd2jA', 'indices': [79, 102]}]}, 'source': '<a href=\"http://www.socialflow.com\" rel=\"nofollow\">SocialFlow</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 816653, 'id_str': '816653', 'name': 'TechCrunch', 'screen_name': 'TechCrunch', 'location': 'San Francisco, CA', 'description': 'Breaking technology news, analysis, and opinions from TechCrunch. The number one guide for all things tech. Got a tip? Let us know tips@techcrunch.com', 'url': 'https://t.co/b5Oyx12qGG', 'entities': {'url': {'urls': [{'url': 'https://t.co/b5Oyx12qGG', 'expanded_url': 'http://techcrunch.com', 'display_url': 'techcrunch.com', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 8800086, 'friends_count': 898, 'listed_count': 108905, 'created_at': 'Wed Mar 07 01:27:09 +0000 2007', 'favourites_count': 1906, 'utc_offset': -25200, 'time_zone': 'Pacific Time (US & Canada)', 'geo_enabled': True, 'verified': True, 'statuses_count': 153517, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': True, 'profile_background_color': '149500', 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/621096023751004161/BAKy7hCT.png', 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/621096023751004161/BAKy7hCT.png', 'profile_background_tile': False, 'profile_image_url': 
'http://pbs.twimg.com/profile_images/615392662233808896/EtxjSSKk_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/615392662233808896/EtxjSSKk_normal.jpg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/816653/1490894597', 'profile_link_color': '097000', 'profile_sidebar_border_color': 'FFFFFF', 'profile_sidebar_fill_color': 'DDFFCC', 'profile_text_color': '222222', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 15, 'favorite_count': 13, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n",
"{'created_at': 'Sat Apr 08 07:10:34 +0000 2017', 'id': 850606780954628096, 'id_str': '850606780954628096', 'text': 'Instead of doubling its data center footprint, Google built its own computer chip for running deep neural networks https://t.co/oJyjVdXzik', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/oJyjVdXzik', 'expanded_url': 'http://bit.ly/2nTZVil', 'display_url': 'bit.ly/2nTZVil', 'indices': [115, 138]}]}, 'source': '<a href=\"http://www.socialflow.com\" rel=\"nofollow\">SocialFlow</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 1344951, 'id_str': '1344951', 'name': 'WIRED', 'screen_name': 'WIRED', 'location': 'San Francisco/New York', 'description': 'WIRED is where tomorrow is realized.', 'url': 'http://t.co/AbRkBRjcYo', 'entities': {'url': {'urls': [{'url': 'http://t.co/AbRkBRjcYo', 'expanded_url': 'http://WIRED.com', 'display_url': 'WIRED.com', 'indices': [0, 22]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 8525967, 'friends_count': 278, 'listed_count': 89501, 'created_at': 'Sat Mar 17 09:57:25 +0000 2007', 'favourites_count': 1715, 'utc_offset': -25200, 'time_zone': 'Pacific Time (US & Canada)', 'geo_enabled': False, 'verified': True, 'statuses_count': 75916, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': True, 'profile_background_color': '000000', 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/734548027/5e5ee18945d22a9e2d9971208971bfdd.jpeg', 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/734548027/5e5ee18945d22a9e2d9971208971bfdd.jpeg', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/615598832726970372/jsK-gBSt_normal.png', 'profile_image_url_https': 
'https://pbs.twimg.com/profile_images/615598832726970372/jsK-gBSt_normal.png', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/1344951/1490210731', 'profile_link_color': '99DCF0', 'profile_sidebar_border_color': 'FFFFFF', 'profile_sidebar_fill_color': 'EEEEEE', 'profile_text_color': '000000', 'profile_use_background_image': False, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 41, 'favorite_count': 64, 'favorited': False, 'retweeted': False, 'possibly_sensitive': False, 'possibly_sensitive_appealable': False, 'lang': 'en'}\n",
"{'created_at': 'Sat Apr 08 07:09:33 +0000 2017', 'id': 850606523440967680, 'id_str': '850606523440967680', 'text': 'RT @tejasdkulkarni: Some foundational conceptual frameworks for AI are (1) Horde by Sutton et al, (2) algorithmic complexity (3) core knowl…', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'tejasdkulkarni', 'name': 'Tejas Kulkarni', 'id': 56872711, 'id_str': '56872711', 'indices': [3, 18]}], 'urls': []}, 'source': '<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 29843511, 'id_str': '29843511', 'name': 'Nando de Freitas', 'screen_name': 'NandoDF', 'location': 'London, England', 'description': 'Researching intelligence to understand what we are and to find ways to harness it wisely.', 'url': 'https://t.co/KJYJNVKZE9', 'entities': {'url': {'urls': [{'url': 'https://t.co/KJYJNVKZE9', 'expanded_url': 'http://www.cs.ox.ac.uk/people/nando.defreitas/', 'display_url': 'cs.ox.ac.uk/people/nando.d…', 'indices': [0, 23]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 22713, 'friends_count': 186, 'listed_count': 623, 'created_at': 'Wed Apr 08 22:41:09 +0000 2009', 'favourites_count': 2265, 'utc_offset': -25200, 'time_zone': 'Pacific Time (US & Canada)', 'geo_enabled': False, 'verified': False, 'statuses_count': 2683, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': '022330', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme15/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme15/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/1532482636/DSC_0549_037_normal.JPG', 
'profile_image_url_https': 'https://pbs.twimg.com/profile_images/1532482636/DSC_0549_037_normal.JPG', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/29843511/1457616654', 'profile_link_color': '0084B4', 'profile_sidebar_border_color': 'A8C7F7', 'profile_sidebar_fill_color': 'C0DFEC', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': False, 'default_profile_image': False, 'following': True, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'retweeted_status': {'created_at': 'Fri Apr 07 09:46:46 +0000 2017', 'id': 850283699312762880, 'id_str': '850283699312762880', 'text': 'Some foundational conceptual frameworks for AI are (1) Horde by Sutton et al, (2) algorithmic complexity (3) core knowledge by Spelke et al', 'truncated': False, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}, 'source': '<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 56872711, 'id_str': '56872711', 'name': 'Tejas Kulkarni', 'screen_name': 'tejasdkulkarni', 'location': '', 'description': 'Research Scientist at Google DeepMind. AI PhD from MIT. 
I want to understand how the mind works.', 'url': 'http://t.co/qNnX25VwDk', 'entities': {'url': {'urls': [{'url': 'http://t.co/qNnX25VwDk', 'expanded_url': 'http://tejask.com', 'display_url': 'tejask.com', 'indices': [0, 22]}]}, 'description': {'urls': []}}, 'protected': False, 'followers_count': 1731, 'friends_count': 267, 'listed_count': 51, 'created_at': 'Wed Jul 15 00:44:25 +0000 2009', 'favourites_count': 254, 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'statuses_count': 139, 'lang': 'en', 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/722316259132645376/kHn7-6gJ_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/722316259132645376/kHn7-6gJ_normal.jpg', 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'has_extended_profile': False, 'default_profile': True, 'default_profile_image': False, 'following': False, 'follow_request_sent': False, 'notifications': False, 'translator_type': 'none'}, 'geo': None, 'coordinates': None, 'place': None, 'contributors': None, 'is_quote_status': False, 'retweet_count': 3, 'favorite_count': 26, 'favorited': False, 'retweeted': False, 'lang': 'en'}, 'is_quote_status': False, 'retweet_count': 3, 'favorite_count': 0, 'favorited': False, 'retweeted': False, 'lang': 'en'}\n"
]
}
],
"source": [
"for status in tweepy.Cursor(api.home_timeline).items(10):\n",
" # Process a single status\n",
" print(status._json) "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from tweepy import Stream\n",
"from tweepy.streaming import StreamListener\n",
"import time\n",
" \n",
"class MyListener(StreamListener):\n",
" \n",
" def on_data(self, data):\n",
" try:\n",
" with open('python.json', 'a') as f:\n",
" f.write(data)\n",
" return True\n",
" except BaseException as e:\n",
" print(str(e))\n",
" return True\n",
" \n",
" def on_error(self, status):\n",
" print(status)\n",
" return True\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"created_at\": \"Sun Mar 19 10:25:43 +0000 2017\",\n",
" \"id\": 843408135171792896,\n",
" \"id_str\": \"843408135171792896\",\n",
" \"text\": \"#AI Consulting Roles-We're Hiring #Artificial Intelligence experts #machinelearning #python...\\u2026 https://t.co/ivmKoKwC6T\",\n",
" \"display_text_range\": [\n",
" 0,\n",
" 140\n",
" ],\n",
" \"source\": \"<a href=\\\"http://linkis.com\\\" rel=\\\"nofollow\\\">Linkis: turn sharing into growth</a>\",\n",
" \"truncated\": true,\n",
" \"in_reply_to_status_id\": null,\n",
" \"in_reply_to_status_id_str\": null,\n",
" \"in_reply_to_user_id\": null,\n",
" \"in_reply_to_user_id_str\": null,\n",
" \"in_reply_to_screen_name\": null,\n",
" \"user\": {\n",
" \"id\": 4481835072,\n",
" \"id_str\": \"4481835072\",\n",
" \"name\": \"DIGR\",\n",
" \"screen_name\": \"digr_io\",\n",
" \"location\": \"St. Louis, MO, United States\",\n",
" \"url\": \"http://bit.ly/digr\",\n",
" \"description\": \"Helping customers deploy #Analytics #DataManagement #BigData #DataViz @Tableau @Talend @XtremeData @SnowflakeDB. Founded by @axelrod_eric\",\n",
" \"protected\": false,\n",
" \"verified\": false,\n",
" \"followers_count\": 2538,\n",
" \"friends_count\": 4111,\n",
" \"listed_count\": 1298,\n",
" \"favourites_count\": 640,\n",
" \"statuses_count\": 11859,\n",
" \"created_at\": \"Mon Dec 07 03:40:21 +0000 2015\",\n",
" \"utc_offset\": -18000,\n",
" \"time_zone\": \"Central Time (US & Canada)\",\n",
" \"geo_enabled\": false,\n",
" \"lang\": \"en\",\n",
" \"contributors_enabled\": false,\n",
" \"is_translator\": false,\n",
" \"profile_background_color\": \"C0DEED\",\n",
" \"profile_background_image_url\": \"http://abs.twimg.com/images/themes/theme1/bg.png\",\n",
" \"profile_background_image_url_https\": \"https://abs.twimg.com/images/themes/theme1/bg.png\",\n",
" \"profile_background_tile\": false,\n",
" \"profile_link_color\": \"1DA1F2\",\n",
" \"profile_sidebar_border_color\": \"C0DEED\",\n",
" \"profile_sidebar_fill_color\": \"DDEEF6\",\n",
" \"profile_text_color\": \"333333\",\n",
" \"profile_use_background_image\": true,\n",
" \"profile_image_url\": \"http://pbs.twimg.com/profile_images/673709429200564225/XrIRC-ip_normal.png\",\n",
" \"profile_image_url_https\": \"https://pbs.twimg.com/profile_images/673709429200564225/XrIRC-ip_normal.png\",\n",
" \"default_profile\": true,\n",
" \"default_profile_image\": false,\n",
" \"following\": null,\n",
" \"follow_request_sent\": null,\n",
" \"notifications\": null\n",
" },\n",
" \"geo\": null,\n",
" \"coordinates\": null,\n",
" \"place\": null,\n",
" \"contributors\": null,\n",
" \"is_quote_status\": false,\n",
" \"extended_tweet\": {\n",
" \"full_text\": \"#AI Consulting Roles-We're Hiring #Artificial Intelligence experts #machinelearning #python... https://t.co/cs51fgWqTW by #alevergara78 https://t.co/Ug0OpxTacU\",\n",
" \"display_text_range\": [\n",
" 0,\n",
" 135\n",
" ],\n",
" \"entities\": {\n",
" \"hashtags\": [\n",
" {\n",
" \"text\": \"AI\",\n",
" \"indices\": [\n",
" 0,\n",
" 3\n",
" ]\n",
" },\n",
" {\n",
" \"text\": \"Artificial\",\n",
" \"indices\": [\n",
" 34,\n",
" 45\n",
" ]\n",
" },\n",
" {\n",
" \"text\": \"machinelearning\",\n",
" \"indices\": [\n",
" 67,\n",
" 83\n",
" ]\n",
" },\n",
" {\n",
" \"text\": \"python\",\n",
" \"indices\": [\n",
" 84,\n",
" 91\n",
" ]\n",
" },\n",
" {\n",
" \"text\": \"alevergara78\",\n",
" \"indices\": [\n",
" 122,\n",
" 135\n",
" ]\n",
" }\n",
" ],\n",
" \"urls\": [\n",
" {\n",
" \"url\": \"https://t.co/cs51fgWqTW\",\n",
" \"expanded_url\": \"http://ln.is/FMYLB\",\n",
" \"display_url\": \"ln.is/FMYLB\",\n",
" \"indices\": [\n",
" 95,\n",
" 118\n",
" ]\n",
" }\n",
" ],\n",
" \"user_mentions\": [],\n",
" \"symbols\": [],\n",
" \"media\": [\n",
" {\n",
" \"id\": 843408132487315457,\n",
" \"id_str\": \"843408132487315457\",\n",
" \"indices\": [\n",
" 136,\n",
" 159\n",
" ],\n",
" \"media_url\": \"http://pbs.twimg.com/media/C7RjOlmVAAEevSG.jpg\",\n",
" \"media_url_https\": \"https://pbs.twimg.com/media/C7RjOlmVAAEevSG.jpg\",\n",
" \"url\": \"https://t.co/Ug0OpxTacU\",\n",
" \"display_url\": \"pic.twitter.com/Ug0OpxTacU\",\n",
" \"expanded_url\": \"https://twitter.com/digr_io/status/843408135171792896/photo/1\",\n",
" \"type\": \"photo\",\n",
" \"sizes\": {\n",
" \"large\": {\n",
" \"w\": 1200,\n",
" \"h\": 675,\n",
" \"resize\": \"fit\"\n",
" },\n",
" \"small\": {\n",
" \"w\": 680,\n",
" \"h\": 383,\n",
" \"resize\": \"fit\"\n",
" },\n",
" \"medium\": {\n",
" \"w\": 1200,\n",
" \"h\": 675,\n",
" \"resize\": \"fit\"\n",
" },\n",
" \"thumb\": {\n",
" \"w\": 150,\n",
" \"h\": 150,\n",
" \"resize\": \"crop\"\n",
" }\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" \"extended_entities\": {\n",
" \"media\": [\n",
" {\n",
" \"id\": 843408132487315457,\n",
" \"id_str\": \"843408132487315457\",\n",
" \"indices\": [\n",
" 136,\n",
" 159\n",
" ],\n",
" \"media_url\": \"http://pbs.twimg.com/media/C7RjOlmVAAEevSG.jpg\",\n",
" \"media_url_https\": \"https://pbs.twimg.com/media/C7RjOlmVAAEevSG.jpg\",\n",
" \"url\": \"https://t.co/Ug0OpxTacU\",\n",
" \"display_url\": \"pic.twitter.com/Ug0OpxTacU\",\n",
" \"expanded_url\": \"https://twitter.com/digr_io/status/843408135171792896/photo/1\",\n",
" \"type\": \"photo\",\n",
" \"sizes\": {\n",
" \"large\": {\n",
" \"w\": 1200,\n",
" \"h\": 675,\n",
" \"resize\": \"fit\"\n",
" },\n",
" \"small\": {\n",
" \"w\": 680,\n",
" \"h\": 383,\n",
" \"resize\": \"fit\"\n",
" },\n",
" \"medium\": {\n",
" \"w\": 1200,\n",
" \"h\": 675,\n",
" \"resize\": \"fit\"\n",
" },\n",
" \"thumb\": {\n",
" \"w\": 150,\n",
" \"h\": 150,\n",
" \"resize\": \"crop\"\n",
" }\n",
" }\n",
" }\n",
" ]\n",
" }\n",
" },\n",
" \"retweet_count\": 0,\n",
" \"favorite_count\": 0,\n",
" \"entities\": {\n",
" \"hashtags\": [\n",
" {\n",
" \"text\": \"AI\",\n",
" \"indices\": [\n",
" 0,\n",
" 3\n",
" ]\n",
" },\n",
" {\n",
" \"text\": \"Artificial\",\n",
" \"indices\": [\n",
" 34,\n",
" 45\n",
" ]\n",
" },\n",
" {\n",
" \"text\": \"machinelearning\",\n",
" \"indices\": [\n",
" 67,\n",
" 83\n",
" ]\n",
" },\n",
" {\n",
" \"text\": \"python\",\n",
" \"indices\": [\n",
" 84,\n",
" 91\n",
" ]\n",
" }\n",
" ],\n",
" \"urls\": [\n",
" {\n",
" \"url\": \"https://t.co/ivmKoKwC6T\",\n",
" \"expanded_url\": \"https://twitter.com/i/web/status/843408135171792896\",\n",
" \"display_url\": \"twitter.com/i/web/status/8\\u2026\",\n",
" \"indices\": [\n",
" 96,\n",
" 119\n",
" ]\n",
" }\n",
" ],\n",
" \"user_mentions\": [],\n",
" \"symbols\": []\n",
" },\n",
" \"favorited\": false,\n",
" \"retweeted\": false,\n",
" \"possibly_sensitive\": false,\n",
" \"filter_level\": \"low\",\n",
" \"lang\": \"en\",\n",
" \"timestamp_ms\": \"1489919143847\"\n",
"}\n"
]
}
],
"source": [
"import json\n",
" \n",
"with open('python.json', 'r') as f:\n",
" line = f.readline() # read only the first tweet/line\n",
" tweet = json.loads(line) # load it as Python dict\n",
" print(json.dumps(tweet, indent=4)) # pretty-print"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['RT', '@', 'marcobonzanini', ':', 'just', 'an', 'example', '!', ':', 'D', 'http', ':', '//example.com', '#', 'NLP']\n"
]
}
],
"source": [
"from nltk.tokenize import word_tokenize\n",
" \n",
"tweet = 'RT @marcobonzanini: just an example! :D http://example.com #NLP'\n",
"print(word_tokenize(tweet))\n",
"# ['RT', '@', 'marcobonzanini', ':', 'just', 'an', 'example', '!', ':', 'D', 'http', ':', '//example.com', '#', 'NLP']\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['RT', '@marcobonzanini', ':', 'just', 'an', 'example', '!', ':D', 'http://example.com', '#NLP']\n"
]
}
],
"source": [
"import re\n",
" \n",
"emoticons_str = r\"\"\"\n",
" (?:\n",
" [:=;] # Eyes\n",
" [oO\\-]? # Nose (optional)\n",
" [D\\)\\]\\(\\]/\\\\OpP] # Mouth\n",
" )\"\"\"\n",
" \n",
"regex_str = [\n",
" emoticons_str,\n",
" r'<[^>]+>', # HTML tags\n",
" r'(?:@[\\w_]+)', # @-mentions\n",
" r\"(?:\\#+[\\w_]+[\\w\\'_\\-]*[\\w_]+)\", # hash-tags\n",
" r'http[s]?://(?:[a-z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-f][0-9a-f]))+', # URLs\n",
" \n",
" r'(?:(?:\\d+,?)+(?:\\.?\\d+)?)', # numbers\n",
" r\"(?:[a-z][a-z'\\-_]+[a-z])\", # words with - and '\n",
" r'(?:[\\w_]+)', # other words\n",
" r'(?:\\S)' # anything else\n",
"]\n",
" \n",
"tokens_re = re.compile(r'('+'|'.join(regex_str)+')', re.VERBOSE | re.IGNORECASE)\n",
"emoticon_re = re.compile(r'^'+emoticons_str+'$', re.VERBOSE | re.IGNORECASE)\n",
" \n",
"def tokenize(s):\n",
" return tokens_re.findall(s)\n",
" \n",
"def preprocess(s, lowercase=False):\n",
" tokens = tokenize(s)\n",
" if lowercase:\n",
" tokens = [token if emoticon_re.search(token) else token.lower() for token in tokens]\n",
" return tokens\n",
" \n",
"tweet = 'RT @marcobonzanini: just an example! :D http://example.com #NLP'\n",
"print(preprocess(tweet))\n",
"# ['RT', '@marcobonzanini', ':', 'just', 'an', 'example', '!', ':D', 'http://example.com', '#NLP']\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with open('python.json', 'r') as f:\n",
" for line in f:\n",
" tweet = json.loads(line)\n",
" tokens = preprocess(tweet['text'])\n",
" #do_something_else(tokens)\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['RT', '@ipfconline1', ':', 'The', 'Guide', 'to', 'Understand', '#MachineLearning', 'Algorithms', 'With', 'The', 'R', '&', 'amp', ';', '#Python', 'Codes', 'to', 'Run', 'Them', '.', 'https://t.co/POABbz2So5', '…']\n"
]
}
],
"source": [
"print(tokens)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment