Created
April 24, 2015 18:29
-
-
Save davclark/5d3e7ee2c86a0d9eab2a to your computer and use it in GitHub Desktop.
Example of using GNIP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from requests import post, get, put\n", | |
"from requests.auth import HTTPBasicAuth\n", | |
"import yaml" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# creds.yaml must provide the 'email', 'password' and 'user' keys read below\n", | |
"with open('creds.yaml') as credfile:\n", | |
"    # safe_load only builds plain Python types; yaml.load without an explicit\n", | |
"    # Loader can construct arbitrary objects and is rejected by PyYAML >= 6\n", | |
"    creds = yaml.safe_load(credfile)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# HTTP Basic credentials, passed as auth= to the Gnip API calls in this notebook\n", | |
"auth = HTTPBasicAuth(creds['email'], creds['password'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Prefix shared by this account's endpoints\n", | |
"url_base = 'https://historical.gnip.com/accounts/' + creds['user'] + '/'\n", | |
"# Jobs are submitted by POSTing to the account's jobs.json\n", | |
"post_url = url_base + 'jobs.json'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 70, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Fields reused as-is for each query; job-specific fields are added to a copy\n", | |
"base_query = {\"publisher\": \"twitter\",\n", | |
" \"streamType\": \"track\",\n", | |
" \"dataFormat\": \"activity_streams\",}" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Step 1\n", | |
"\n", | |
"Submit a job" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 93, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Layer the fields specific to this query on top of the shared defaults\n", | |
"q = dict(\n", | |
"    base_query,\n", | |
"    fromDate=\"201504110000\",\n", | |
"    toDate=\"20150417\",  # time will be inferred as 0000 if unspecified\n", | |
"    title='gb3',  # Needs to be unique\n", | |
"    rules=[{\"value\": \"from:CathrynCarson\"}],\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 94, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'account': 'UCBerkeleyExplore',\n", | |
" 'format': 'activity_streams',\n", | |
" 'fromDate': '201504110000',\n", | |
" 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm.json',\n", | |
" 'publisher': 'twitter',\n", | |
" 'requestedAt': '2015-04-24T06:30:03Z',\n", | |
" 'requestedBy': 'jordan@cs.berkeley.edu',\n", | |
" 'status': 'opened',\n", | |
" 'statusMessage': 'Waiting on quote from Gnip.',\n", | |
" 'streamType': 'track',\n", | |
" 'title': 'gb3',\n", | |
" 'toDate': '201504170000'}" | |
] | |
}, | |
"execution_count": 94, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Get a quote\n", | |
"# The response describes the new job, including the jobURL used in the later steps\n", | |
"resp = post(post_url, json=q, auth=auth)\n", | |
"job_info = resp.json()\n", | |
"job_info" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Since data won't be persistent, it's a good idea to copy the jobURL into text somewhere\n", | |
"# For a more robust solution, we'd want to write this out to a file or even better\n", | |
"# something like a redis queue or mongodb\n", | |
"# Note that the jobURL is also included in the job responses - even those where the request was TO this URL!\n", | |
"jobURL = 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm.json'" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Steps 2 and 4\n", | |
"\n", | |
"This is the general approach to checking in on the status of a job - both checking for a quote prior to acceptance, and also checking for completion of the job." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'acceptedAt': '2015-04-24T06:32:05Z',\n", | |
" 'acceptedBy': 'jordan@cs.berkeley.edu',\n", | |
" 'account': 'UCBerkeleyExplore',\n", | |
" 'format': 'activity_streams',\n", | |
" 'fromDate': '201504110000',\n", | |
" 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm.json',\n", | |
" 'percentComplete': 100,\n", | |
" 'publisher': 'twitter',\n", | |
" 'quote': {'estimatedActivityCount': 100,\n", | |
" 'estimatedDurationHours': '1.0',\n", | |
" 'estimatedFileSizeMb': '0.0',\n", | |
" 'expiresAt': '2015-05-01T06:31:14Z'},\n", | |
" 'requestedAt': '2015-04-24T06:30:03Z',\n", | |
" 'requestedBy': 'jordan@cs.berkeley.edu',\n", | |
" 'results': {'activityCount': 5,\n", | |
" 'completedAt': '2015-04-24T06:37:30Z',\n", | |
" 'dataURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm/results.json',\n", | |
" 'expiresAt': '2015-05-09T06:37:12Z',\n", | |
" 'fileCount': 5,\n", | |
" 'fileSizeMb': '0.0'},\n", | |
" 'status': 'delivered',\n", | |
" 'statusMessage': 'Job delivered and available for download.',\n", | |
" 'streamType': 'track',\n", | |
" 'title': 'gb3',\n", | |
" 'toDate': '201504170000'}" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Keep checking (max 5 / 5 sec across all GET requests) until ready\n", | |
"# i.e. re-run until 'status' reads 'quoted' (before accepting) or 'delivered' (before downloading)\n", | |
"resp = get(jobURL, auth=auth)\n", | |
"job_status = resp.json()\n", | |
"job_status" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# dynamically could do (the URL is nested under 'results' once the job is delivered)\n", | |
"# dataURL = job_status['results']['dataURL']\n", | |
"# But this is persistent:\n", | |
"dataURL = 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm/results.json'" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Notice that the \"requestedAt\" time is in GMT. Bummer!" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Step 3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 105, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'2015-04-24 06:35:54'" | |
] | |
}, | |
"execution_count": 105, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from time import gmtime, strftime\n", | |
"# Current time in GMT, for comparison with the timestamps the API reports\n", | |
"strftime(\"%Y-%m-%d %H:%M:%S\", gmtime())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 99, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'acceptedAt': '2015-04-24T06:32:05Z',\n", | |
" 'acceptedBy': 'jordan@cs.berkeley.edu',\n", | |
" 'account': 'UCBerkeleyExplore',\n", | |
" 'format': 'activity_streams',\n", | |
" 'fromDate': '201504110000',\n", | |
" 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm.json',\n", | |
" 'publisher': 'twitter',\n", | |
" 'quote': {'estimatedActivityCount': 100,\n", | |
" 'estimatedDurationHours': '1.0',\n", | |
" 'estimatedFileSizeMb': '0.0',\n", | |
" 'expiresAt': '2015-05-01T06:31:14Z'},\n", | |
" 'requestedAt': '2015-04-24T06:30:03Z',\n", | |
" 'requestedBy': 'jordan@cs.berkeley.edu',\n", | |
" 'status': 'accepted',\n", | |
" 'statusMessage': 'Job accepted and ready to be queued.',\n", | |
" 'streamType': 'track',\n", | |
" 'title': 'gb3',\n", | |
" 'toDate': '201504170000'}" | |
] | |
}, | |
"execution_count": 99, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Could also use job_status here, can also \"reject\"\n", | |
"# PUTting {\"status\": \"accept\"} to the job's URL accepts the quote\n", | |
"resp = put(job_info['jobURL'], json={\"status\": \"accept\"}, auth=auth)\n", | |
"job_status = resp.json()\n", | |
"job_status" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Step 5\n", | |
"\n", | |
"Get our results (for the sample query, this should be very small)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'expiresAt': '2015-05-09T06:37:12Z',\n", | |
" 'totalFileSizeBytes': 5154,\n", | |
" 'urlCount': 5,\n", | |
" 'urlList': ['https://s3-us-west-1.amazonaws.com/archive.replay.snapshots/snapshots/twitter/track/activity_streams/UCBerkeleyExplore/2015/04/24/20150411-20150417_8wg55wk6vm/2015/04/11/14/10_activities.json.gz?AWSAccessKeyId=AKIAJMSYMREFVVJ6E7QQ&Expires=1432449450&Signature=uacttaiL2eEQmQrcMf2dxOOJF%2FA%3D',\n", | |
" 'https://s3-us-west-1.amazonaws.com/archive.replay.snapshots/snapshots/twitter/track/activity_streams/UCBerkeleyExplore/2015/04/24/20150411-20150417_8wg55wk6vm/2015/04/11/14/20_activities.json.gz?AWSAccessKeyId=AKIAJMSYMREFVVJ6E7QQ&Expires=1432449450&Signature=y2vtp%2FfFN%2FAUx2KlvyKzKTRe1iE%3D',\n", | |
" 'https://s3-us-west-1.amazonaws.com/archive.replay.snapshots/snapshots/twitter/track/activity_streams/UCBerkeleyExplore/2015/04/24/20150411-20150417_8wg55wk6vm/2015/04/11/23/50_activities.json.gz?AWSAccessKeyId=AKIAJMSYMREFVVJ6E7QQ&Expires=1432449450&Signature=NrHsR8YWf9hjD3Ks8KEwJCkPKv0%3D',\n", | |
" 'https://s3-us-west-1.amazonaws.com/archive.replay.snapshots/snapshots/twitter/track/activity_streams/UCBerkeleyExplore/2015/04/24/20150411-20150417_8wg55wk6vm/2015/04/12/03/20_activities.json.gz?AWSAccessKeyId=AKIAJMSYMREFVVJ6E7QQ&Expires=1432449450&Signature=N4oGXFlb%2BvwXV2gaMUOjRJ%2FUcI8%3D',\n", | |
" 'https://s3-us-west-1.amazonaws.com/archive.replay.snapshots/snapshots/twitter/track/activity_streams/UCBerkeleyExplore/2015/04/24/20150411-20150417_8wg55wk6vm/2015/04/15/13/50_activities.json.gz?AWSAccessKeyId=AKIAJMSYMREFVVJ6E7QQ&Expires=1432449450&Signature=Mj4wnN1x988HC%2F4R%2FxgEUq%2FOxdo%3D']}" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Should just be ready\n", | |
"# NOTE: this rebinds job_status to the results listing (expiresAt, urlList, ...),\n", | |
"# not a job description; the download loop reads job_status['urlList']\n", | |
"resp = get(dataURL, auth=auth)\n", | |
"job_status = resp.json()\n", | |
"job_status" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"ofnames = []\n", | |
"for url in job_status['urlList']:\n", | |
"    # The bare file name repeats across hours (e.g. 20_activities.json.gz), so keep\n", | |
"    # the year/month/day/hour path components too; otherwise later downloads\n", | |
"    # silently overwrite earlier ones\n", | |
"    path = url.split('?', 1)[0]  # drop the signed query string first\n", | |
"    ofname = '_'.join(path.rsplit('/', 5)[1:])\n", | |
"    resp = get(url, stream=True)\n", | |
"    resp.raise_for_status()  # don't save an error page as if it were data\n", | |
"    with open(ofname, 'wb') as ofile:\n", | |
"        # iter_content() defaults to 1-byte chunks; ask for a sensible size\n", | |
"        for chunk in resp.iter_content(chunk_size=8192):\n", | |
"            ofile.write(chunk)\n", | |
"    ofnames.append(ofname) # This again isn't persistent" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['2015_04_11_14_10_activities.json.gz',\n", | |
" '2015_04_11_14_20_activities.json.gz',\n", | |
" '2015_04_11_23_50_activities.json.gz',\n", | |
" '2015_04_12_03_20_activities.json.gz',\n", | |
" '2015_04_15_13_50_activities.json.gz']" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ofnames" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Persistent copy of the names produced by the download loop\n", | |
"ofnames = ['2015_04_11_14_10_activities.json.gz',\n", | |
"           '2015_04_11_14_20_activities.json.gz',\n", | |
"           '2015_04_11_23_50_activities.json.gz',\n", | |
"           '2015_04_12_03_20_activities.json.gz',\n", | |
"           '2015_04_15_13_50_activities.json.gz']\n", | |
"# or ofnames = glob('*.json.gz')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Step 6" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import json, gzip" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Look up the json.load docs; plain Python, so this cell also runs outside IPython\n", | |
"# (the former `data = json.load?` only showed help and never bound `data`)\n", | |
"help(json.load)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def read_tweet_data(fname):\n", | |
"    '''Parse one gzipped activity file into a list of dicts.\n", | |
"\n", | |
"    These files include tweet packages, as well as some general info at the end;\n", | |
"    every non-blank line is decoded as a standalone JSON document.\n", | |
"\n", | |
"    fname -- path to a gzip-compressed file with one JSON document per line\n", | |
"\n", | |
"    Returns the decoded documents in file order (trailing info record included).\n", | |
"    Raises OSError if the file cannot be read and ValueError if a non-blank\n", | |
"    line is not valid JSON.\n", | |
"\n", | |
"    There may be some regular structure to the blank lines that could make this\n", | |
"    *slightly* faster, but probably doesn't matter relative to the speed of parsing.'''\n", | |
"    tweet_data = []\n", | |
"    # Decode as UTF-8 explicitly instead of relying on the platform's default\n", | |
"    # encoding, which would garble non-ASCII tweet text on some systems\n", | |
"    with gzip.open(fname, 'rt', encoding='utf-8') as datafile:\n", | |
"        for line in datafile:\n", | |
"            # Skip blank lines, including ones holding only whitespace\n", | |
"            if line.strip():\n", | |
"                tweet_data.append(json.loads(line))\n", | |
"\n", | |
"    return tweet_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'actor': {'displayName': 'Cathryn Carson',\n", | |
" 'favoritesCount': 45,\n", | |
" 'followersCount': 260,\n", | |
" 'friendsCount': 212,\n", | |
" 'id': 'id:twitter.com:2594018798',\n", | |
" 'image': 'https://pbs.twimg.com/profile_images/526971310090358784/eWx-JEup_normal.jpeg',\n", | |
" 'languages': ['en'],\n", | |
" 'link': 'http://www.twitter.com/CathrynCarson',\n", | |
" 'links': [{'href': 'http://history.berkeley.edu/people/cathryn-carson',\n", | |
" 'rel': 'me'}],\n", | |
" 'listedCount': 17,\n", | |
" 'location': {'displayName': 'Berkeley, CA', 'objectType': 'place'},\n", | |
" 'objectType': 'person',\n", | |
" 'postedTime': '2014-06-29T01:59:33.000Z',\n", | |
" 'preferredUsername': 'CathrynCarson',\n", | |
" 'statusesCount': 384,\n", | |
" 'summary': 'Historian of science, ethnographer of contemporary research institutions. Once upon a time @DLabAtBerkeley.',\n", | |
" 'twitterTimeZone': None,\n", | |
" 'utcOffset': None,\n", | |
" 'verified': False},\n", | |
" 'body': 'RT @abuaardvark: Big Data in Social Science - new Annals packed w/interesting articles http://t.co/grLC6eRwO2',\n", | |
" 'favoritesCount': 0,\n", | |
" 'generator': {'displayName': 'Twitter Web Client',\n", | |
" 'link': 'http://twitter.com'},\n", | |
" 'gnip': {'matching_rules': [{'tag': None, 'value': 'from:CathrynCarson'}],\n", | |
" 'urls': [{'expanded_status': 200,\n", | |
" 'expanded_url': 'http://ann.sagepub.com/content/659/1.toc',\n", | |
" 'url': 'http://t.co/grLC6eRwO2'}]},\n", | |
" 'id': 'tag:search.twitter.com,2005:586896169319600128',\n", | |
" 'link': 'http://twitter.com/CathrynCarson/statuses/586896169319600128',\n", | |
" 'object': {'actor': {'displayName': 'Marc Lynch',\n", | |
" 'favoritesCount': 97,\n", | |
" 'followersCount': 35939,\n", | |
" 'friendsCount': 666,\n", | |
" 'id': 'id:twitter.com:18267544',\n", | |
" 'image': 'https://pbs.twimg.com/profile_images/566579490063937536/MYo9iE8U_normal.jpeg',\n", | |
" 'languages': ['en'],\n", | |
" 'link': 'http://www.twitter.com/abuaardvark',\n", | |
" 'links': [{'href': 'http://www.marclynch.com', 'rel': 'me'}],\n", | |
" 'listedCount': 1865,\n", | |
" 'location': {'displayName': 'ÜT: 37.892943,-122.270439',\n", | |
" 'objectType': 'place'},\n", | |
" 'objectType': 'person',\n", | |
" 'postedTime': '2008-12-20T15:33:59.000Z',\n", | |
" 'preferredUsername': 'abuaardvark',\n", | |
" 'statusesCount': 29649,\n", | |
" 'summary': 'Abu Aardvark. GWU. Monkey Cage. POMEPS. CNAS. Go Brewers and Packers!',\n", | |
" 'twitterTimeZone': 'Eastern Time (US & Canada)',\n", | |
" 'utcOffset': '-14400',\n", | |
" 'verified': False},\n", | |
" 'body': 'Big Data in Social Science - new Annals packed w/interesting articles http://t.co/grLC6eRwO2',\n", | |
" 'favoritesCount': 10,\n", | |
" 'generator': {'displayName': 'Twitter for Mac',\n", | |
" 'link': 'http://itunes.apple.com/us/app/twitter/id409789998?mt=12'},\n", | |
" 'id': 'tag:search.twitter.com,2005:586855911601348608',\n", | |
" 'link': 'http://twitter.com/abuaardvark/statuses/586855911601348608',\n", | |
" 'object': {'id': 'object:search.twitter.com,2005:586855911601348608',\n", | |
" 'link': 'http://twitter.com/abuaardvark/statuses/586855911601348608',\n", | |
" 'objectType': 'note',\n", | |
" 'postedTime': '2015-04-11T11:38:47.000Z',\n", | |
" 'summary': 'Big Data in Social Science - new Annals packed w/interesting articles http://t.co/grLC6eRwO2'},\n", | |
" 'objectType': 'activity',\n", | |
" 'postedTime': '2015-04-11T11:38:47.000Z',\n", | |
" 'provider': {'displayName': 'Twitter',\n", | |
" 'link': 'http://www.twitter.com',\n", | |
" 'objectType': 'service'},\n", | |
" 'twitter_entities': {'hashtags': [],\n", | |
" 'symbols': [],\n", | |
" 'trends': [],\n", | |
" 'urls': [{'display_url': 'ann.sagepub.com/content/659/1.…',\n", | |
" 'expanded_url': 'http://ann.sagepub.com/content/659/1.toc',\n", | |
" 'indices': [71, 93],\n", | |
" 'url': 'http://t.co/grLC6eRwO2'}],\n", | |
" 'user_mentions': []},\n", | |
" 'twitter_filter_level': 'low',\n", | |
" 'twitter_lang': 'en',\n", | |
" 'verb': 'post'},\n", | |
" 'objectType': 'activity',\n", | |
" 'postedTime': '2015-04-11T14:18:45.000Z',\n", | |
" 'provider': {'displayName': 'Twitter',\n", | |
" 'link': 'http://www.twitter.com',\n", | |
" 'objectType': 'service'},\n", | |
" 'retweetCount': 6,\n", | |
" 'twitter_entities': {'hashtags': [],\n", | |
" 'symbols': [],\n", | |
" 'trends': [],\n", | |
" 'urls': [{'display_url': 'ann.sagepub.com/content/659/1.…',\n", | |
" 'expanded_url': 'http://ann.sagepub.com/content/659/1.toc',\n", | |
" 'indices': [88, 110],\n", | |
" 'url': 'http://t.co/grLC6eRwO2'}],\n", | |
" 'user_mentions': [{'id': 18267544,\n", | |
" 'id_str': '18267544',\n", | |
" 'indices': [3, 15],\n", | |
" 'name': 'Marc Lynch',\n", | |
" 'screen_name': 'abuaardvark'}]},\n", | |
" 'twitter_filter_level': 'low',\n", | |
" 'twitter_lang': 'en',\n", | |
" 'verb': 'share'},\n", | |
" {'info': {'activity_count': 1,\n", | |
" 'message': 'Replay Request Completed',\n", | |
" 'sent': '2015-04-24T06:36:32+00:00'}}]" | |
] | |
}, | |
"execution_count": 57, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Sanity check on the first file: the tweet activity followed by the trailing info record\n", | |
"read_tweet_data(ofnames[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 72, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# One list of parsed records per downloaded file, in ofnames order\n", | |
"data = list(map(read_tweet_data, ofnames))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 74, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[2, 2, 2, 2, 2]" | |
] | |
}, | |
"execution_count": 74, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Each count includes the file's trailing info record, so 2 means a single tweet\n", | |
"# Cathryn never tweets twice in 10 minutes\n", | |
"[len(d) for d in data]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Step 0\n", | |
"\n", | |
"Did you forget about your job status? (Or want to spy on the others?)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Generally: https://historical.gnip.com/accounts/<account_name>/jobs.json\n", | |
"# Same URL as post_url: POST submits a job, GET lists the account's jobs\n", | |
"jobs_url = url_base + 'jobs.json'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 71, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'delivered': {'activityCount': 74,\n", | |
" 'jobCount': 3,\n", | |
" 'jobDaysRun': 10,\n", | |
" 'period': 'trial',\n", | |
" 'since': '2015-04-16T21:44:19Z'},\n", | |
" 'jobs': [{'expiresAt': '2015-05-08T05:47:10Z',\n", | |
" 'fromDate': '201201010000',\n", | |
" 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/9mkbvaz928.json',\n", | |
" 'percentComplete': 100,\n", | |
" 'publisher': 'twitter',\n", | |
" 'status': 'delivered',\n", | |
" 'streamType': 'track',\n", | |
" 'title': 'my_job',\n", | |
" 'toDate': '201201010001',\n", | |
" 'uuid': '9mkbvaz928'},\n", | |
" {'expiresAt': '2015-05-01T06:14:35Z',\n", | |
" 'fromDate': '201404230000',\n", | |
" 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/g75dcp53nh.json',\n", | |
" 'percentComplete': 0,\n", | |
" 'publisher': 'twitter',\n", | |
" 'status': 'quoted',\n", | |
" 'streamType': 'track',\n", | |
" 'title': 'gb1',\n", | |
" 'toDate': '201504230000',\n", | |
" 'uuid': 'g75dcp53nh'},\n", | |
" {'expiresAt': '2015-05-01T06:28:19Z',\n", | |
" 'fromDate': '201501010000',\n", | |
" 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/w9r3bb41z2.json',\n", | |
" 'percentComplete': 0,\n", | |
" 'publisher': 'twitter',\n", | |
" 'status': 'quoted',\n", | |
" 'streamType': 'track',\n", | |
" 'title': 'gb2',\n", | |
" 'toDate': '201504230000',\n", | |
" 'uuid': 'w9r3bb41z2'},\n", | |
" {'expiresAt': '2015-05-09T06:37:12Z',\n", | |
" 'fromDate': '201504110000',\n", | |
" 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm.json',\n", | |
" 'percentComplete': 100,\n", | |
" 'publisher': 'twitter',\n", | |
" 'status': 'delivered',\n", | |
" 'streamType': 'track',\n", | |
" 'title': 'gb3',\n", | |
" 'toDate': '201504170000',\n", | |
" 'uuid': '8wg55wk6vm'},\n", | |
" {'expiresAt': '2015-05-01T16:07:26Z',\n", | |
" 'fromDate': '201501010000',\n", | |
" 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/80b1dnt6q.json',\n", | |
" 'percentComplete': 0,\n", | |
" 'publisher': 'twitter',\n", | |
" 'status': 'rejected',\n", | |
" 'streamType': 'track',\n", | |
" 'title': 'mike j',\n", | |
" 'toDate': '201504230001',\n", | |
" 'uuid': '80b1dnt6q'},\n", | |
" {'expiresAt': '2015-05-09T16:23:16Z',\n", | |
" 'fromDate': '201504210000',\n", | |
" 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/pvnzbnwf0b.json',\n", | |
" 'percentComplete': 100,\n", | |
" 'publisher': 'twitter',\n", | |
" 'status': 'delivered',\n", | |
" 'streamType': 'track',\n", | |
" 'title': 'mike j',\n", | |
" 'toDate': '201504230001',\n", | |
" 'uuid': 'pvnzbnwf0b'},\n", | |
" {'expiresAt': '2015-05-01T18:20:13Z',\n", | |
" 'fromDate': '201301010000',\n", | |
" 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/ea88dwtjve.json',\n", | |
" 'percentComplete': 0,\n", | |
" 'publisher': 'twitter',\n", | |
" 'status': 'quoted',\n", | |
" 'streamType': 'track',\n", | |
" 'title': 'my historical job python',\n", | |
" 'toDate': '201301010001',\n", | |
" 'uuid': 'ea88dwtjve'}]}" | |
] | |
}, | |
"execution_count": 71, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Returns the jobs on the account (under 'jobs') plus delivery totals (under 'delivered')\n", | |
"resp = get(jobs_url, auth=auth)\n", | |
"jobs_info = resp.json()\n", | |
"jobs_info" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment