Created
September 17, 2019 17:50
-
-
Save djfan/d84ad663b83b9c146813b1ce8cc447ae to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import boto3\n", | |
"import ndjson\n", | |
"import gzip" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"s3 = boto3.resource('s3',\n", | |
" aws_access_key_id='AKIAJ6ZLMQ7SFXDCQFAQ', \n", | |
" aws_secret_access_key='*Q#$#*$*@#$**!#$*!#$')\n", | |
"\n", | |
"uc_bucket = s3.Bucket('uc-export-carto')\n", | |
"data_list = []\n", | |
"for obj in uc_bucket.objects.all(): \n", | |
" data_list.append(obj.key)\n", | |
" \n", | |
"path = data_list[-1]\n", | |
"print(path)\n", | |
"uc_bucket.download_file(path, 'hackathon_000000000000.ndjson.gz')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with gzip.open('hackathon_000000000000.ndjson.gz', 'rb') as f:\n", | |
" data = ndjson.load(f)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"155" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'venue_id': '35f2d50d-9c92-4d44-b307-579b21c46ef1',\n", | |
" 'brand': 'Sprouts Farmers Market',\n", | |
" 'state': 'CO',\n", | |
" 'city': 'Aurora',\n", | |
" 'daily_traffic_avg': '154',\n", | |
" 'home_air_distance_kilometers_25th_pct': '2',\n", | |
" 'home_air_distance_kilometers_median': '3',\n", | |
" 'home_air_distance_75th_kilometers_pct': '5',\n", | |
" 'work_air_distance_25th_kilometers_pct': '5',\n", | |
" 'work_air_distance_kilometers_median': '7',\n", | |
" 'work_air_distance_75th_kilometers_pct': '12',\n", | |
" 'home_origins': [{'fipsCode': '080050812002',\n", | |
" 'traffic_fraction': 0.006,\n", | |
" 'people_fraction': 0.009,\n", | |
" 'venue_air_distance': '2982',\n", | |
" 'population': '1625'},\n", | |
" {'fipsCode': '080050870003',\n", | |
" 'traffic_fraction': 0.012,\n", | |
" 'people_fraction': 0.011,\n", | |
" 'venue_air_distance': '271',\n", | |
" 'population': '2385'},\n", | |
" {'fipsCode': '080050868004',\n", | |
" 'traffic_fraction': 0.01,\n", | |
" 'people_fraction': 0.015,\n", | |
" 'venue_air_distance': '1071',\n", | |
" 'population': '1168'},\n", | |
" {'fipsCode': '080050803003',\n", | |
" 'traffic_fraction': 0.007,\n", | |
" 'people_fraction': 0.013,\n", | |
" 'venue_air_distance': '1574',\n", | |
" 'population': '2208'},\n", | |
" {'fipsCode': '080050802001',\n", | |
" 'traffic_fraction': 0.07,\n", | |
" 'people_fraction': 0.026,\n", | |
" 'venue_air_distance': '0',\n", | |
" 'population': '1175'},\n", | |
" {'fipsCode': '080310070371',\n", | |
" 'traffic_fraction': 0.015,\n", | |
" 'people_fraction': 0.017,\n", | |
" 'venue_air_distance': '1000',\n", | |
" 'population': '2663'},\n", | |
" {'fipsCode': '080050072012',\n", | |
" 'traffic_fraction': 0.007,\n", | |
" 'people_fraction': 0.013,\n", | |
" 'venue_air_distance': '2764',\n", | |
" 'population': '2361'},\n", | |
" {'fipsCode': '080050808001',\n", | |
" 'traffic_fraction': 0.033,\n", | |
" 'people_fraction': 0.011,\n", | |
" 'venue_air_distance': '1345',\n", | |
" 'population': '2537'},\n", | |
" {'fipsCode': '080050805001',\n", | |
" 'traffic_fraction': 0.009,\n", | |
" 'people_fraction': 0.011,\n", | |
" 'venue_air_distance': '2647',\n", | |
" 'population': '2286'},\n", | |
" {'fipsCode': '080050868003',\n", | |
" 'traffic_fraction': 0.009,\n", | |
" 'people_fraction': 0.015,\n", | |
" 'venue_air_distance': '1670',\n", | |
" 'population': '1858'}]}" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'venue_id': '6ea0a4d1-7aab-4e1d-8ecf-0f41e43ad556',\n", | |
" 'brand': 'Whole Foods Market',\n", | |
" 'state': 'CO',\n", | |
" 'city': 'Frisco',\n", | |
" 'daily_traffic_avg': '1227',\n", | |
" 'home_air_distance_kilometers_25th_pct': '8',\n", | |
" 'home_air_distance_kilometers_median': '34',\n", | |
" 'home_air_distance_75th_kilometers_pct': '105',\n", | |
" 'work_air_distance_25th_kilometers_pct': '8',\n", | |
" 'work_air_distance_kilometers_median': '37',\n", | |
" 'work_air_distance_75th_kilometers_pct': '124',\n", | |
" 'home_origins': [{'fipsCode': '081170001005',\n", | |
" 'traffic_fraction': 0.014,\n", | |
" 'people_fraction': 0.008,\n", | |
" 'venue_air_distance': '3201',\n", | |
" 'population': '1433'},\n", | |
" {'fipsCode': '081170004012',\n", | |
" 'traffic_fraction': 0.011,\n", | |
" 'people_fraction': 0.006,\n", | |
" 'venue_air_distance': '13123',\n", | |
" 'population': '801'},\n", | |
" {'fipsCode': '081170001004',\n", | |
" 'traffic_fraction': 0.011,\n", | |
" 'people_fraction': 0.006,\n", | |
" 'venue_air_distance': '1979',\n", | |
" 'population': '1168'},\n", | |
" {'fipsCode': '080370004033',\n", | |
" 'traffic_fraction': 0.004,\n", | |
" 'people_fraction': 0.006,\n", | |
" 'venue_air_distance': '55187',\n", | |
" 'population': '4308'},\n", | |
" {'fipsCode': '081170004014',\n", | |
" 'traffic_fraction': 0.01,\n", | |
" 'people_fraction': 0.01,\n", | |
" 'venue_air_distance': '10822',\n", | |
" 'population': '1298'},\n", | |
" {'fipsCode': '081170002004',\n", | |
" 'traffic_fraction': 0.015,\n", | |
" 'people_fraction': 0.015,\n", | |
" 'venue_air_distance': '3919',\n", | |
" 'population': '2800'},\n", | |
" {'fipsCode': '081170003003',\n", | |
" 'traffic_fraction': 0.018,\n", | |
" 'people_fraction': 0.009,\n", | |
" 'venue_air_distance': '462',\n", | |
" 'population': '486'},\n", | |
" {'fipsCode': '080370005031',\n", | |
" 'traffic_fraction': 0.002,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '37801',\n", | |
" 'population': '1240'},\n", | |
" {'fipsCode': '080370004021',\n", | |
" 'traffic_fraction': 0.002,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '43684',\n", | |
" 'population': '1750'},\n", | |
" {'fipsCode': '080370004012',\n", | |
" 'traffic_fraction': 0.004,\n", | |
" 'people_fraction': 0.007,\n", | |
" 'venue_air_distance': '30582',\n", | |
" 'population': '2044'},\n", | |
" {'fipsCode': '080370007021',\n", | |
" 'traffic_fraction': 0.004,\n", | |
" 'people_fraction': 0.006,\n", | |
" 'venue_air_distance': '14736',\n", | |
" 'population': '743'},\n", | |
" {'fipsCode': '081170001001',\n", | |
" 'traffic_fraction': 0.002,\n", | |
" 'people_fraction': 0.003,\n", | |
" 'venue_air_distance': '5220',\n", | |
" 'population': '805'},\n", | |
" {'fipsCode': '080370007031',\n", | |
" 'traffic_fraction': 0.003,\n", | |
" 'people_fraction': 0.003,\n", | |
" 'venue_air_distance': '21164',\n", | |
" 'population': '2089'},\n", | |
" {'fipsCode': '080370007032',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '6989',\n", | |
" 'population': '754'},\n", | |
" {'fipsCode': '080659617001',\n", | |
" 'traffic_fraction': 0.003,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '39369',\n", | |
" 'population': '869'},\n", | |
" {'fipsCode': '081170004011',\n", | |
" 'traffic_fraction': 0.007,\n", | |
" 'people_fraction': 0.004,\n", | |
" 'venue_air_distance': '16743',\n", | |
" 'population': '652'},\n", | |
" {'fipsCode': '080659617002',\n", | |
" 'traffic_fraction': 0.002,\n", | |
" 'people_fraction': 0.004,\n", | |
" 'venue_air_distance': '38965',\n", | |
" 'population': '978'},\n", | |
" {'fipsCode': '081170002003',\n", | |
" 'traffic_fraction': 0.01,\n", | |
" 'people_fraction': 0.009,\n", | |
" 'venue_air_distance': '4488',\n", | |
" 'population': '1062'},\n", | |
" {'fipsCode': '080930003002',\n", | |
" 'traffic_fraction': 0.002,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '37927',\n", | |
" 'population': '750'},\n", | |
" {'fipsCode': '081170002001',\n", | |
" 'traffic_fraction': 0.014,\n", | |
" 'people_fraction': 0.006,\n", | |
" 'venue_air_distance': '8239',\n", | |
" 'population': '418'},\n", | |
" {'fipsCode': '080370006001',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '11228',\n", | |
" 'population': '1387'},\n", | |
" {'fipsCode': '080970004011',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '75570',\n", | |
" 'population': '1638'},\n", | |
" {'fipsCode': '080590098471',\n", | |
" 'traffic_fraction': 0.002,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '63747',\n", | |
" 'population': '1862'},\n", | |
" {'fipsCode': '080659617006',\n", | |
" 'traffic_fraction': 0.005,\n", | |
" 'people_fraction': 0.003,\n", | |
" 'venue_air_distance': '41203',\n", | |
" 'population': '1032'},\n", | |
" {'fipsCode': '080370004031',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.001,\n", | |
" 'venue_air_distance': '47544',\n", | |
" 'population': '909'},\n", | |
" {'fipsCode': '081170001003',\n", | |
" 'traffic_fraction': 0.041,\n", | |
" 'people_fraction': 0.014,\n", | |
" 'venue_air_distance': '4388',\n", | |
" 'population': '3016'},\n", | |
" {'fipsCode': '081170003002',\n", | |
" 'traffic_fraction': 0.054,\n", | |
" 'people_fraction': 0.016,\n", | |
" 'venue_air_distance': '0',\n", | |
" 'population': '1929'},\n", | |
" {'fipsCode': '080930003001',\n", | |
" 'traffic_fraction': 0.006,\n", | |
" 'people_fraction': 0.005,\n", | |
" 'venue_air_distance': '23387',\n", | |
" 'population': '2023'},\n", | |
" {'fipsCode': '080370005021',\n", | |
" 'traffic_fraction': 0.004,\n", | |
" 'people_fraction': 0.004,\n", | |
" 'venue_air_distance': '33001',\n", | |
" 'population': '2969'},\n", | |
" {'fipsCode': '081170004024',\n", | |
" 'traffic_fraction': 0.015,\n", | |
" 'people_fraction': 0.006,\n", | |
" 'venue_air_distance': '12451',\n", | |
" 'population': '1242'},\n", | |
" {'fipsCode': '080370005011',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '34467',\n", | |
" 'population': '660'},\n", | |
" {'fipsCode': '080370004022',\n", | |
" 'traffic_fraction': 0.004,\n", | |
" 'people_fraction': 0.004,\n", | |
" 'venue_air_distance': '38073',\n", | |
" 'population': '2759'},\n", | |
" {'fipsCode': '081170003001',\n", | |
" 'traffic_fraction': 0.005,\n", | |
" 'people_fraction': 0.005,\n", | |
" 'venue_air_distance': '176',\n", | |
" 'population': '432'},\n", | |
" {'fipsCode': '081070008002',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.001,\n", | |
" 'venue_air_distance': '75668',\n", | |
" 'population': '1093'},\n", | |
" {'fipsCode': '080490001002',\n", | |
" 'traffic_fraction': 0.003,\n", | |
" 'people_fraction': 0.004,\n", | |
" 'venue_air_distance': '54520',\n", | |
" 'population': '1913'},\n", | |
" {'fipsCode': '080590098433',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '77072',\n", | |
" 'population': '3990'},\n", | |
" {'fipsCode': '081170004023',\n", | |
" 'traffic_fraction': 0.01,\n", | |
" 'people_fraction': 0.007,\n", | |
" 'venue_air_distance': '3780',\n", | |
" 'population': '315'},\n", | |
" {'fipsCode': '080930004001',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '20034',\n", | |
" 'population': '1104'},\n", | |
" {'fipsCode': '081170002005',\n", | |
" 'traffic_fraction': 0.013,\n", | |
" 'people_fraction': 0.011,\n", | |
" 'venue_air_distance': '5616',\n", | |
" 'population': '2540'},\n", | |
" {'fipsCode': '081170004022',\n", | |
" 'traffic_fraction': 0.035,\n", | |
" 'people_fraction': 0.015,\n", | |
" 'venue_air_distance': '12406',\n", | |
" 'population': '1396'},\n", | |
" {'fipsCode': '081170004021',\n", | |
" 'traffic_fraction': 0.031,\n", | |
" 'people_fraction': 0.014,\n", | |
" 'venue_air_distance': '7919',\n", | |
" 'population': '2276'},\n", | |
" {'fipsCode': '081170004013',\n", | |
" 'traffic_fraction': 0.027,\n", | |
" 'people_fraction': 0.015,\n", | |
" 'venue_air_distance': '2878',\n", | |
" 'population': '1867'},\n", | |
" {'fipsCode': '081170001002',\n", | |
" 'traffic_fraction': 0.026,\n", | |
" 'people_fraction': 0.016,\n", | |
" 'venue_air_distance': '5177',\n", | |
" 'population': '1997'},\n", | |
" {'fipsCode': '080490001001',\n", | |
" 'traffic_fraction': 0.002,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '12888',\n", | |
" 'population': '1002'},\n", | |
" {'fipsCode': '080370007011',\n", | |
" 'traffic_fraction': 0.005,\n", | |
" 'people_fraction': 0.005,\n", | |
" 'venue_air_distance': '22085',\n", | |
" 'population': '1272'},\n", | |
" {'fipsCode': '080659619002',\n", | |
" 'traffic_fraction': 0.003,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '23419',\n", | |
" 'population': '862'},\n", | |
" {'fipsCode': '080370005032',\n", | |
" 'traffic_fraction': 0.003,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '34416',\n", | |
" 'population': '1237'},\n", | |
" {'fipsCode': '080370002001',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '52995',\n", | |
" 'population': '3064'},\n", | |
" {'fipsCode': '080370004032',\n", | |
" 'traffic_fraction': 0.002,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '43265',\n", | |
" 'population': '3096'},\n", | |
" {'fipsCode': '081170002002',\n", | |
" 'traffic_fraction': 0.004,\n", | |
" 'people_fraction': 0.004,\n", | |
" 'venue_air_distance': '5714',\n", | |
" 'population': '428'},\n", | |
" {'fipsCode': '080590098461',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.001,\n", | |
" 'venue_air_distance': '63762',\n", | |
" 'population': '2187'},\n", | |
" {'fipsCode': '080370004011',\n", | |
" 'traffic_fraction': 0.001,\n", | |
" 'people_fraction': 0.002,\n", | |
" 'venue_air_distance': '40488',\n", | |
" 'population': '2487'},\n", | |
" {'fipsCode': '081170003004',\n", | |
" 'traffic_fraction': 0.03,\n", | |
" 'people_fraction': 0.015,\n", | |
" 'venue_air_distance': '1416',\n", | |
" 'population': '908'}]}" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data[-1]" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment