Skip to content

Instantly share code, notes, and snippets.

@djfan
Created September 17, 2019 17:50
Show Gist options
  • Save djfan/d84ad663b83b9c146813b1ce8cc447ae to your computer and use it in GitHub Desktop.
Save djfan/d84ad663b83b9c146813b1ce8cc447ae to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"import boto3\n",
"import ndjson\n",
"import gzip"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Credentials are deliberately NOT written into the notebook. With no explicit\n",
"# keys, boto3 resolves them through its default provider chain\n",
"# (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables, the shared\n",
"# ~/.aws/credentials file, or an attached IAM role).\n",
"s3 = boto3.resource('s3')\n",
"\n",
"uc_bucket = s3.Bucket('uc-export-carto')\n",
"\n",
"# Collect every object key in the bucket, in the order S3 lists them.\n",
"data_list = [obj.key for obj in uc_bucket.objects.all()]\n",
"if not data_list:\n",
"    # Same exception type as indexing an empty list, but with a useful message.\n",
"    raise IndexError('bucket uc-export-carto contains no objects to download')\n",
"\n",
"# Download the last listed object to a fixed local file name.\n",
"path = data_list[-1]\n",
"print(path)\n",
"uc_bucket.download_file(path, 'hackathon_000000000000.ndjson.gz')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Open the downloaded gzip archive in binary mode and let ndjson parse it;\n",
"# the parsed result is kept in the notebook-level name `data`.\n",
"with gzip.open('hackathon_000000000000.ndjson.gz', 'rb') as gz_stream:\n",
"    data = ndjson.load(gz_stream)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"155"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(data)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'venue_id': '35f2d50d-9c92-4d44-b307-579b21c46ef1',\n",
" 'brand': 'Sprouts Farmers Market',\n",
" 'state': 'CO',\n",
" 'city': 'Aurora',\n",
" 'daily_traffic_avg': '154',\n",
" 'home_air_distance_kilometers_25th_pct': '2',\n",
" 'home_air_distance_kilometers_median': '3',\n",
" 'home_air_distance_75th_kilometers_pct': '5',\n",
" 'work_air_distance_25th_kilometers_pct': '5',\n",
" 'work_air_distance_kilometers_median': '7',\n",
" 'work_air_distance_75th_kilometers_pct': '12',\n",
" 'home_origins': [{'fipsCode': '080050812002',\n",
" 'traffic_fraction': 0.006,\n",
" 'people_fraction': 0.009,\n",
" 'venue_air_distance': '2982',\n",
" 'population': '1625'},\n",
" {'fipsCode': '080050870003',\n",
" 'traffic_fraction': 0.012,\n",
" 'people_fraction': 0.011,\n",
" 'venue_air_distance': '271',\n",
" 'population': '2385'},\n",
" {'fipsCode': '080050868004',\n",
" 'traffic_fraction': 0.01,\n",
" 'people_fraction': 0.015,\n",
" 'venue_air_distance': '1071',\n",
" 'population': '1168'},\n",
" {'fipsCode': '080050803003',\n",
" 'traffic_fraction': 0.007,\n",
" 'people_fraction': 0.013,\n",
" 'venue_air_distance': '1574',\n",
" 'population': '2208'},\n",
" {'fipsCode': '080050802001',\n",
" 'traffic_fraction': 0.07,\n",
" 'people_fraction': 0.026,\n",
" 'venue_air_distance': '0',\n",
" 'population': '1175'},\n",
" {'fipsCode': '080310070371',\n",
" 'traffic_fraction': 0.015,\n",
" 'people_fraction': 0.017,\n",
" 'venue_air_distance': '1000',\n",
" 'population': '2663'},\n",
" {'fipsCode': '080050072012',\n",
" 'traffic_fraction': 0.007,\n",
" 'people_fraction': 0.013,\n",
" 'venue_air_distance': '2764',\n",
" 'population': '2361'},\n",
" {'fipsCode': '080050808001',\n",
" 'traffic_fraction': 0.033,\n",
" 'people_fraction': 0.011,\n",
" 'venue_air_distance': '1345',\n",
" 'population': '2537'},\n",
" {'fipsCode': '080050805001',\n",
" 'traffic_fraction': 0.009,\n",
" 'people_fraction': 0.011,\n",
" 'venue_air_distance': '2647',\n",
" 'population': '2286'},\n",
" {'fipsCode': '080050868003',\n",
" 'traffic_fraction': 0.009,\n",
" 'people_fraction': 0.015,\n",
" 'venue_air_distance': '1670',\n",
" 'population': '1858'}]}"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[0]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'venue_id': '6ea0a4d1-7aab-4e1d-8ecf-0f41e43ad556',\n",
" 'brand': 'Whole Foods Market',\n",
" 'state': 'CO',\n",
" 'city': 'Frisco',\n",
" 'daily_traffic_avg': '1227',\n",
" 'home_air_distance_kilometers_25th_pct': '8',\n",
" 'home_air_distance_kilometers_median': '34',\n",
" 'home_air_distance_75th_kilometers_pct': '105',\n",
" 'work_air_distance_25th_kilometers_pct': '8',\n",
" 'work_air_distance_kilometers_median': '37',\n",
" 'work_air_distance_75th_kilometers_pct': '124',\n",
" 'home_origins': [{'fipsCode': '081170001005',\n",
" 'traffic_fraction': 0.014,\n",
" 'people_fraction': 0.008,\n",
" 'venue_air_distance': '3201',\n",
" 'population': '1433'},\n",
" {'fipsCode': '081170004012',\n",
" 'traffic_fraction': 0.011,\n",
" 'people_fraction': 0.006,\n",
" 'venue_air_distance': '13123',\n",
" 'population': '801'},\n",
" {'fipsCode': '081170001004',\n",
" 'traffic_fraction': 0.011,\n",
" 'people_fraction': 0.006,\n",
" 'venue_air_distance': '1979',\n",
" 'population': '1168'},\n",
" {'fipsCode': '080370004033',\n",
" 'traffic_fraction': 0.004,\n",
" 'people_fraction': 0.006,\n",
" 'venue_air_distance': '55187',\n",
" 'population': '4308'},\n",
" {'fipsCode': '081170004014',\n",
" 'traffic_fraction': 0.01,\n",
" 'people_fraction': 0.01,\n",
" 'venue_air_distance': '10822',\n",
" 'population': '1298'},\n",
" {'fipsCode': '081170002004',\n",
" 'traffic_fraction': 0.015,\n",
" 'people_fraction': 0.015,\n",
" 'venue_air_distance': '3919',\n",
" 'population': '2800'},\n",
" {'fipsCode': '081170003003',\n",
" 'traffic_fraction': 0.018,\n",
" 'people_fraction': 0.009,\n",
" 'venue_air_distance': '462',\n",
" 'population': '486'},\n",
" {'fipsCode': '080370005031',\n",
" 'traffic_fraction': 0.002,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '37801',\n",
" 'population': '1240'},\n",
" {'fipsCode': '080370004021',\n",
" 'traffic_fraction': 0.002,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '43684',\n",
" 'population': '1750'},\n",
" {'fipsCode': '080370004012',\n",
" 'traffic_fraction': 0.004,\n",
" 'people_fraction': 0.007,\n",
" 'venue_air_distance': '30582',\n",
" 'population': '2044'},\n",
" {'fipsCode': '080370007021',\n",
" 'traffic_fraction': 0.004,\n",
" 'people_fraction': 0.006,\n",
" 'venue_air_distance': '14736',\n",
" 'population': '743'},\n",
" {'fipsCode': '081170001001',\n",
" 'traffic_fraction': 0.002,\n",
" 'people_fraction': 0.003,\n",
" 'venue_air_distance': '5220',\n",
" 'population': '805'},\n",
" {'fipsCode': '080370007031',\n",
" 'traffic_fraction': 0.003,\n",
" 'people_fraction': 0.003,\n",
" 'venue_air_distance': '21164',\n",
" 'population': '2089'},\n",
" {'fipsCode': '080370007032',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '6989',\n",
" 'population': '754'},\n",
" {'fipsCode': '080659617001',\n",
" 'traffic_fraction': 0.003,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '39369',\n",
" 'population': '869'},\n",
" {'fipsCode': '081170004011',\n",
" 'traffic_fraction': 0.007,\n",
" 'people_fraction': 0.004,\n",
" 'venue_air_distance': '16743',\n",
" 'population': '652'},\n",
" {'fipsCode': '080659617002',\n",
" 'traffic_fraction': 0.002,\n",
" 'people_fraction': 0.004,\n",
" 'venue_air_distance': '38965',\n",
" 'population': '978'},\n",
" {'fipsCode': '081170002003',\n",
" 'traffic_fraction': 0.01,\n",
" 'people_fraction': 0.009,\n",
" 'venue_air_distance': '4488',\n",
" 'population': '1062'},\n",
" {'fipsCode': '080930003002',\n",
" 'traffic_fraction': 0.002,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '37927',\n",
" 'population': '750'},\n",
" {'fipsCode': '081170002001',\n",
" 'traffic_fraction': 0.014,\n",
" 'people_fraction': 0.006,\n",
" 'venue_air_distance': '8239',\n",
" 'population': '418'},\n",
" {'fipsCode': '080370006001',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '11228',\n",
" 'population': '1387'},\n",
" {'fipsCode': '080970004011',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '75570',\n",
" 'population': '1638'},\n",
" {'fipsCode': '080590098471',\n",
" 'traffic_fraction': 0.002,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '63747',\n",
" 'population': '1862'},\n",
" {'fipsCode': '080659617006',\n",
" 'traffic_fraction': 0.005,\n",
" 'people_fraction': 0.003,\n",
" 'venue_air_distance': '41203',\n",
" 'population': '1032'},\n",
" {'fipsCode': '080370004031',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.001,\n",
" 'venue_air_distance': '47544',\n",
" 'population': '909'},\n",
" {'fipsCode': '081170001003',\n",
" 'traffic_fraction': 0.041,\n",
" 'people_fraction': 0.014,\n",
" 'venue_air_distance': '4388',\n",
" 'population': '3016'},\n",
" {'fipsCode': '081170003002',\n",
" 'traffic_fraction': 0.054,\n",
" 'people_fraction': 0.016,\n",
" 'venue_air_distance': '0',\n",
" 'population': '1929'},\n",
" {'fipsCode': '080930003001',\n",
" 'traffic_fraction': 0.006,\n",
" 'people_fraction': 0.005,\n",
" 'venue_air_distance': '23387',\n",
" 'population': '2023'},\n",
" {'fipsCode': '080370005021',\n",
" 'traffic_fraction': 0.004,\n",
" 'people_fraction': 0.004,\n",
" 'venue_air_distance': '33001',\n",
" 'population': '2969'},\n",
" {'fipsCode': '081170004024',\n",
" 'traffic_fraction': 0.015,\n",
" 'people_fraction': 0.006,\n",
" 'venue_air_distance': '12451',\n",
" 'population': '1242'},\n",
" {'fipsCode': '080370005011',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '34467',\n",
" 'population': '660'},\n",
" {'fipsCode': '080370004022',\n",
" 'traffic_fraction': 0.004,\n",
" 'people_fraction': 0.004,\n",
" 'venue_air_distance': '38073',\n",
" 'population': '2759'},\n",
" {'fipsCode': '081170003001',\n",
" 'traffic_fraction': 0.005,\n",
" 'people_fraction': 0.005,\n",
" 'venue_air_distance': '176',\n",
" 'population': '432'},\n",
" {'fipsCode': '081070008002',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.001,\n",
" 'venue_air_distance': '75668',\n",
" 'population': '1093'},\n",
" {'fipsCode': '080490001002',\n",
" 'traffic_fraction': 0.003,\n",
" 'people_fraction': 0.004,\n",
" 'venue_air_distance': '54520',\n",
" 'population': '1913'},\n",
" {'fipsCode': '080590098433',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '77072',\n",
" 'population': '3990'},\n",
" {'fipsCode': '081170004023',\n",
" 'traffic_fraction': 0.01,\n",
" 'people_fraction': 0.007,\n",
" 'venue_air_distance': '3780',\n",
" 'population': '315'},\n",
" {'fipsCode': '080930004001',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '20034',\n",
" 'population': '1104'},\n",
" {'fipsCode': '081170002005',\n",
" 'traffic_fraction': 0.013,\n",
" 'people_fraction': 0.011,\n",
" 'venue_air_distance': '5616',\n",
" 'population': '2540'},\n",
" {'fipsCode': '081170004022',\n",
" 'traffic_fraction': 0.035,\n",
" 'people_fraction': 0.015,\n",
" 'venue_air_distance': '12406',\n",
" 'population': '1396'},\n",
" {'fipsCode': '081170004021',\n",
" 'traffic_fraction': 0.031,\n",
" 'people_fraction': 0.014,\n",
" 'venue_air_distance': '7919',\n",
" 'population': '2276'},\n",
" {'fipsCode': '081170004013',\n",
" 'traffic_fraction': 0.027,\n",
" 'people_fraction': 0.015,\n",
" 'venue_air_distance': '2878',\n",
" 'population': '1867'},\n",
" {'fipsCode': '081170001002',\n",
" 'traffic_fraction': 0.026,\n",
" 'people_fraction': 0.016,\n",
" 'venue_air_distance': '5177',\n",
" 'population': '1997'},\n",
" {'fipsCode': '080490001001',\n",
" 'traffic_fraction': 0.002,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '12888',\n",
" 'population': '1002'},\n",
" {'fipsCode': '080370007011',\n",
" 'traffic_fraction': 0.005,\n",
" 'people_fraction': 0.005,\n",
" 'venue_air_distance': '22085',\n",
" 'population': '1272'},\n",
" {'fipsCode': '080659619002',\n",
" 'traffic_fraction': 0.003,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '23419',\n",
" 'population': '862'},\n",
" {'fipsCode': '080370005032',\n",
" 'traffic_fraction': 0.003,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '34416',\n",
" 'population': '1237'},\n",
" {'fipsCode': '080370002001',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '52995',\n",
" 'population': '3064'},\n",
" {'fipsCode': '080370004032',\n",
" 'traffic_fraction': 0.002,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '43265',\n",
" 'population': '3096'},\n",
" {'fipsCode': '081170002002',\n",
" 'traffic_fraction': 0.004,\n",
" 'people_fraction': 0.004,\n",
" 'venue_air_distance': '5714',\n",
" 'population': '428'},\n",
" {'fipsCode': '080590098461',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.001,\n",
" 'venue_air_distance': '63762',\n",
" 'population': '2187'},\n",
" {'fipsCode': '080370004011',\n",
" 'traffic_fraction': 0.001,\n",
" 'people_fraction': 0.002,\n",
" 'venue_air_distance': '40488',\n",
" 'population': '2487'},\n",
" {'fipsCode': '081170003004',\n",
" 'traffic_fraction': 0.03,\n",
" 'people_fraction': 0.015,\n",
" 'venue_air_distance': '1416',\n",
" 'population': '908'}]}"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[-1]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment