Last active
March 22, 2020 18:00
-
-
Save jpoles1/c93b5113e266910faf47da7ee094159e to your computer and use it in GitHub Desktop.
MaskSearch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "MaskSearch", | |
"provenance": [], | |
"collapsed_sections": [], | |
"toc_visible": true, | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/jpoles1/c93b5113e266910faf47da7ee094159e/masksearch.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "aNyiqBdAe5Eg", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"import requests\n", | |
"import json\n", | |
"import re\n", | |
"import pandas as pd\n", | |
"from google.colab import files\n", | |
"\n", | |
"def clean_filename(raw_filename: str) -> str: \n", | |
" return ''.join([c for c in raw_filename.replace(' ','_') if re.match(r'\\w', c)])" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "VajxjnrPn5eJ", | |
"colab_type": "code", | |
"outputId": "277827bf-a2e8-419b-f013-8263aa411775", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 85 | |
} | |
}, | |
"source": [ | |
"#Enter the type of business you want to search, and where to search\n", | |
"business_type = \"construction\" #Search term\n", | |
"locale = \"Philadelphia\" #Enter a zip code, locale, address, etc. here\n", | |
"api_key = \"\" #Enter your Yelp API key here\n", | |
"\n", | |
"max_fetch = 1000 #Maximum number of businesses to fetch\n", | |
"\n", | |
"#No need to edit these settings\n", | |
"headers = {'Authorization': 'Bearer %s' % api_key}\n", | |
"filename = clean_filename(locale + \" - \" + business_type) + \".csv\"\n", | |
"limit = 50 \n", | |
"total = 1\n", | |
"captured = []\n", | |
"#Send requests for more data to Yelp API until all results are returned (50 max at a time) or until max_fetch is reached\n", | |
"while len(captured) < total and total > 0:\n", | |
" #Edit this URL to change search API params (docs: https://www.yelp.com/developers/documentation/v3/business_search)\n", | |
" url = \"https://api.yelp.com/v3/businesses/search?location=%s&term=%s&open_now=true&limit=%s&offset=%s\" % (locale, business_type, limit, len(captured))\n", | |
" #Request JSON formatted data from API\n", | |
" resp_text = requests.get(url=url, headers=headers).text\n", | |
" resp_data = json.loads(resp_text)\n", | |
" total = min(resp_data[\"total\"], max_fetch)\n", | |
" captured += resp_data[\"businesses\"]\n", | |
" print(\"Captured: %s | Total: %s\" % (len(captured), resp_data[\"total\"]))" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Captured: 50 | Total: 171\n", | |
"Captured: 100 | Total: 171\n", | |
"Captured: 150 | Total: 171\n", | |
"Captured: 171 | Total: 171\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "JvKgUR9iiyx8", | |
"colab_type": "code", | |
"outputId": "38f65339-0d93-4f50-d483-c280decc3707", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 405 | |
} | |
}, | |
"source": [ | |
"#Generate dataframe from selected columns of data\n", | |
"clean_data = pd.DataFrame(captured)[[\"name\", \"display_phone\", \"location\", \"url\"]]\n", | |
"#Unpack address column\n", | |
"clean_data[\"location\"] = [x[\"address1\"] for x in clean_data[\"location\"]]\n", | |
"#Clean URL to remove long \"referral\" url param\n", | |
"clean_data[\"url\"] = [re.sub(r'\\?.*', '', x) for x in clean_data[\"url\"]]\n", | |
"#Save dataframe to CSV (stored on colab)\n", | |
"clean_data.to_csv(filename)\n", | |
"#Download CSV file from colab\n", | |
"files.download(filename)\n", | |
"clean_data" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>name</th>\n", | |
" <th>display_phone</th>\n", | |
" <th>location</th>\n", | |
" <th>url</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Q Builders</td>\n", | |
" <td>(215) 941-0462</td>\n", | |
" <td></td>\n", | |
" <td>https://www.yelp.com/biz/q-builders-philadelphia</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>TKO CONTRACTING</td>\n", | |
" <td>(856) 209-8437</td>\n", | |
" <td>None</td>\n", | |
" <td>https://www.yelp.com/biz/tko-contracting-glouc...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Wells Building and Maintenance Consulting Serv...</td>\n", | |
" <td>(484) 358-0761</td>\n", | |
" <td></td>\n", | |
" <td>https://www.yelp.com/biz/wells-building-and-ma...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Doozer Construction</td>\n", | |
" <td>(267) 639-6522</td>\n", | |
" <td>244 S 22nd St</td>\n", | |
" <td>https://www.yelp.com/biz/doozer-construction-p...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Abs General Contracting</td>\n", | |
" <td>(610) 931-1437</td>\n", | |
" <td>2301 Washington Ave</td>\n", | |
" <td>https://www.yelp.com/biz/abs-general-contracti...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>166</th>\n", | |
" <td>A Absolute Plumbing Heating and Air</td>\n", | |
" <td>(908) 280-0445</td>\n", | |
" <td>115 E 11th Ave</td>\n", | |
" <td>https://www.yelp.com/biz/a-absolute-plumbing-h...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>167</th>\n", | |
" <td>Becker Construction</td>\n", | |
" <td>(717) 707-7331</td>\n", | |
" <td>241 Clear Spring Rd</td>\n", | |
" <td>https://www.yelp.com/biz/becker-construction-a...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>168</th>\n", | |
" <td>A-1 Affordable</td>\n", | |
" <td>(800) 865-0053</td>\n", | |
" <td>164 Getty Ave</td>\n", | |
" <td>https://www.yelp.com/biz/a-1-affordable-clifton-2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>169</th>\n", | |
" <td>Priority You Moving & Storage</td>\n", | |
" <td>(973) 864-2113</td>\n", | |
" <td>33 Gingerbread Castle Rd</td>\n", | |
" <td>https://www.yelp.com/biz/priority-you-moving-a...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>170</th>\n", | |
" <td>Busy Bee Construction</td>\n", | |
" <td>(973) 539-0047</td>\n", | |
" <td>25 Margaret Ct</td>\n", | |
" <td>https://www.yelp.com/biz/busy-bee-construction...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>171 rows × 4 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" name ... url\n", | |
"0 Q Builders ... https://www.yelp.com/biz/q-builders-philadelphia\n", | |
"1 TKO CONTRACTING ... https://www.yelp.com/biz/tko-contracting-glouc...\n", | |
"2 Wells Building and Maintenance Consulting Serv... ... https://www.yelp.com/biz/wells-building-and-ma...\n", | |
"3 Doozer Construction ... https://www.yelp.com/biz/doozer-construction-p...\n", | |
"4 Abs General Contracting ... https://www.yelp.com/biz/abs-general-contracti...\n", | |
".. ... ... ...\n", | |
"166 A Absolute Plumbing Heating and Air ... https://www.yelp.com/biz/a-absolute-plumbing-h...\n", | |
"167 Becker Construction ... https://www.yelp.com/biz/becker-construction-a...\n", | |
"168 A-1 Affordable ... https://www.yelp.com/biz/a-1-affordable-clifton-2\n", | |
"169 Priority You Moving & Storage ... https://www.yelp.com/biz/priority-you-moving-a...\n", | |
"170 Busy Bee Construction ... https://www.yelp.com/biz/busy-bee-construction...\n", | |
"\n", | |
"[171 rows x 4 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 3 | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment