Skip to content

Instantly share code, notes, and snippets.

@jpoles1
Last active March 22, 2020 18:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpoles1/c93b5113e266910faf47da7ee094159e to your computer and use it in GitHub Desktop.
Save jpoles1/c93b5113e266910faf47da7ee094159e to your computer and use it in GitHub Desktop.
MaskSearch
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "MaskSearch",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true,
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/jpoles1/c93b5113e266910faf47da7ee094159e/masksearch.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "aNyiqBdAe5Eg",
"colab_type": "code",
"colab": {}
},
"source": [
"import requests\n",
"import json\n",
"import re\n",
"import pandas as pd\n",
"from google.colab import files\n",
"\n",
"def clean_filename(raw_filename: str) -> str: \n",
" return ''.join([c for c in raw_filename.replace(' ','_') if re.match(r'\\w', c)])"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "VajxjnrPn5eJ",
"colab_type": "code",
"outputId": "277827bf-a2e8-419b-f013-8263aa411775",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
}
},
"source": [
"#Enter the type of business you want to search, and where to search\n",
"business_type = \"construction\" #Search term\n",
"locale = \"Philadelphia\" #Enter a zip code, locale, address, etc. here\n",
"api_key = \"\" #Enter your Yelp API key here\n",
"\n",
"max_fetch = 1000 #Maximum number of businesses to fetch\n",
"\n",
"#No need to edit these settings\n",
"headers = {'Authorization': 'Bearer %s' % api_key}\n",
"filename = clean_filename(locale + \" - \" + business_type) + \".csv\"\n",
"search_url = \"https://api.yelp.com/v3/businesses/search\"\n",
"limit = 50 #Page size (50 max at a time)\n",
"total = min(1, max_fetch) #Placeholder until the first response reports the real total; <= 0 skips the loop\n",
"captured = []\n",
"#Fail early with a clear message instead of a KeyError on the API's error response\n",
"if not api_key:\n",
"    raise ValueError(\"Set api_key to your Yelp API key before running this cell\")\n",
"#Send requests for more data to Yelp API until all results are returned (50 max at a time) or until max_fetch is reached\n",
"while len(captured) < total and total > 0:\n",
"    #Edit these params to change search API params (docs: https://www.yelp.com/developers/documentation/v3/business_search)\n",
"    #Passing them through params= lets requests URL-encode spaces, '&', etc. in locale and business_type\n",
"    params = {\n",
"        \"location\": locale,\n",
"        \"term\": business_type,\n",
"        \"open_now\": \"true\",\n",
"        #Never ask for more than is still needed, so captured cannot overshoot max_fetch\n",
"        \"limit\": min(limit, max_fetch - len(captured)),\n",
"        \"offset\": len(captured),\n",
"    }\n",
"    #Request JSON formatted data from API\n",
"    resp = requests.get(url=search_url, headers=headers, params=params, timeout=30)\n",
"    #Surface HTTP errors (bad key, rate limit, ...) here rather than as a missing \"total\" key below\n",
"    resp.raise_for_status()\n",
"    resp_data = resp.json()\n",
"    total = min(resp_data[\"total\"], max_fetch)\n",
"    page = resp_data[\"businesses\"]\n",
"    captured += page\n",
"    print(\"Captured: %s | Total: %s\" % (len(captured), resp_data[\"total\"]))\n",
"    #An empty page means nothing more is available even if \"total\" says otherwise; stop instead of looping forever\n",
"    if not page:\n",
"        break"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Captured: 50 | Total: 171\n",
"Captured: 100 | Total: 171\n",
"Captured: 150 | Total: 171\n",
"Captured: 171 | Total: 171\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "JvKgUR9iiyx8",
"colab_type": "code",
"outputId": "38f65339-0d93-4f50-d483-c280decc3707",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 405
}
},
"source": [
"#Generate dataframe from selected columns of data\n",
"#Passing columns= (rather than indexing afterwards) still yields an empty, correctly-shaped frame when nothing was captured\n",
"selected_columns = [\"name\", \"display_phone\", \"location\", \"url\"]\n",
"clean_data = pd.DataFrame(captured, columns=selected_columns)\n",
"#Unpack address column (entries that are not dicts, e.g. missing values, become None)\n",
"clean_data[\"location\"] = [loc.get(\"address1\") if isinstance(loc, dict) else None for loc in clean_data[\"location\"]]\n",
"#Clean URL to remove long \"referral\" url param (non-string entries are left untouched)\n",
"clean_data[\"url\"] = [re.sub(r'\\?.*', '', u) if isinstance(u, str) else u for u in clean_data[\"url\"]]\n",
"#Save dataframe to CSV (stored on colab)\n",
"clean_data.to_csv(filename)\n",
"#Download CSV file from colab\n",
"files.download(filename)\n",
"clean_data"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>display_phone</th>\n",
" <th>location</th>\n",
" <th>url</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Q Builders</td>\n",
" <td>(215) 941-0462</td>\n",
" <td></td>\n",
" <td>https://www.yelp.com/biz/q-builders-philadelphia</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>TKO CONTRACTING</td>\n",
" <td>(856) 209-8437</td>\n",
" <td>None</td>\n",
" <td>https://www.yelp.com/biz/tko-contracting-glouc...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Wells Building and Maintenance Consulting Serv...</td>\n",
" <td>(484) 358-0761</td>\n",
" <td></td>\n",
" <td>https://www.yelp.com/biz/wells-building-and-ma...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Doozer Construction</td>\n",
" <td>(267) 639-6522</td>\n",
" <td>244 S 22nd St</td>\n",
" <td>https://www.yelp.com/biz/doozer-construction-p...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Abs General Contracting</td>\n",
" <td>(610) 931-1437</td>\n",
" <td>2301 Washington Ave</td>\n",
" <td>https://www.yelp.com/biz/abs-general-contracti...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>166</th>\n",
" <td>A Absolute Plumbing Heating and Air</td>\n",
" <td>(908) 280-0445</td>\n",
" <td>115 E 11th Ave</td>\n",
" <td>https://www.yelp.com/biz/a-absolute-plumbing-h...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>167</th>\n",
" <td>Becker Construction</td>\n",
" <td>(717) 707-7331</td>\n",
" <td>241 Clear Spring Rd</td>\n",
" <td>https://www.yelp.com/biz/becker-construction-a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>168</th>\n",
" <td>A-1 Affordable</td>\n",
" <td>(800) 865-0053</td>\n",
" <td>164 Getty Ave</td>\n",
" <td>https://www.yelp.com/biz/a-1-affordable-clifton-2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>169</th>\n",
" <td>Priority You Moving &amp; Storage</td>\n",
" <td>(973) 864-2113</td>\n",
" <td>33 Gingerbread Castle Rd</td>\n",
" <td>https://www.yelp.com/biz/priority-you-moving-a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>170</th>\n",
" <td>Busy Bee Construction</td>\n",
" <td>(973) 539-0047</td>\n",
" <td>25 Margaret Ct</td>\n",
" <td>https://www.yelp.com/biz/busy-bee-construction...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>171 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" name ... url\n",
"0 Q Builders ... https://www.yelp.com/biz/q-builders-philadelphia\n",
"1 TKO CONTRACTING ... https://www.yelp.com/biz/tko-contracting-glouc...\n",
"2 Wells Building and Maintenance Consulting Serv... ... https://www.yelp.com/biz/wells-building-and-ma...\n",
"3 Doozer Construction ... https://www.yelp.com/biz/doozer-construction-p...\n",
"4 Abs General Contracting ... https://www.yelp.com/biz/abs-general-contracti...\n",
".. ... ... ...\n",
"166 A Absolute Plumbing Heating and Air ... https://www.yelp.com/biz/a-absolute-plumbing-h...\n",
"167 Becker Construction ... https://www.yelp.com/biz/becker-construction-a...\n",
"168 A-1 Affordable ... https://www.yelp.com/biz/a-1-affordable-clifton-2\n",
"169 Priority You Moving & Storage ... https://www.yelp.com/biz/priority-you-moving-a...\n",
"170 Busy Bee Construction ... https://www.yelp.com/biz/busy-bee-construction...\n",
"\n",
"[171 rows x 4 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 3
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment