Skip to content

Instantly share code, notes, and snippets.

@kanungo
Created April 20, 2018 17:34
Show Gist options
  • Save kanungo/06b98d9751ea74353cacf1148ca3a5f6 to your computer and use it in GitHub Desktop.
Save kanungo/06b98d9751ea74353cacf1148ca3a5f6 to your computer and use it in GitHub Desktop.
yelp scraper
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Load required packages\n",
"from bs4 import BeautifulSoup\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read in a web page\n",
"SEARCH_URL = \"https://www.yelp.com/search?find_desc=Vegetarian+Food&find_loc=Washington%2C+DC&ns=1\"\n",
"\n",
"# requests has no default timeout; without one this cell can hang forever\n",
"# if the server never answers.\n",
"response = requests.get(SEARCH_URL, timeout=30)\n",
"\n",
"# Fail loudly on a 4xx/5xx response instead of silently parsing an error page\n",
"# (which would make the scraping loop below print nothing at all).\n",
"response.raise_for_status()\n",
"\n",
"soup = BeautifulSoup(response.content, \"html.parser\")"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Business name : HipCityVeg\n",
"Address : 712 7th St NWWashington, DC 20001\n",
"4.5 star rating\n",
"\n",
"\n",
"Business name : Shouk\n",
"Address : 655 K St NWWashington, DC 20001\n",
"4.5 star rating\n",
"\n",
"\n",
"Business name : Chaia\n",
"Address : 3207 Grace St NWWashington, DC 20007\n",
"4.5 star rating\n",
"\n",
"\n",
"Business name : NuVegan Café\n",
"Address : 2928 Georgia Ave NWWashington, DC 20001\n",
"4.5 star rating\n",
"\n",
"\n",
"Business name : Pow Pow\n",
"Address : 1253 H St NEWashington, DC 20002\n",
"4.0 star rating\n",
"\n",
"\n",
"Business name : Beefsteak\n",
"Address : 4531 Wisconsin Ave NWWashington, DC 20016\n",
"4.5 star rating\n",
"\n",
"\n",
"Business name : Evolve Vegan Restaurant\n",
"Address : 341 Cedar St NWWashington, DC 20012\n",
"4.0 star rating\n",
"\n",
"\n",
"Business name : Unconventional Diner\n",
"Address : 1207 9th St NWWashington, DC 20001\n",
"4.5 star rating\n",
"\n",
"\n",
"Business name : Sababa\n",
"Address : 3311 Connecticut Ave NWWashington, DC 20008\n",
"4.0 star rating\n",
"\n",
"\n",
"Business name : Fancy Radish\n",
"Address : 600 H St NEWashington, DC 20002\n",
"4.5 star rating\n",
"\n",
"\n"
]
}
],
"source": [
"# Print name, address and star rating for every search result on the page.\n",
"# Each result is looked up as an <li class=\"regular-search-result\"> element.\n",
"all_li = soup.find_all(\"li\", {\"class\": \"regular-search-result\"})\n",
"for myli in all_li:\n",
"    # find() returns None when a tag is absent, so test for that explicitly\n",
"    # instead of hiding every possible error behind a bare except.\n",
"    bizname = myli.find(\"a\", {\"class\": \"biz-name\"})\n",
"    if bizname is not None:\n",
"        print(\"Business name : \", bizname.text)\n",
"    else:\n",
"        print(\"Business name missing\")\n",
"\n",
"    address = myli.find(\"address\")\n",
"    if address is not None:\n",
"        # Plain .text glues the street and city pieces together\n",
"        # (\"... St NWWashington, DC\"); presumably they are separate text\n",
"        # nodes in the markup, so join the stripped pieces explicitly.\n",
"        addr = address.get_text(separator=\", \", strip=True)\n",
"        print(\"Address : \", addr)\n",
"    else:\n",
"        print(\"Business address missing\")\n",
"\n",
"    # The rating text is read from the alt attribute of the star image.\n",
"    stars = myli.find(\"div\", {\"class\": \"i-stars\"})\n",
"    rating = stars.find(\"img\", alt=True) if stars is not None else None\n",
"    if rating is not None:\n",
"        print(rating[\"alt\"])\n",
"    else:\n",
"        print(\"Business rating missing\")\n",
"\n",
"    # print() appends its own newline, so this leaves a blank gap between businesses.\n",
"    print('\\n')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment