Skip to content

Instantly share code, notes, and snippets.

@scrapehero
Created May 7, 2019 12:23
Show Gist options
  • Save scrapehero/2b5c89665007159f1daa9704963d4a5d to your computer and use it in GitHub Desktop.
Save scrapehero/2b5c89665007159f1daa9704963d4a5d to your computer and use it in GitHub Desktop.
Sitemap for extracting event details from Yelp.com, filtered by city, date, event popularity, and category, using the Web Scraper Chrome extension
{
"_id":"yelp",
"startUrl":[
"https://www.yelp.com/events/sf/browse?start_date=20190506&end_date=20190512&start=0"
],
"selectors":[
{
"id":"follow",
"type":"SelectorLink",
"parentSelectors":[
"_root",
"paginate"
],
"selector":"a[itemprop='url']",
"multiple":true,
"delay":0
},
{
"id":"event_name",
"type":"SelectorText",
"parentSelectors":[
"follow"
],
"selector":"h1",
"multiple":false,
"regex":"",
"delay":0
},
{
"id":"paginate",
"type":"SelectorLink",
"parentSelectors":[
"_root",
"paginate"
],
"selector":"#super-container > div:nth-child(5) > div > div > div.pagination-links.arrange_unit > div > div:last-child > a",
"multiple":false,
"delay":0
},
{
"id":"event_location",
"type":"SelectorText",
"parentSelectors":[
"follow"
],
"selector":"[itemprop='location'] .biz-name span, div.venue_name",
"multiple":false,
"regex":"",
"delay":0
},
{
"id":"event_location_address",
"type":"SelectorText",
"parentSelectors":[
"follow"
],
"selector":".media-story address",
"multiple":true,
"regex":"",
"delay":0
},
{
"id":"event_location_contact",
"type":"SelectorText",
"parentSelectors":[
"follow"
],
"selector":"span.biz-phone",
"multiple":false,
"regex":"",
"delay":0
},
{
"id":"event_location_rating",
"type":"SelectorElementAttribute",
"parentSelectors":[
"follow"
],
"selector":"[itemprop='location'] div.i-stars",
"multiple":false,
"extractAttribute":"title",
"delay":0
},
{
"id":"event_timing",
"type":"SelectorElement",
"parentSelectors":[
"follow"
],
"selector":".card_content > li:nth-of-type(2)",
"multiple":false,
"delay":0
},
{
"id":"event_pricing",
"type":"SelectorText",
"parentSelectors":[
"follow"
],
"selector":"span.event-details_ticket-info",
"multiple":false,
"regex":"",
"delay":0
},
{
"id":"event_description",
"type":"SelectorText",
"parentSelectors":[
"follow"
],
"selector":"p[itemprop='description']",
"multiple":false,
"regex":"",
"delay":0
},
{
"id":"event_start",
"type":"SelectorText",
"parentSelectors":[
"event_timing"
],
"selector":".event-details_date div:nth-of-type(1)",
"multiple":false,
"regex":" [\\s\\S]*$",
"delay":0
},
{
"id":"event_end",
"type":"SelectorText",
"parentSelectors":[
"event_timing"
],
"selector":".event-details_date div:nth-of-type(2)",
"multiple":false,
"regex":" [\\s\\S]*$",
"delay":0
},
{
"id":"reviews",
"type":"SelectorText",
"parentSelectors":[
"follow"
],
"selector":"[itemprop='location'] span.review-count",
"multiple":false,
"regex":"",
"delay":0
},
{
"id":"category",
"type":"SelectorText",
"parentSelectors":[
"follow"
],
"selector":".event-details_category-links a",
"multiple":false,
"regex":"",
"delay":0
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment