Skip to content

Instantly share code, notes, and snippets.

@rergw
Last active March 5, 2019 17:37
Show Gist options
  • Save rergw/b54edd72503e0ce74e0f174209a253fa to your computer and use it in GitHub Desktop.
Save rergw/b54edd72503e0ce74e0f174209a253fa to your computer and use it in GitHub Desktop.
General purpose scraper adapted to angel.co.
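/*
General-purpose scraper, adapted to angel.co.
Does not support pagination: only items currently in the page DOM are scraped.
Requires a global `jQuery` on the page and the console's `copy()` utility.
Usage:
1. For other pages, change `map` and `item`.
2. Paste the entire code into the browser console.
3. Results are copied to the clipboard as TSV and can be pasted into a spreadsheet.
*/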
// Result accumulators, kept global so they can be inspected from the console
// after the run. Declared with `var` (instead of being implicit globals) so the
// snippet also works in strict mode and can be pasted repeatedly without a
// redeclaration error.
// JSONresults: one object per scraped item, keyed by the field names in `map`.
var JSONresults = [];
// TSVresults: one tab-separated line per row (header first); joined into a
// single string at the end of the script.
var TSVresults = [];
// Helpers
/**
 * Finds the first run of decimal digits in an element's `innerText`.
 *
 * @param {{innerText: string}|null|undefined} n - Element (or element-like
 *   object) whose text is searched. May be missing when a selector matched
 *   nothing.
 * @returns {RegExpMatchArray|null} The `String.prototype.match` result for
 *   `/\d+/` (index 0 holds the digits), or `null` when `n` is missing, has no
 *   string `innerText`, or contains no digits. Joining or stringifying the
 *   match array yields just the digits.
 */
var onlyNumbers = function (n) {
  // Guard against a missing element instead of throwing a TypeError.
  if (n == null || typeof n.innerText !== 'string') {
    return null;
  }
  return n.innerText.match(/\d+/);
};
// Helpers END
// Field definitions: output column name -> extraction spec.
// A spec is either a selector string or a two-element array
// `[selector, accessor]`:
//   - selector ':closest(A) B' : start from the nearest ancestor of the item
//     matching A, then take the first descendant matching B.
//   - selector '@prop'         : read property `prop` directly from the item
//     element itself.
//   - accessor (string)        : property to read from the matched element;
//     defaults to 'innerText' when omitted.
//   - accessor (function)      : called with the matched element; its return
//     value becomes the field value.
// Declared with `var` rather than as implicit globals so the snippet works in
// strict mode and can be pasted repeatedly.
var map = {
  name: ':closest(.header-info) .startup-link',
  URL: [':closest(.header-info) .startup-link', 'href'],
  tagline: ':closest(.header-info) .tagline',
  title: '@innerText',
  compensation: ':closest(.collapsed-listing-row) .collapsed-compensation',
  tags: ':closest(.collapsed-listing-row) .collapsed-tags',
  active: ':closest(.header-info) .tag.active',
  applicants_last_week: [':closest(.header-info) .tag.applicants', onlyNumbers],
  locations: ':closest(.header-info) .tag.locations',
  employees: ':closest(.header-info) .tag.employees'
};
// Selector for the elements to iterate over; each match produces one row.
// Alternative kept from the original for reference (NOTE(review): presumably
// one row per company block instead of per listing title - confirm on the page):
// item = '.header-info'
var item = '.collapsed-title';
// Header row: one column per field, in the declaration order of `map`.
TSVresults.push(Object.keys(map).join("\t"));

// Scrape every element matching `item`: build one object per element (pushed
// to JSONresults) and one tab-separated line per element (pushed to TSVresults).
jQuery(item).each(function (index, element) {
  // `:closest(<selector>)` prefix. `[^)]*` stops at the first `)` so that any
  // later parentheses in the selector (e.g. `:not(...)`) are left untouched.
  const closestPattern = /:closest\(([^)]*)\)/;

  // Tabs and line breaks inside a value would corrupt the TSV layout, so they
  // are flattened to single spaces; null/undefined become an empty cell.
  const toCell = function (value) {
    if (value == null) {
      return '';
    }
    return String(value).replace(/[\t\r\n]+/g, ' ').trim();
  };

  const row = {};

  // All loop state is block-scoped: the original leaked globals and, through
  // `for (name in map)`, overwrote `window.name`.
  for (const field of Object.keys(map)) {
    // Normalise the spec to [selector, accessor] without mutating `map`.
    const spec = Array.isArray(map[field]) ? map[field] : [map[field]];
    const accessor = spec[1] || 'innerText';
    let selector = spec[0];
    let scope = element;

    // Handle the :closest(...) pseudo selector: search from the nearest
    // matching ancestor instead of from the item element.
    const closestMatch = selector.match(closestPattern);
    if (closestMatch && closestMatch[1]) {
      scope = element.closest(closestMatch[1]);
      selector = selector.replace(closestPattern, '').trim();
    }

    // Handle selectors starting with @: read the named property straight from
    // the item element.
    if (selector.charAt(0) === '@') {
      row[field] = element[selector.slice(1)];
      continue;
    }

    // An empty remaining selector means "the scope element itself".
    // `jQuery(null).find(...)` yields an empty set, so a missing ancestor
    // falls through to the same "not found" path as a missing descendant.
    const target = selector === '' ? scope : jQuery(scope).find(selector)[0];

    // Optional elements may be absent for some items; record null rather than
    // throwing and aborting the whole scrape.
    if (target == null) {
      row[field] = null;
      continue;
    }

    row[field] = typeof accessor === 'function' ? accessor(target) : target[accessor];
  }

  JSONresults.push(row);
  TSVresults.push(Object.values(row).map(toCell).join("\t"));
});

// Collapse the rows into one string (TSVresults becomes a string from here on)
// and hand it to the console's clipboard utility. `copy` only exists in the
// browser developer console.
TSVresults = TSVresults.join("\n");
copy(TSVresults);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment