Created
September 8, 2016 14:28
-
-
Save adamhepton/4a7f6a1e6faa886103b6a3159c3bfabc to your computer and use it in GitHub Desktop.
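Scrape the title, price, image, end date and description out of eBay auction pages into a tab-delimited CSV file. Save this script into a writable directory, run `npm install`, then run `node ebay-scraper.js` followed by as many auction URLs as you want.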
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Modules: jsdom fetches and parses the pages, json2csv serialises the rows,
// fs writes the result to disk.
var jsdom = require('jsdom');
var csv = require('json2csv');
var fs = require('fs');

// One object per listing that has finished scraping.
var processed = [];

// Listing URLs: every command-line argument after `node <script>`.
var urls = process.argv.slice(2);

// Column names, in output order, handed to json2csv.
var fields = ['title', 'price', 'image', 'date', 'description'];
/**
 * Scrape every listing URL given on the command line.
 *
 * For each URL the listing page is loaded with jsdom, the title, price, main
 * image and end date are read from it, and then the description (served in a
 * separate iframe document) is loaded and appended. Each URL always ends up
 * as exactly one row in `processed`, even when loading or parsing fails,
 * because `checkProgress` only writes the file once the row count matches the
 * URL count. A single bad listing must not discard the rest of the batch.
 */
urls.forEach(function (url) {
    // Store a finished row and let checkProgress decide whether to write the file.
    var finish = function (info) {
        processed.push(info);
        checkProgress();
    };

    // Trimmed text of the first match, or '' when the element is absent.
    var textOf = function (doc, selector) {
        var node = doc.querySelector(selector);
        return node ? node.textContent.trim() : '';
    };

    // Attribute value of the first match, or '' when element or attribute is absent.
    var attrOf = function (doc, selector, name) {
        var node = doc.querySelector(selector);
        return node ? node.getAttribute(name) || '' : '';
    };

    jsdom.env(url, function (err, window) {
        if (err) {
            // Throwing here would kill the process from inside an async
            // callback and lose every row scraped so far.
            console.error('Failed to load', url, err);
            finish({
                title: '',
                price: '',
                image: '',
                date: '',
                description: 'Failed to load ' + url
            });
            return;
        }

        var doc = window.document;
        var dateText = textOf(doc, '#bb_tlft');

        // The title text is the node that follows the first <span> inside #itemTitle.
        var titleLabel = doc.querySelector('#itemTitle span');
        var titleNode = titleLabel ? titleLabel.nextSibling : null;

        var date;
        if (dateText === '') {
            date = '';
        } else if (dateText.indexOf('Time left') !== -1) {
            date = 'Ongoing';
        } else {
            // Collapse the first whitespace run and drop the three-character
            // word after the seconds before handing the text to Date.
            date = new Date(dateText.replace(/\s\s+/, ' ').replace(/(\:\d\d)\s\w\w\w/, '$1'));
        }

        var info = {
            title: titleNode ? titleNode.textContent.trim() : '',
            price: textOf(doc, '#prcIsum'),
            image: attrOf(doc, '#mainImgHldr #icImg', 'src'),
            date: date
        };

        var descriptionUrl = attrOf(doc, '#desc_ifr', 'src');
        if (!descriptionUrl) {
            console.error('No description frame found for', url);
            info.description = '';
            finish(info);
            return;
        }

        jsdom.env(descriptionUrl, function (frameErr, iframe) {
            if (frameErr) {
                console.error('Failed to load description for', url, frameErr);
                info.description = '';
                finish(info);
                return;
            }

            var container = iframe.document.querySelector('#ds_div');
            var styleNode = container ? container.querySelector('style') : null;
            var content = container ? container.textContent : '';
            var styleText = styleNode ? styleNode.textContent || '' : '';

            // textContent includes the text of an embedded <style> element,
            // so strip that CSS out of the description.
            info.description = content.replace(styleText, '').trim();
            finish(info);
        });
    });
});
/**
 * Write the collected rows to a timestamped file once every URL is done.
 *
 * Does nothing until `processed` holds as many rows as there are `urls`.
 * The file is named `ebay-YYYYMMDD-HHMMSS.csv` from the local time at the
 * moment of writing, and its contents are produced by json2csv with the
 * columns in `fields` and a tab as delimiter. Write errors are logged, not
 * thrown.
 */
var checkProgress = function () {
    if (processed.length !== urls.length) {
        return;
    }

    // Left-pad a single-digit value with '0'.
    var pad = function (value) {
        var text = String(value);
        return text.length === 1 ? '0' + text : text;
    };

    var today = new Date();

    var datePart = [
        today.getFullYear(),
        // getMonth() is zero-based (January === 0); shift to the calendar month.
        pad(today.getMonth() + 1),
        pad(today.getDate())
    ].join('');

    var timePart = [
        pad(today.getHours()),
        pad(today.getMinutes()),
        pad(today.getSeconds())
    ].join('');

    var filename = ['ebay', datePart, timePart].join('-') + '.csv';

    fs.writeFile(
        filename,
        csv({ data: processed, fields: fields, del: '\t' }),
        function (err) {
            if (err) {
                return console.log(err);
            }
            console.log('Processing finished and saved to', filename);
        }
    );
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment