Skip to content

Instantly share code, notes, and snippets.

@adamhepton
Created September 8, 2016 14:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adamhepton/44bb94eedc9365afbf0e85631c75deb8 to your computer and use it in GitHub Desktop.
Save adamhepton/44bb94eedc9365afbf0e85631c75deb8 to your computer and use it in GitHub Desktop.
Scrape item details (title, price, image, end date, description) out of eBay auction pages into a tab-delimited CSV file.
// Modules: jsdom loads and parses the pages, json2csv serialises the
// collected records, fs writes the output file.
var jsdom = require('jsdom');
var csv = require('json2csv');
var fs = require('fs');

// Records gathered so far; one entry is pushed per fully scraped URL.
var processed = [];

// URLs to scrape: every command-line argument after the first two entries
// of process.argv.
var urls = process.argv.slice(2);

// Record properties written to the output, in column order.
var fields = ['title', 'price', 'image', 'date', 'description'];
// Scrape every URL: load the listing page, pull the summary fields from it,
// then load the description iframe and attach its text before recording the
// result. Any load error is rethrown from the callback.
urls.forEach(function(auctionUrl) {
    jsdom.env(auctionUrl, function(pageError, page) {
        if (pageError) {
            throw(pageError);
        }

        var doc = page.document;
        var dateText = doc.querySelector('#bb_tlft').textContent.trim();

        // The title text is the node that follows the <span> inside #itemTitle.
        var title = doc.querySelector('#itemTitle span').nextSibling.textContent.trim();
        var price = doc.querySelector('#prcIsum').textContent.trim();
        var image = doc.querySelector('#mainImgHldr #icImg').getAttribute('src');

        // A listing whose date text contains "Time left" is recorded as
        // 'Ongoing'; otherwise the text is tidied up and parsed as a Date.
        var date;
        if (dateText.indexOf('Time left') !== -1) {
            date = 'Ongoing';
        } else {
            // Collapse the first run of whitespace, then drop the three word
            // characters after the seconds (presumably a timezone
            // abbreviation -- confirm against a live page).
            var collapsed = dateText.replace(/\s\s+/, ' ');
            var cleaned = collapsed.replace(/(\:\d\d)\s\w\w\w/, '$1');
            date = new Date(cleaned);
        }

        var info = {
            title: title,
            price: price,
            image: image,
            date: date
        };

        // The description lives in a separate document referenced by the
        // #desc_ifr element's src attribute.
        var descriptionUrl = doc.querySelector('#desc_ifr').getAttribute('src');

        jsdom.env(descriptionUrl, function(frameError, iframe) {
            if (frameError) {
                throw(frameError);
            }

            var frameDoc = iframe.document;
            var styleNode = frameDoc.querySelector('#ds_div style');
            var content = frameDoc.querySelector('#ds_div').textContent;

            // textContent of #ds_div includes any embedded stylesheet text,
            // so remove it from the description when a <style> is present.
            var styleText = "";
            if (styleNode) {
                styleText = styleNode.textContent || "";
            }

            info.description = content.replace(styleText, '').trim();
            processed.push(info);
            checkProgress();
        });
    });
});
/**
 * Writes the collected records to a timestamped file once every requested
 * URL has been processed.
 *
 * Does nothing while `processed` holds a different number of records than
 * `urls`. When the counts match, the records are serialised with `csv`
 * (tab delimiter, columns taken from `fields`) and written to
 * `ebay-YYYYMMDD-HHMMSS.csv`, named after the current local time.
 * A write error is logged to the console rather than thrown.
 *
 * @returns {undefined}
 */
var checkProgress = function() {
    // Scrapes finish asynchronously; wait until all of them have reported in.
    if (processed.length !== urls.length) {
        return;
    }

    // Left-pads single-character values with a zero so each part has a
    // fixed width.
    var pad = function(value) {
        var text = String(value);
        return text.length === 1 ? '0' + text : text;
    };

    var today = new Date();

    // getMonth() is zero-based (January is 0), so add 1 to get the calendar
    // month for the filename.
    var datePart = [
        today.getFullYear(),
        today.getMonth() + 1,
        today.getDate()
    ].map(pad).join('');

    var timePart = [
        today.getHours(),
        today.getMinutes(),
        today.getSeconds()
    ].map(pad).join('');

    var filename = ['ebay', datePart, timePart].join('-') + '.csv';

    fs.writeFile(
        filename,
        csv({ data: processed, fields: fields, del: '\t' }),
        function(err) {
            if (err) {
                return console.log(err);
            }
            console.log('Processing finished and saved to', filename);
        }
    );
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment