Skip to content

Instantly share code, notes, and snippets.

@johnsdeveloper
Created May 11, 2020 01:59
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnsdeveloper/b856195b0d23efd219e18db2da5ff4fc to your computer and use it in GitHub Desktop.
Save johnsdeveloper/b856195b0d23efd219e18db2da5ff4fc to your computer and use it in GitHub Desktop.
const puppeteer = require('puppeteer');
const jsonfile = require("jsonfile");
const _ = require("lodash");
var mysql = require('mysql');
// Puppeteer handles shared across the script: getItems assigns both,
// initScraper reuses `browser` afterwards.
let browser;
let page;

// Settings for the MySQL connection pool.
const poolConfig = {
  connectionLimit: 10,
  host: 'localhost',
  user: 'root',
  password: '',
  database: 'marketplace',
};

// Create the pool once and also publish it on `global`.
const pool = mysql.createPool(poolConfig);
global.pool = pool;
/**
 * Scrapes the Facebook Marketplace (Tampa) search results for a search term.
 *
 * Launches a headless browser, stores it and the opened tab in the
 * module-level `browser` / `page` variables, scrolls the results page with
 * `autoScroll`, then reads every listing anchor on the page.
 *
 * The browser is deliberately left open here because `initScraper` reuses
 * the module-level `browser` after this function returns.
 *
 * @param {string} searchTerm - Text to search the marketplace for.
 * @returns {Promise<Array<Object>>} One object per listing with `price`,
 *   `title`, `location` and `miles` (the first four lines of the anchor
 *   text, in that order), `imgUrl` (null when the listing has no image) and
 *   `itemURL`. Resolves to an empty array when the listing selector never
 *   appears or the extraction fails.
 */
const getItems = async searchTerm => {
  const listingSelector = 'div > div > span > div > a[tabindex="0"]';

  browser = await puppeteer.launch({
    headless: true,
    timeout: 0,
    args: ["--no-sandbox"]
  });
  page = await browser.newPage();

  // encodeURIComponent (not encodeURI): characters such as & = ? # in the
  // search term must be escaped or they break out of the `query` parameter.
  await page.goto(`https://facebook.com/marketplace/tampa/search/?query=${encodeURIComponent(searchTerm)}&sort=created_date_descending&exact=true`);
  await autoScroll(page);

  try {
    await page.waitForSelector(listingSelector);
    // The callback runs inside the page, so it must be self-contained;
    // the selector is passed in as an argument.
    return await page.evaluate(selector => {
      const fieldNames = ['price', 'title', 'location', 'miles'];
      return Array.from(document.querySelectorAll(selector), anchor => {
        const lines = anchor.innerText.split(/\n/);
        const listing = {};
        fieldNames.forEach((field, index) => {
          listing[field] = lines[index];
        });
        // A listing without an image must not abort the whole extraction.
        const image = anchor.querySelector('div > div > span > div > a > div > div > div > div > div > div > img');
        listing.imgUrl = image ? image.getAttribute('src') : null;
        listing.itemURL = anchor.getAttribute('href');
        return listing;
      });
    }, listingSelector);
  } catch (err) {
    console.log("Selector error.", err);
    return [];
  }
};
/**
 * Runs one scrape-and-compare cycle for the "Jeep Wrangler" search.
 *
 * Steps, in order:
 *   1. scrape the current listings;
 *   2. load the listings stored by the previous run;
 *   3. work out which current listings were not stored before;
 *   4. store the new listings in `newJeeps` and the current listings in
 *      `previousJeeps`;
 *   5. if anything is new, open the notification URL.
 *
 * The previous listings are read BEFORE the current ones are written;
 * otherwise every current listing would already be in the table and nothing
 * could ever be detected as new.
 *
 * The browser opened by `getItems` is closed when the cycle ends, whether it
 * succeeded or failed.
 *
 * @returns {Promise<void>} Rejects when scraping, a database call or the
 *   notification request fails.
 */
const initScraper = async () => {
  try {
    // Scrape Page - Get New Items
    const currentItems = (await getItems('Jeep Wrangler')) || [];

    // Get Previous Items From Database
    const previousItems = (await getPreviousItems()) || [];

    // Scraped objects and database rows are never the same object, so a
    // reference-based comparison would flag everything as new. Match on the
    // listing URL instead.
    const newItems = _.differenceBy(currentItems, previousItems, 'itemURL');

    // Save Data: newJeeps (skipped when empty: a bulk INSERT needs rows)
    if (newItems.length > 0) {
      await saveToDatabase('newJeeps', newItems);
    }

    // Save Data: previousJeeps
    if (currentItems.length > 0) {
      await saveToDatabase('previousJeeps', currentItems);
    }

    // If New Items, Notify User
    if (newItems.length > 0) {
      const page2 = await browser.newPage();
      await page2.goto(`http://john.mail.com/mail.php`);
      console.log("changed");
    }

    // Let us know when done
    console.log("done");
  } finally {
    if (browser) {
      await browser.close();
    }
  }
};

// Run once. Report a failure through the exit code, then let allDone end
// the process (the open MySQL pool would otherwise keep it alive).
initScraper()
  .catch(err => {
    console.error(err);
    process.exitCode = 1;
  })
  .then(() => allDone());
/**
 * Announces completion on stdout and terminates the Node.js process.
 *
 * @returns {Promise<void>}
 */
const allDone = async () => {
  console.log("All done");
  process.exit();
};
//----------------------------------------------------
// Loads every row of the `previousJeeps` table, i.e. the
// search results stored by the last run, so they can be
// compared against the new search results.
//----------------------------------------------------
/**
 * @returns {Promise<Array<Object>>} The rows returned by the query.
 *   Rejects with the query error (after logging it) when the query fails,
 *   so a failed read is never mistaken for "no previous results".
 */
const getPreviousItems = async function () {
  // pool.query is callback-based: a `return` inside its callback is lost,
  // so the result has to be surfaced through a Promise.
  return new Promise((resolve, reject) => {
    pool.query("SELECT * FROM previousJeeps", (err, result) => {
      if (err) {
        console.log(err);
        reject(err);
        return;
      }
      resolve(result);
    });
  });
};
/**
 * Bulk-inserts listings into a table through the shared connection pool.
 *
 * @param {string} tblName - Target table; passed as an escaped identifier
 *   placeholder rather than concatenated into the SQL text.
 * @param {Array<Object|Array>} results - Listings to insert. Objects are
 *   read through their `price`, `title`, `location`, `miles`, `imgUrl` and
 *   `itemURL` properties; arrays are used as ready-made rows in that column
 *   order. Anything that is not an array is treated as "nothing to insert".
 * @returns {Promise<boolean>} Resolves to `true` once the insert has
 *   completed, or immediately when there is nothing to insert. Rejects with
 *   the query error when the insert fails.
 */
const saveToDatabase = async function (tblName, results) {
  const items = Array.isArray(results) ? results : [];
  // A bulk INSERT with zero rows is invalid SQL, so there is nothing to do.
  if (items.length === 0) {
    return true;
  }

  // The bulk `VALUES ?` placeholder expects an array of row arrays whose
  // order matches the column list below.
  const rows = items.map(item => (
    Array.isArray(item)
      ? item
      : [item.price, item.title, item.location, item.miles, item.imgUrl, item.itemURL]
  ));
  const sql = "INSERT INTO ?? (price,title,location,miles,imageUrl,itemURL) VALUES ?";

  // pool.query is callback-based; wrap it so callers can await completion
  // and so a failure rejects instead of throwing inside the callback.
  const outcome = await new Promise((resolve, reject) => {
    pool.query(sql, [tblName, rows], (err, result) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(result);
    });
  });

  console.log(`Number of records inserted: ${outcome.affectedRows}`);
  return true;
};
/**
 * Scrolls the given page downwards in small steps so that lazily loaded
 * results get a chance to render.
 *
 * Every 100 ms the page is scrolled by 100 px. Scrolling stops once the
 * distance scrolled reaches the document height, or as soon as the document
 * height exceeds 9000 px.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; the function is
 *   executed in the page context.
 * @returns {Promise<void>}
 */
async function autoScroll(page) {
  // Runs inside the page, so everything it needs is declared within it.
  const scrollInSteps = () => new Promise(done => {
    const stepPx = 100;
    let scrolledPx = 0;
    const ticker = setInterval(() => {
      const pageHeight = document.body.scrollHeight;
      window.scrollBy(0, stepPx);
      scrolledPx += stepPx;

      const reachedBottom = scrolledPx >= pageHeight;
      const pageTooTall = pageHeight > 9000;
      if (!reachedBottom && !pageTooTall) {
        return;
      }
      clearInterval(ticker);
      done();
    }, 100);
  });

  await page.evaluate(scrollInSteps);
}
//----------------------------------------------------
@scottocs11
Copy link

Does this still work?

@onemanjo
Copy link

onemanjo commented Mar 8, 2023

Is this code still working in 2023?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment