-
-
Save johnsdeveloper/b856195b0d23efd219e18db2da5ff4fc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer');
const jsonfile = require("jsonfile");
const _ = require("lodash");
const mysql = require('mysql');

// Browser and page handles shared by the functions below; getItems assigns
// both when it launches the headless browser.
let browser;
let page;
// Connect to database: one shared pool of up to 10 MySQL connections.
// NOTE(review): credentials are hard-coded (root, empty password) — consider
// loading them from configuration before running this anywhere shared.
const pool = mysql.createPool({
  connectionLimit: 10,
  host: 'localhost',
  user: 'root',
  password: '',
  database: 'marketplace',
});

// Also publish the pool on the global object.
global.pool = pool;
// Gets the current search results for `searchTerm` from the Tampa marketplace.
//
// Launches a headless browser (stored in the shared `browser` / `page`
// variables so the caller can reuse and close it), loads the search page,
// scrolls it with autoScroll, and reads every listing anchor.
//
// Returns a Promise resolving to an array of
//   { price, title, location, miles, imgUrl, itemURL }
// objects. The first four fields are the first four lines of the anchor's
// visible text, in that order; `imgUrl` is null when the listing has no image.
// If the listing selector never appears or extraction fails, the error is
// logged and an empty array is returned.
const getItems = async searchTerm => {
  const itemSelector = 'div > div > span > div > a[tabindex="0"]';
  const imageSelector = 'div > div > span > div > a > div > div > div > div > div > div > img';

  browser = await puppeteer.launch({
    headless: true,
    timeout: 0,
    args: ["--no-sandbox"]
  });
  page = await browser.newPage();

  // encodeURIComponent (not encodeURI) so characters such as "&", "=", "#"
  // and "+" inside the search term cannot break out of the query parameter.
  await page.goto(`https://facebook.com/marketplace/tampa/search/?query=${encodeURIComponent(searchTerm)}&sort=created_date_descending&exact=true`);
  await autoScroll(page);

  try {
    await page.waitForSelector(itemSelector);

    // The callback runs inside the page, so the selectors are passed in as
    // arguments rather than captured from this scope.
    return await page.evaluate((anchorSelector, imgSelector) => {
      const fieldNames = ['price', 'title', 'location', 'miles'];

      return Array.from(document.querySelectorAll(anchorSelector), anchor => {
        const textLines = anchor.innerText.split(/\n/);
        const listing = {};
        fieldNames.forEach((fieldName, index) => {
          listing[fieldName] = textLines[index];
        });

        // A listing without an image must not abort the whole extraction.
        const image = anchor.querySelector(imgSelector);
        listing.imgUrl = image ? image.getAttribute('src') : null;
        listing.itemURL = anchor.getAttribute('href');
        return listing;
      });
    }, itemSelector, imageSelector);
  } catch (err) {
    console.log("Selector error.", err);
    return [];
  }
};
// Runs one scrape cycle for "Jeep Wrangler" listings:
//   1. scrape the current search results,
//   2. load the listings stored by earlier runs,
//   3. keep only the listings whose itemURL has not been stored before,
//   4. store those in newJeeps and previousJeeps and open the notification URL,
//   5. close the browser and call allDone().
//
// Listings are compared by itemURL because scraped objects and database rows
// are never the same object (and need not have the same set of fields), so
// identity or deep-equality comparisons would report every listing as new.
// Only unseen listings are appended to previousJeeps, so each listing is
// stored there once.
//
// Returns a Promise that resolves when the cycle has finished. Failures are
// logged and recorded in process.exitCode; cleanup runs either way.
const initScraper = async () => {
  try {
    // Scrape Page - Get Current Items
    const scraped = await getItems('Jeep Wrangler');
    const currentItems = Array.isArray(scraped) ? scraped : [];

    // Get Previous Items From Database (before anything is written to it)
    const stored = await getPreviousItems();
    const previousItems = Array.isArray(stored) ? stored : [];

    // Get Differences
    const seenUrls = new Set(previousItems.map(row => row.itemURL));
    const newItems = [];
    for (const item of currentItems) {
      if (!seenUrls.has(item.itemURL)) {
        // Remember the URL so a listing scraped twice is only reported once.
        seenUrls.add(item.itemURL);
        newItems.push(item);
      }
    }

    // If New Items, Save Them And Notify User
    if (newItems.length > 0) {
      await saveToDatabase('newJeeps', newItems);
      await saveToDatabase('previousJeeps', newItems);

      const page2 = await browser.newPage();
      await page2.goto(`http://john.mail.com/mail.php`);
      console.log("changed");
    }

    // Let us know when done
    console.log("done");
  } catch (err) {
    console.error(err);
    process.exitCode = 1;
  } finally {
    try {
      // `browser` is only set once getItems has launched it.
      if (browser) {
        await browser.close();
      }
    } finally {
      await allDone();
    }
  }
};
// Start the scrape. A rejected run is reported and ends the process with a
// failing exit code instead of leaving the rejection unhandled.
initScraper().catch(err => {
  console.error(err);
  process.exit(1);
});
// Shuts the script down: ends the MySQL pool, logs "All done", and exits the
// process. Ending the pool first gives queries that are still in flight a
// chance to complete instead of being cut off by process.exit()
// (per the mysql driver's documentation for pool.end — confirm for the
// installed version).
const allDone = async function () {
  try {
    await new Promise(resolve => {
      pool.end(err => {
        if (err) {
          console.log(err);
        }
        resolve();
      });
    });
  } finally {
    // Exit even if closing the pool failed, so the script never hangs.
    console.log("All done");
    process.exit();
  }
};
//----------------------------------------------------
// This function loads the entire search results from
// last time - so they can be compared against the
// new search results.
//
// Returns a Promise resolving to the rows of the
// previousJeeps table. The query error is logged and
// the Promise rejects with it if the query fails.
//----------------------------------------------------
const getPreviousItems = async function () {
  // pool.query is callback-based; a `return` inside its callback never
  // reaches our caller, so the result is bridged through a Promise.
  return new Promise((resolve, reject) => {
    pool.query("SELECT * FROM previousJeeps", function (err, result) {
      if (err) {
        console.log(err);
        reject(err);
        return;
      }
      resolve(result);
    });
  });
};
// Save Data
//
// Bulk-inserts scraped listings into the table `tblName`.
//
//   tblName - name of the destination table.
//   results - array of listing objects with the properties
//             price, title, location, miles, imgUrl and itemURL.
//
// Returns a Promise resolving to true once the insert has completed, or
// immediately when there is nothing to insert. Rejects with the driver error
// if the insert fails.
const saveToDatabase = async function (tblName, results) {
  // A bulk INSERT with zero rows is invalid SQL, so skip the round trip.
  if (!Array.isArray(results) || results.length === 0) {
    return true;
  }

  // The driver expands a nested array into "(…), (…)" value groups, so each
  // listing becomes an array of column values in column order. The listing's
  // `imgUrl` property goes into the `imageUrl` column.
  const rows = results.map(item => [
    item.price,
    item.title,
    item.location,
    item.miles,
    item.imgUrl,
    item.itemURL,
  ]);

  // `??` is the driver's placeholder for an escaped identifier, so the table
  // name is never concatenated into the SQL text.
  const sql = "INSERT INTO ?? (price,title,location,miles,imageUrl,itemURL) VALUES ?";

  const outcome = await new Promise((resolve, reject) => {
    pool.query(sql, [tblName, rows], function (err, result) {
      if (err) {
        reject(err);
        return;
      }
      resolve(result);
    });
  });

  console.log("Number of records inserted: " + outcome.affectedRows);
  return true;
};
// This takes care of the auto scrolling problem: it scrolls the page down in
// fixed steps on a timer.
//
//   page               - Puppeteer page to scroll.
//   options.distance   - pixels scrolled per tick (default 100).
//   options.intervalMs - milliseconds between ticks (default 100).
//   options.maxHeight  - stop as soon as the document is taller than this
//                        many pixels (default 9000).
//
// Scrolling stops when the total distance scrolled reaches the document
// height or the document height exceeds maxHeight, whichever happens first.
// Returns a Promise that resolves once scrolling has stopped.
async function autoScroll(page, { distance = 100, intervalMs = 100, maxHeight = 9000 } = {}) {
  // The callback runs inside the page, so the settings are passed in as
  // arguments rather than captured from this scope.
  await page.evaluate(async (step, delay, heightCap) => {
    await new Promise(resolve => {
      let scrolled = 0;
      const timer = setInterval(() => {
        // Read the height before scrolling: it grows as more results load.
        const scrollHeight = document.body.scrollHeight;
        window.scrollBy(0, step);
        scrolled += step;
        if (scrolled >= scrollHeight || scrollHeight > heightCap) {
          clearInterval(timer);
          resolve();
        }
      }, delay);
    });
  }, distance, intervalMs, maxHeight);
}
//---------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Does this still work?