Created
July 19, 2023 17:31
-
-
Save jduffey/add9321813ba57bf1f5247ed3157a1e2 to your computer and use it in GitHub Desktop.
Meetup.com spike scraper for event calendar dates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer');

// Meetup group event listing that this spike scrapes.
const EVENTS_URL = 'https://www.meetup.com/techlifecolumbus/events/';
// Substring that identifies the event whose date/time we are looking for.
const TARGET_STRING = 'Free Code Camp Columbus';
// Placeholder used whenever a card has no line where a date/time is expected.
const DATE_TIME_FALLBACK = 'Date/Time not available';
// How far to scroll between searches, and how long to let the page settle.
const SCROLL_STEP_PX = 1000;
const SCROLL_SETTLE_MS = 2000;
// Upper bound on search passes so a missing event cannot loop forever.
// NOTE(review): 30 is an arbitrary cap — tune to how long the listing really is.
const MAX_SEARCH_ATTEMPTS = 30;

/**
 * Resolve after `ms` milliseconds.
 *
 * Used instead of `page.waitForTimeout`, which is deprecated and absent from
 * current Puppeteer releases.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Debug helper: collect every distinct CSS class name used on the page, in
 * first-seen document order.
 *
 * @param {object} page - Puppeteer page to inspect.
 * @returns {Promise<string[]>} Distinct class names.
 */
async function collectClassNames(page) {
  return page.evaluate(() => {
    // A Set dedupes in O(1) per class and keeps insertion order.
    const unique = new Set();
    for (const element of document.querySelectorAll('*')) {
      for (const className of element.classList) {
        unique.add(className);
      }
    }
    return [...unique];
  });
}

/**
 * Scan the currently rendered `.eventCard` elements for the target string.
 *
 * Each card's text is split into trimmed, non-empty lines. The first line is
 * taken as the event name and the second as its date/time (an assumption about
 * the card layout inherited from the original spike). The target's date/time
 * is likewise assumed to be the line following the one containing
 * `targetString`.
 *
 * @param {object} page - Puppeteer page showing the event listing.
 * @param {string} targetString - Substring identifying the wanted event.
 * @returns {Promise<{
 *   allEvents: Array<{eventName: string, eventDateTime: string}>,
 *   targetEvent: ?{eventName: string, dateTime: string}
 * }>} Every card found, plus the matched event (or `null`). If several cards
 *   match, the last one in document order is reported.
 */
async function searchEventInPage(page, targetString) {
  return page.evaluate((target, fallback) => {
    const allEvents = [];
    let targetEvent = null;

    for (const card of document.querySelectorAll('.eventCard')) {
      const lines = card.innerText
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line.length > 0);

      // Defaults keep `eventName` a string even for a card with no text, so
      // callers can safely call string methods on it.
      const [eventName = '', eventDateTime = fallback] = lines;
      allEvents.push({ eventName, eventDateTime });

      const matchIndex = lines.findIndex((line) => line.includes(target));
      if (matchIndex > -1) {
        targetEvent = {
          eventName: target,
          dateTime: lines[matchIndex + 1] || fallback,
        };
      }
    }

    return { allEvents, targetEvent };
  }, targetString, DATE_TIME_FALLBACK);
}

/**
 * Open the Meetup listing, scroll until the target event shows up (or the
 * attempt budget runs out), and print its date/time.
 *
 * The browser is always closed, even when a step throws.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the target event is not found within
 *   `MAX_SEARCH_ATTEMPTS` passes.
 */
async function main() {
  const browser = await puppeteer.launch();
  console.log('Browser launched');

  try {
    const page = await browser.newPage();
    console.log('New page opened');

    await page.goto(EVENTS_URL);
    console.log('Navigated to Meetup events page');

    // Opt-in selector discovery; set DEBUG_CLASS_NAMES=1 to dump class names.
    if (process.env.DEBUG_CLASS_NAMES) {
      const classNames = await collectClassNames(page);
      [...classNames].sort().forEach((name) => console.log(name));
    }

    let targetEvent = null;
    for (let attempt = 1; attempt <= MAX_SEARCH_ATTEMPTS; attempt += 1) {
      console.log('Searching for event...');
      const eventDetails = await searchEventInPage(page, TARGET_STRING);

      for (const event of eventDetails.allEvents) {
        if (event.eventName.includes(TARGET_STRING)) {
          console.log(` 🎉🎉🎉🎉 --> ${event.eventName} at ${event.eventDateTime}`);
        } else {
          console.log(`Found event: ${event.eventName}`);
        }
      }

      targetEvent = eventDetails.targetEvent;
      if (targetEvent || attempt === MAX_SEARCH_ATTEMPTS) {
        break;
      }

      console.log('Scrolling down...');
      await page.evaluate((step) => {
        window.scrollBy(0, step);
      }, SCROLL_STEP_PX);

      // Give lazily loaded cards time to render before searching again.
      console.log('Waiting for scroll to complete...');
      await delay(SCROLL_SETTLE_MS);
    }

    if (!targetEvent) {
      throw new Error(
        `Event "${TARGET_STRING}" not found after ${MAX_SEARCH_ATTEMPTS} search attempts`,
      );
    }

    console.log(`Found event details:\nEvent Name: ${targetEvent.eventName}\nDate and Time: ${targetEvent.dateTime}`);
  } finally {
    console.log('Closing browser...');
    await browser.close();
    console.log('Browser closed');
  }
}

main().catch((error) => {
  // Surface the failure and signal it to the shell instead of leaving an
  // unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment