Skip to content

Instantly share code, notes, and snippets.

@jduffey
Created July 19, 2023 17:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jduffey/add9321813ba57bf1f5247ed3157a1e2 to your computer and use it in GitHub Desktop.
Save jduffey/add9321813ba57bf1f5247ed3157a1e2 to your computer and use it in GitHub Desktop.
Meetup.com spike scraper for event calendar dates
const puppeteer = require('puppeteer');
// Spike: open the Tech Life Columbus Meetup events page with Puppeteer, scan
// the rendered event cards for one whose text mentions `targetString`, and
// print the line that follows the match (assumed to be the date/time).
// The page is scrolled in steps between scans; the search gives up after
// MAX_SCROLL_ATTEMPTS scrolls instead of looping forever.
(async () => {
  const EVENTS_URL = 'https://www.meetup.com/techlifecolumbus/events/';
  const targetString = 'Free Code Camp Columbus';
  const SCROLL_STEP_PX = 1000;
  const SCROLL_SETTLE_MS = 2000;
  const MAX_SCROLL_ATTEMPTS = 30;

  // Plain timer-based delay; page.waitForTimeout is deprecated/removed in
  // newer Puppeteer releases, so it is not relied upon here.
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const browser = await puppeteer.launch();
  console.log('Browser launched');

  // try/finally guarantees the browser process is shut down even when
  // navigation or in-page evaluation throws.
  try {
    const page = await browser.newPage();
    console.log('New page opened');
    await page.goto(EVENTS_URL);
    console.log('Navigated to Meetup events page');

    /**
     * Scans every `.eventCard` element currently in the DOM.
     *
     * @returns {Promise<{
     *   allEvents: Array<{eventName: string, eventDateTime: string}>,
     *   targetEvent: ({eventName: string, dateTime: string}|null)
     * }>} All cards that contain text, plus the target match (or null).
     *   If several cards match, the last one in document order is returned.
     */
    const searchEventInPage = () =>
      // The callback runs inside the browser page, so it cannot close over
      // Node-side variables; the search string is passed as an argument.
      page.evaluate((needle) => {
        const allEvents = [];
        let targetEvent = null;

        for (const card of document.querySelectorAll('.eventCard')) {
          const textLines = card.innerText.split('\n');

          // The first non-empty line is treated as the event name.
          const nameIndex = textLines.findIndex((line) => line.length > 0);
          if (nameIndex === -1) {
            // Card has no text at all: nothing to report and nothing to match.
            continue;
          }

          allEvents.push({
            eventName: textLines[nameIndex],
            // Assumes the date/time is the line right after the event name.
            eventDateTime: textLines[nameIndex + 1] || 'Date/Time not available',
          });

          const matchIndex = textLines.findIndex((line) => line.includes(needle));
          // A match only counts when a following line exists to read from.
          if (matchIndex > -1 && matchIndex < textLines.length - 1) {
            targetEvent = {
              eventName: needle,
              // Assumes the date/time is on the line after the match.
              dateTime: textLines[matchIndex + 1],
            };
          }
        }

        return { allEvents, targetEvent };
      }, targetString);

    let targetEvent = null;

    // One scan before any scrolling, then one scan after each scroll.
    for (let attempt = 0; attempt <= MAX_SCROLL_ATTEMPTS; attempt++) {
      console.log('Searching for event...');
      const eventDetails = await searchEventInPage();

      for (const event of eventDetails.allEvents) {
        if (event.eventName.includes(targetString)) {
          console.log(` 🎉🎉🎉🎉 --> ${event.eventName} at ${event.eventDateTime}`);
        } else {
          console.log(`Found event: ${event.eventName}`);
        }
      }

      if (eventDetails.targetEvent) {
        targetEvent = eventDetails.targetEvent;
        break;
      }
      if (attempt === MAX_SCROLL_ATTEMPTS) {
        break;
      }

      console.log('Scrolling down...');
      await page.evaluate((step) => {
        window.scrollBy(0, step);
      }, SCROLL_STEP_PX);

      // Give the page time to finish scrolling and render further cards.
      console.log('Waiting for scroll to complete...');
      await sleep(SCROLL_SETTLE_MS);
    }

    if (targetEvent) {
      console.log(`Found event details:\nEvent Name: ${targetEvent.eventName}\nDate and Time: ${targetEvent.dateTime}`);
    } else {
      console.error(`Event "${targetString}" not found after ${MAX_SCROLL_ATTEMPTS} scroll attempts`);
      process.exitCode = 1;
    }
  } finally {
    console.log('Closing browser...');
    await browser.close();
    console.log('Browser closed');
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error('Scraper failed:', err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment