Skip to content

Instantly share code, notes, and snippets.

@ngekoding
Last active February 10, 2022 08:47
Show Gist options
  • Save ngekoding/b198d144982abc1d58ac5512843079e9 to your computer and use it in GitHub Desktop.
Save ngekoding/b198d144982abc1d58ac5512843079e9 to your computer and use it in GitHub Desktop.
Crawling google form response summary (analytics)
const puppeteer = require('puppeteer-extra');
const pluginStealth = require('puppeteer-extra-plugin-stealth');
const fs = require('fs');
const dayjs = require('dayjs');
// Register the stealth plugin with puppeteer-extra before any browser is launched
puppeteer.use(pluginStealth());

// Change default locale to Indonesian so formatted dates use Indonesian day/month names
require('dayjs/locale/id');
dayjs.locale('id');

// Saving to PDF only works in headless mode
const headless = true;

// Google account credentials.
// Set the GFORM_EMAIL / GFORM_PASSWORD environment variables to avoid keeping
// real credentials in this file; the placeholders below are only a fallback
// and can still be edited directly.
const email = process.env.GFORM_EMAIL || 'your-email@gmail.com';
const password = process.env.GFORM_PASSWORD || 'your-email-password';

// Base path where the PDF results are saved
const resultBasePath = './';

// Data to be crawled.
// Kept in a separate JSON file so the crawler can be reconfigured without code changes.
const survey = JSON.parse(fs.readFileSync('survey.json'));

// File that stores the last update time of each generated PDF
const timestampsFile = 'timestamps.json';
/**
 * Reads the stored "last update" timestamps from `timestampsFile`.
 *
 * A missing file is treated as "nothing stored yet" so that the very first
 * run does not crash; any other failure (unreadable file, invalid JSON) is
 * rethrown.
 *
 * @returns {Object} The parsed content of the timestamps file, or an empty
 *   object when the file does not exist.
 * @throws {Error} When the file exists but cannot be read or parsed.
 */
function getTimestamps() {
  try {
    return JSON.parse(fs.readFileSync(timestampsFile, 'utf8'));
  } catch (err) {
    // First run: the file has not been created yet
    if (err.code === 'ENOENT') {
      return {};
    }
    throw err;
  }
}
/**
 * Persists the timestamps object to `timestampsFile` as JSON.
 *
 * Failures are logged to the console and not rethrown, so a write problem
 * never interrupts the caller.
 *
 * @param {Object} data - Timestamps to store.
 */
function setTimestamps(data) {
  try {
    const serialized = JSON.stringify(data);
    fs.writeFileSync(timestampsFile, serialized);
  } catch (writeError) {
    console.log('Failed to write timestamps', writeError);
  }
}
/**
 * Records `value` under `key` in the stored timestamps.
 *
 * Loads the current timestamps, sets the single entry and writes the whole
 * object back.
 *
 * @param {string} key - Entry name (the caller in this file passes the PDF filename).
 * @param {string} value - Timestamp text to store for that entry.
 */
function updateSurveyTimestamp(key, value) {
  const stored = getTimestamps();
  stored[key] = value;
  setTimestamps(stored);
}
/**
 * Entry point.
 *
 * Launches Chromium, signs in to the configured Google account, then for every
 * entry in `survey.summaries` opens its URL, strips the page elements that
 * should not be printed, saves the page as a PDF (headless mode only) and
 * records the update time via `updateSurveyTimestamp`.
 *
 * The browser is always closed, even when a step fails; a failure is logged
 * and reported through a non-zero process exit code.
 */
(async () => {
  const browser = await puppeteer.launch({
    headless,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-accelerated-2d-canvas',
      '--no-first-run',
      '--no-zygote',
      '--disable-gpu',
      // '--single-process', // <- this one doesn't work on Windows
    ],
  });

  try {
    console.log('App running...');

    const page = await browser.newPage();
    const pages = await browser.pages();

    // Close the tab that Chromium always opens first.
    await pages[0].close();

    await page.setBypassCSP(true);
    await page.goto('https://accounts.google.com/signin/v2/identifier', { waitUntil: 'networkidle2' });

    // Wait for email input.
    await page.waitForSelector('#identifierId');
    await page.type('#identifierId', email);
    await page.waitForTimeout(1000);
    await page.keyboard.press('Enter');
    await page.waitForTimeout(1000);

    // Wait for the password step. The checkbox is clicked first (presumably the
    // "show password" toggle - confirm against the live page), which is why the
    // password is then typed into an input of type "text".
    await page.waitForSelector('input[type="checkbox"]');
    await page.evaluate(() => {
      document.querySelector('input[type="checkbox"]').parentElement.click();
    });
    await page.waitForTimeout(1000);
    await page.type('input[type="text"]', password);
    await page.waitForTimeout(1000);
    await page.keyboard.press('Enter');

    // The banner is used as the signal that sign-in finished
    await page.waitForSelector('[role="banner"]');

    for (let itemIndex = 0; itemIndex < survey.summaries.length; itemIndex++) {
      const item = survey.summaries[itemIndex];
      const now = dayjs();
      const lastUpdate = now.format('dddd, D MMMM YYYY') + ' pukul ' + now.format('HH.mm');

      // Print the title
      console.log('Getting for: ' + item.title);

      await page.goto(item.url);

      // Wait until the loading indicator is gone
      await page.waitForSelector('.freebirdCommonViewLoadingindicatorLoadingIndicatorContainer', {
        hidden: true
      });

      // Give the 'file unavailable' dialog up to 5 seconds to show up.
      // It is removed below; not appearing at all is the normal case.
      try {
        await page.waitForSelector('.quantumWizDialogBackground.isOpen', {
          timeout: 5000 // 5 seconds
        });
      } catch (err) {
        // Best effort: the dialog did not appear, nothing to do
      }

      // An entry without "ignores" removes no summary
      const summariesTitleToRemove = item.ignores || [];

      // Remove unused elements (runs inside the page)
      await page.evaluate((summariesTitleToRemove, lastUpdate) => {
        // Removes the first element matching the selector, if there is one,
        // so a missing element does not abort the whole clean-up
        const removeIfPresent = (selector) => {
          const element = document.querySelector(selector);
          if (element) element.remove();
        };

        // Remove file unavailable dialog if it exists
        removeIfPresent('.quantumWizDialogBackground.isOpen');

        // Change body background
        document.body.style.backgroundColor = '#ffffff';

        // Remove publish button
        removeIfPresent('a.exportButtonNestedLink');

        // Remove FAB
        removeIfPresent('.freebirdFormviewerViewNavigationHeaderButtonContainer');

        // Change footer content to the last update time.
        // Alternatively the footer can simply be removed:
        //   '.freebirdFormviewerViewFooterDisclaimer' and
        //   '.freebirdFormviewerViewFooterImageContainer'
        const footer = document.querySelector('.freebirdFormviewerViewFooterDisclaimer');
        if (footer) footer.textContent = 'Diperbarui pada: ' + lastUpdate;

        // Remove unused summaries by question title or section header
        const analyticsItems = document.querySelectorAll('.freebirdAnalyticsViewAnalyticsItem');
        for (const el of analyticsItems) {
          const title = el.querySelector('.freebirdAnalyticsViewQuestionTitle');
          const headerTitle = el.querySelector('.freebirdAnalyticsViewSectionHeader');
          if (
            (title && summariesTitleToRemove.includes(title.innerText)) ||
            (headerTitle && summariesTitleToRemove.includes(headerTitle.innerText))
          ) {
            el.remove();
          }
        }
      }, summariesTitleToRemove, lastUpdate);

      // Save content to file
      console.log('Saving result: ' + item.filename);

      // PDF only works when headless is active
      if (headless) {
        await page.pdf({
          path: resultBasePath + item.filename,
          printBackground: true,
          format: 'A4',
          margin: {
            top: '2cm',
            bottom: '2cm',
            left: '1cm',
            right: '1cm'
          }
        });
      }

      // Save last update
      updateSurveyTimestamp(item.filename, lastUpdate);
    }

    console.log('✅ Operation completed, have a good day!');
  } finally {
    // Always release the browser process, even when a step above failed
    await browser.close();
  }
})().catch((err) => {
  console.error('Crawling failed', err);
  process.exitCode = 1;
});
{
"name": "g-form-automator",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [
"google form automator",
"google form crawler",
"nodejs crawler",
"puppeteer"
],
"author": "Nur Muhammad",
"license": "MIT",
"dependencies": {
"dayjs": "^1.10.7",
"puppeteer": "^10.1.0",
"puppeteer-extra": "^3.1.18",
"puppeteer-extra-plugin-stealth": "^2.7.8"
}
}
{
"summaries": [
{
"title": "Survei Layanan Persuratan",
"url": "https://docs.google.com/forms/d/.../viewanalytics",
"filename": "survei-layanan-persuratan.pdf",
"ignores": [
"Alamat Email",
"Nomor HP (Whatsapp)"
]
},
{
"title": "Survei Layanan Urusan Internasional",
"url": "https://docs.google.com/forms/d/.../viewanalytics",
"filename": "survei-layanan-urusan-internasional.pdf",
"ignores": [
"Email address:",
"Mobile Number (Whatsapp)",
"EVALUATION AND IMPROVEMENT"
]
}
]
}
@ngekoding
Copy link
Author

Notes:

survey.json

  • url: the URL of the form's response summary page; get it from the Google Form responses -> print
  • ignores: list of summary titles to remove before generating the PDF (e.g. sensitive data)

How to run?

  • In g-form-automator.js, set email and password to an account that has access to the Google Form data
  • Run npm install to install the libraries
  • Run node g-form-automator.js to start the app
  • Enjoy!

Feel free to make changes to meet your needs!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment