Last active
November 28, 2018 14:28
-
-
Save mayosuke/45f40eae39389720845c1945646482f5 to your computer and use it in GitHub Desktop.
Count Actions on Google
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer'); | |
/** Base URL used to build the explore-page URL and the sub-category page URLs. */
const URL_BASE = 'https://assistant.google.com/';

/**
 * Locale taken from the first command-line argument, e.g. 'ja-JP' or 'en-US'.
 * It is sent as the `hl` query parameter of the explore page; it is
 * `undefined` when the script is started without an argument.
 */
const locale = process.argv[2];
/**
 * Entry point. Crawls the directory in three stages and prints the result:
 *   1. load the explore page for `locale` and collect the category links,
 *   2. load every category page and collect the sub-category links,
 *   3. load every sub-category page and collect the `data-agent-id` values.
 *
 * Every collected id is written to stdout, one per line. The ids are NOT
 * de-duplicated: an id found on several sub-category pages is printed once
 * per occurrence.
 *
 * The browser is always closed, even when a stage fails, and a failure is
 * reported on stderr with a non-zero exit code.
 */
(async () => {
  // Keep this `await` ahead of the first getElements() call: getElements is a
  // `const` declared further down the file and is only initialised once the
  // module body has run past this (suspended) function.
  const browser = await puppeteer.launch();
  try {
    // The three collectors below are passed to page.evaluate() by getElements,
    // so they run inside the loaded page and must not reference any variable
    // from this Node-side scope. The class names are the ones the site used
    // when the script was written.

    // Returns the href of each tile whose first element child is a
    // 'hSRGPd' link pointing at an '/explore' URL.
    const collectCategoryLinks = () => {
      const links = [];
      for (const tile of Array.from(document.getElementsByClassName('DH6Rkf-ibnC6b'))) {
        const anchor = tile.firstElementChild;
        // A tile without an element child (or whose child has no string
        // href) is skipped instead of aborting the crawl with a TypeError.
        if (
          anchor &&
          anchor.className === 'hSRGPd' &&
          typeof anchor.href === 'string' &&
          anchor.href.includes('/explore')
        ) {
          links.push(anchor.href);
        }
      }
      return links;
    };

    // Returns every non-empty `data-link` value found on the page.
    const collectSubCategoryLinks = () =>
      Array.from(document.getElementsByClassName('v4cdld-r6IkTd'))
        .map((element) => element.dataset.link)
        .filter(Boolean);

    // Returns the non-empty `data-agent-id` of each card's first child node.
    const collectAgentIds = () => {
      const ids = [];
      for (const card of Array.from(document.getElementsByClassName('SNU6J'))) {
        const child = card.firstChild;
        // firstChild can be null or a text node; neither has getAttribute.
        const id =
          child && typeof child.getAttribute === 'function'
            ? child.getAttribute('data-agent-id')
            : null;
        if (id) {
          ids.push(id);
        }
      }
      return ids;
    };

    const categories = await getElements(
      browser,
      `${URL_BASE}explore?hl=${locale}`,
      collectCategoryLinks,
      true,
    );

    // Pages are visited one at a time on purpose; each visit opens a tab.
    const entireSubCategories = [];
    for (const categoryUrl of categories) {
      const subCategories = await getElements(browser, categoryUrl, collectSubCategoryLinks, true);
      entireSubCategories.push(...subCategories);
    }

    const entireAogsWithDuplication = [];
    for (const subCategoryPath of entireSubCategories) {
      // Lazy loading should not be needed here, so waitLazyLoad is omitted.
      const aogs = await getElements(browser, `${URL_BASE}${subCategoryPath}`, collectAgentIds);
      entireAogsWithDuplication.push(...aogs);
    }

    for (const agentId of entireAogsWithDuplication) {
      console.log(agentId);
    }
  } finally {
    // Without this the launched browser process outlives a failed crawl.
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
/**
 * Scrolls the page's scroll container far down and then pauses, so that
 * content which is presumably loaded lazily on scroll has time to appear.
 *
 * The pause uses a plain timer instead of `page.waitFor(ms)`, which is
 * deprecated in newer Puppeteer releases.
 *
 * @param {object} page - Puppeteer page that has already navigated.
 * @param {number} [settleMs=3000] - Milliseconds to wait after scrolling.
 * @param {number} [scrollDistance=50000] - Vertical distance passed to `scrollBy`.
 * @returns {Promise<void>} Resolves once the pause has elapsed. Rejects if the
 *   page has no element with the expected class list.
 */
const loadEntirePage = async (page, settleMs = 3000, scrollDistance = 50000) => {
  // This callback runs inside the page; the distance has to be passed as an
  // evaluate() argument because Node-side variables are not visible there.
  await page.evaluate((distance) => {
    window.document
      .getElementsByClassName('y3IDJd X3BTXb mwk6ad FcSP1b')[0]
      .scrollBy(0, distance);
  }, scrollDistance);
  await new Promise((resolve) => {
    setTimeout(resolve, settleMs);
  });
};
/**
 * Opens `url` in a new tab, optionally triggers lazy loading, evaluates `f`
 * inside the page and returns whatever `f` returned.
 *
 * The tab is closed before this function settles, whether it succeeds or
 * fails. Previously every call left its tab open, so tabs piled up for the
 * whole run.
 *
 * @param {object} browser - Puppeteer browser used to open the tab.
 * @param {string} url - Address to navigate to.
 * @param {Function} f - Function handed to `page.evaluate`; it runs in the
 *   page, so it cannot use variables from the calling scope.
 * @param {boolean} [waitLazyLoad] - When truthy, `loadEntirePage` is run
 *   before `f` is evaluated.
 * @returns {Promise<*>} The value produced by `f` in the page.
 */
const getElements = async (browser, url, f, waitLazyLoad) => {
  const page = await browser.newPage();
  try {
    // 'networkidle2' is Puppeteer's option for waiting until network
    // activity has mostly stopped before goto() resolves.
    await page.goto(url, { waitUntil: 'networkidle2' });
    if (waitLazyLoad) {
      await loadEntirePage(page);
    }
    return await page.evaluate(f);
  } finally {
    await page.close();
  }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment