Skip to content

Instantly share code, notes, and snippets.

@natzir
Created June 23, 2023 07:19
Show Gist options
  • Star 14 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save natzir/473d9731087c9f82d0cd9ec4e27d69c2 to your computer and use it in GitHub Desktop.
Save natzir/473d9731087c9f82d0cd9ec4e27d69c2 to your computer and use it in GitHub Desktop.
// Ask the user how many expansion rounds to run. The explicit radix keeps input
// such as "0x10" from being parsed as hexadecimal.
var depth = parseInt(prompt("Please enter the scraping depth (we recommend between 1 to 3):", "2"), 10);
// Non-numeric or cancelled input parses to NaN, which fails Number.isInteger.
if (!Number.isInteger(depth) || depth < 1) {
  alert("Invalid depth! Setting depth to default (2).");
  depth = 2;
}
// Round counter; scrollPage() increments it until it reaches depth.
var count = 1;
/**
 * Clicks the toggle belonging to every `div[data-lk]` element currently in the
 * document, pausing 1.5 seconds before each click.
 *
 * The toggle is located by walking from the element's parent to its first child,
 * that node's first child, the second entry of its child nodes, and finally that
 * node's first child. Elements whose surroundings do not have this shape (or whose
 * final node is not clickable) are skipped, so a single unexpected node cannot
 * abort the whole pass with a TypeError.
 *
 * @returns {Promise<void>} Resolves once every matched element has been visited.
 */
async function start() {
  const elements = document.querySelectorAll('div[data-lk]');
  for (const element of elements) {
    // Wait before each click, including for elements that end up being skipped.
    await new Promise(resolve => setTimeout(resolve, 1500));

    const parent = element.parentElement;
    const header = parent && parent.firstChild;
    const row = header && header.firstChild;
    const cell = row && row.childNodes && row.childNodes[1];
    const toggle = cell && cell.firstChild;

    // Text nodes and other non-element nodes have no click() method.
    if (toggle && typeof toggle.click === 'function') {
      toggle.click();
    }
  }
}
/**
 * Scrolls to the bottom of the page until it stops growing, runs further
 * expansion rounds through start() while the global `count` is below the global
 * `depth`, then calls extractData() and removes the loading overlay.
 *
 * The steps run in an awaited loop rather than through un-awaited recursive
 * calls, so the returned promise settles only when the whole run has finished
 * and any failure reaches the caller instead of becoming an unhandled rejection.
 *
 * @returns {Promise<void>} Resolves after extraction; rejects if start() or
 *   extractData() throws. The overlay is removed even when extractData() throws.
 */
async function scrollPage() {
  while (true) {
    const lastHeight = document.body.scrollHeight;
    window.scrollTo(0, lastHeight);
    // Give the page time to append content after the scroll.
    await new Promise(resolve => setTimeout(resolve, 1500));

    if (lastHeight < document.body.scrollHeight) {
      // The page grew, so scroll again before doing anything else.
      continue;
    }
    if (count < depth) {
      count++;
      await start();
      continue;
    }
    break;
  }

  // Final pause before the results are read from the page.
  await new Promise(resolve => setTimeout(resolve, 5000));
  try {
    extractData();
  } finally {
    // Always unblock the page, even when extraction fails.
    const overlay = document.getElementById("loading-overlay");
    if (overlay) {
      overlay.remove();
    }
  }
}
/**
 * Reads the question, answer and URL nodes from the page, removes duplicate
 * questions, and opens a new window showing the results as an HTML table with a
 * link that downloads the same rows as `PAA.csv`.
 *
 * Rows without a URL, or whose URL contains `webcache.googleusercontent.com`,
 * are dropped. When a question occurs more than once the last occurrence wins.
 * Scraped text is HTML-escaped for the table and quoted for the CSV, so commas
 * and quotes in the text are preserved in both outputs.
 *
 * If the browser refuses to open the results window (window.open returns null),
 * the user is alerted and the function returns without producing output.
 *
 * @returns {void}
 */
function extractData() {
  /** Escapes HTML-significant characters so scraped text is rendered as text. */
  function escapeHtml(text) {
    return String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /** Wraps one CSV field in double quotes, doubling any embedded quotes. */
  function toCsvField(text) {
    return '"' + String(text).replace(/"/g, '""') + '"';
  }

  const questions = document.querySelectorAll('div[data-lk] span.CSkcDe');
  const answers = document.querySelectorAll('div[data-lk] div[data-md="61"], div[data-lk] div[data-md="83"]');
  const urls = document.querySelectorAll('div[data-lk] div.yuRUbf a');

  // NOTE(review): the three lists are paired by index, which assumes every
  // question has exactly one matching answer node and one link — confirm
  // against the live markup.
  // A Map keeps arbitrary question text (e.g. "__proto__") safe as a key.
  const rowsByQuestion = new Map();
  questions.forEach(function (question, i) {
    const answer = answers[i];
    // Drop the text-fragment directive from the link.
    const urlText = urls[i] ? urls[i].href.split('#:~:')[0] : '';
    if (!urlText || urlText.includes('webcache.googleusercontent.com')) {
      return;
    }
    const type = answer && answer.getAttribute('data-md') === '83' ? 'List' : 'Paragraph';
    rowsByQuestion.set(question.textContent, [answer ? answer.textContent : '', urlText, type]);
  });

  let tableHTML = '<style>' +
    'body { font-family: Arial, sans-serif; background-color: #f3f3f3; margin: 20px; }' +
    'h1 { text-align: center; }' +
    'p { margin-bottom: 20px; }' +
    'table { width: 100%; border-collapse: collapse; background-color: #fff; box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); }' +
    'thead th { background-color: #f5f5f5; text-align: left; padding: 8px; }' +
    'tbody td { padding: 8px; }' +
    'a.button { display: inline-block; margin-top: 20px; margin-bottom: 40px; padding: 10px 15px; background-color: #4caf50; color: #fff; text-decoration: none; border-radius: 4px; cursor: pointer; }' +
    '</style>' +
    '<head><title>PAA Extractor by Natzir</title></head>' +
    '<h1>PAA Extractor by Natzir</h1>' +
    '<p>This code scrapes the People Also Ask (PAA) section from Google SERP. <br><br>The "Type" column in the table indicates whether the PAA entry is a list or a paragraph.To provide a cleaner result, duplicate questions are removed from the output.</p>' +
    '<p>Follow me on <a href="https://twitter.com/natzir9">@natzir9</a> for more updates.</p>' +
    '<a href="#" id="download-button" class="button">Download PAA</a>' +
    '<table><thead><tr><th>Question</th><th>Answer</th><th>URL</th><th>Type</th></tr></thead><tbody>';
  let csvContent = 'Question,Answer,URL,Type\n';

  rowsByQuestion.forEach(function (row, questionText) {
    const cells = [questionText, row[0], row[1], row[2]];
    tableHTML += '<tr>' + cells.map(function (cell) {
      return '<td>' + escapeHtml(cell) + '</td>';
    }).join('') + '</tr>';
    csvContent += cells.map(function (cell) {
      return toCsvField(cell);
    }).join(',') + '\n';
  });
  tableHTML += '</tbody></table>';

  const newWindow = window.open("", "_blank");
  if (!newWindow) {
    // window.open returns null when the pop-up is blocked.
    alert("The results window was blocked. Please allow pop-ups for this page and run the script again.");
    return;
  }
  newWindow.document.write(tableHTML);
  // Finish the document stream opened by write() so the window stops loading.
  newWindow.document.close();

  const button = newWindow.document.getElementById("download-button");
  const file = new Blob([csvContent], {type: 'text/csv'});
  button.href = URL.createObjectURL(file);
  button.download = 'PAA.csv';
}
// Register the keyframes used by the spinner's "spin" animation.
var style = document.createElement('style');
style.type = 'text/css';
style.innerHTML = '@keyframes spin {0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); }}';
document.getElementsByTagName('head')[0].appendChild(style);

// Full-viewport, semi-transparent overlay that centres its content.
var loadingOverlay = document.createElement("div");
loadingOverlay.id = "loading-overlay";
loadingOverlay.style.position = "fixed";
loadingOverlay.style.top = "0";
loadingOverlay.style.left = "0";
loadingOverlay.style.width = "100%";
loadingOverlay.style.height = "100%";
loadingOverlay.style.background = "rgba(0, 0, 0, 0.5)";
loadingOverlay.style.display = "flex";
loadingOverlay.style.justifyContent = "center";
loadingOverlay.style.alignItems = "center";
loadingOverlay.style.zIndex = "10000";

// Circular spinner: a light ring with a blue top edge, rotated by the keyframes.
var spinner = document.createElement("div");
spinner.style.border = "16px solid #f3f3f3";
spinner.style.borderRadius = "50%";
spinner.style.borderTop = "16px solid #3498db";
spinner.style.width = "120px";
spinner.style.height = "120px";
spinner.style.animation = "spin 2s linear infinite";
loadingOverlay.appendChild(spinner);
document.body.appendChild(loadingOverlay);

// Both async routines are started back to back without waiting on each other.
// Each promise gets a rejection handler so a failure is logged rather than left
// unhandled, and a failed scrape does not leave the overlay covering the page.
start().catch(function (error) {
  console.error('PAA Extractor: expanding questions failed', error);
});
scrollPage().catch(function (error) {
  console.error('PAA Extractor: scraping failed', error);
  var overlay = document.getElementById('loading-overlay');
  if (overlay) {
    overlay.remove();
  }
});
@JesseLance99
Copy link

JesseLance99 commented Jul 22, 2023

Thanks for sharing it. For those seeking research papers for sale, a convenient option is available at this site: https://studyclerk.com/research-paper-for-sale. The website provides an easy process for acquiring research papers quickly and efficiently. I have personally found this link useful whenever I need to hire an essay writer. The platform offers a variety of research papers for purchase, making it a valuable resource for students and researchers who require well-crafted papers on specific subjects.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment