Skip to content

Instantly share code, notes, and snippets.

@AlexRatmansky
Created January 15, 2020 15:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AlexRatmansky/46dc2d408f3e73416d876a3953849e57 to your computer and use it in GitHub Desktop.
Save AlexRatmansky/46dc2d408f3e73416d876a3953849e57 to your computer and use it in GitHub Desktop.
Full code to Scrape Amazon behind Login Wall - Optimized and Works in Headless Mode (Avoid BOT detection)
// Get addresses from Amazon Address Book
const puppeteer = require('puppeteer');
(async () => {
// Syntactic Sugar
/**
 * Opens `url` in the script's `page` and waits for `page.goto` to settle.
 *
 * @param {string} url - Address to load.
 * @returns {Promise<void>} Resolves with no value once the load call finished.
 */
const Navigate = async function (url) {
  await page.goto(url);
};
/**
 * Clicks the element matched by `selector`, then sends `text` through the
 * page keyboard (the click presumably gives the element focus).
 *
 * @param {string} selector - CSS selector of the input to fill.
 * @param {string} text - Characters to type.
 * @returns {Promise<void>}
 */
const EnterText = async function (selector, text) {
  await page.click(selector);
  const keyboard = page.keyboard;
  await keyboard.type(text);
};
/**
 * Clicks the element matched by `selector`, then waits for the page to settle.
 *
 * @param {string} selector - CSS selector of the element to click.
 * @param {number} [waitFor=-1] - Seconds to pause after the click. A negative
 *   value (the default) waits for a page navigation instead of a fixed delay.
 * @returns {Promise<void>}
 */
const ClickNavigate = async (selector, waitFor = -1) => {
  if (waitFor >= 0) {
    await page.click(selector);
    // Plain timer instead of `page.waitFor`, which was deprecated and then
    // removed in later Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, waitFor * 1000));
    return;
  }
  // Register the navigation wait BEFORE clicking. If the click is awaited
  // first, a fast navigation can finish before anyone listens for it and
  // the wait then hangs until it times out.
  await Promise.all([
    page.waitForNavigation(),
    page.click(selector),
  ]);
};
// Main flow: runtime switches
const C_HEADLESS = true; // passed to puppeteer.launch as `headless`
const C_OPTIMIZE = true; // when true, the request interception further down is installed
const C_SLOWMOTION = 0; // passed to puppeteer.launch as `slowMo`: slow down by X ms

// Start the browser and open the single tab used by the rest of the script.
const launchOptions = { headless: C_HEADLESS, slowMo: C_SLOWMOTION };
const browser = await puppeteer.launch(launchOptions);
const page = await browser.newPage();

// Send an explicit Accept-Language header so Amazon doesn't detect the
// session as a bot.
const extraHeaders = { 'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8' };
await page.setExtraHTTPHeaders(extraHeaders);
// No unwanted resources: abort requests whose resource type is not needed
// to read the address data, let everything else through.
if (C_OPTIMIZE) {
  await page.setRequestInterception(true);
  const blockedResourceTypes = new Set([
    'image',
    'stylesheet',
    'media',
    'font',
    'texttrack',
    'object',
    'beacon',
    'csp_report',
    'imageset',
  ]);
  page.on('request', (request) => {
    // `resourceType` is a method and must be called; comparing the function
    // itself never matched, so nothing was ever blocked. A Set lookup also
    // avoids the old `indexOf(...) > 0` test, which skipped the first entry.
    if (blockedResourceTypes.has(request.resourceType())) {
      request.abort();
    } else {
      request.continue();
    }
  });
}
// Creds: read from the AMAZON_EMAIL / AMAZON_PASSWORD environment variables
// when they are set, so real credentials never have to be written into this
// file. The placeholders are the fallback and must be replaced (or the
// variables exported) before running.
const USER_EMAIL = process.env.AMAZON_EMAIL || "YOUR_EMAIL_HERE";
const USER_PASSWORD = process.env.AMAZON_PASSWORD || "YOUR_PASSWORD_HERE";
// Home page constants (only referenced by the disabled slow path below)
const S_LOGIN_LINK = '#nav-link-accountList';
const U_HOMEPAGE = 'https://amazon.com';

// Sign-in URL, assembled from its query parameters.
// Optimized flow to reach the address book faster: manually try to open the
// target page before logging in, get stopped by the Amazon login wall, and
// capture the resulting URL, which carries the target page in its
// `openid.return_to` field. Logging in through that URL lands directly on the
// target page without browsing through the heavy home page.
// Caution: going straight to the address book (or any page with sensitive
// information) challenges the user with an additional password screen.
const U_LOGIN_PAGE = 'https://www.amazon.com/ap/signin?' + [
  'clientContext=135-8638983-8261231',
  'openid.return_to=https%3A%2F%2Fwww.amazon.com%2Fa%2Faddresses',
  'openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select',
  'openid.assoc_handle=usflex',
  'openid.mode=checkid_setup',
  'marketPlaceId=ATVPDKIKX0DER',
  'openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select',
  'pageId=usflex',
  'openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0',
  'openid.pape.max_auth_age=900',
  'siteState=clientContext%3D143-3525329-4850620%2CsourceUrl%3Dhttps%253A%252F%252Fwww.amazon.com%252Fa%252Faddresses%2Csignature%3Dnull',
].join('&');

// Slow path, disabled since the flow above replaces it:
// ------------------------------------------
// // Go to Home Page
// await Navigate(U_HOMEPAGE)
//
// // Go to Login Page
// await ClickNavigate(S_LOGIN_LINK, 1)
// ------------------------------------------

// Go directly to the login page
await Navigate(U_LOGIN_PAGE); // USER-ACTION
// Login page selectors
const S_EMAIL_TEXT = '#ap_email';
const S_CONTINUE_BUTTON = '#continue';
const S_PASSWORD_TEXT = '#ap_password';
const S_SIGNIN_BUTTON = '#signInSubmit';

// Login - Step 1: e-mail, then "Continue"
await EnterText(S_EMAIL_TEXT, USER_EMAIL); // USER-ACTION
await ClickNavigate(S_CONTINUE_BUTTON); // USER-ACTION

// Login - Step 2: password, then "Sign in"
await EnterText(S_PASSWORD_TEXT, USER_PASSWORD); // USER-ACTION
await ClickNavigate(S_SIGNIN_BUTTON); // USER-ACTION

// Secondary protection: Amazon asks for the password again only when the
// landing page holds sensitive information. Fill it in only if the field is
// actually on the page; clicking a missing selector would throw and abort
// the whole run.
const passwordPrompt = await page.$(S_PASSWORD_TEXT);
if (passwordPrompt !== null) {
  await EnterText(S_PASSWORD_TEXT, USER_PASSWORD); // USER-ACTION
  await ClickNavigate(S_SIGNIN_BUTTON); // USER-ACTION
}
// Address book: URL and CSS selectors

// Builds the id selector shared by the per-field address widgets.
const addressWidget = (field) => `#address-ui-widgets-${field}`;

const U_ADDRESSBOOK = 'https://www.amazon.com/a/addresses';

// One tile per saved address
const S_ADDRESS_TILE = '.normal-desktop-address-tile';

// Fields inside a tile
const S_ADDRESS_FULLNAME = addressWidget('FullName');
const S_ADDRESS_LINEONE = addressWidget('AddressLineOne');
const S_ADDRESS_LINETWO = addressWidget('AddressLineTwo');
const S_ADDRESS_CITYSTATEPOSTALCODE = addressWidget('CityStatePostalCode');
const S_ADDRESS_COUNTRY = addressWidget('Country');
const S_ADDRESS_PHONENUMBER = addressWidget('PhoneNumber');

// Default-address markers
const S_ADDRESS_NODEFAULT = '.address-section-no-default';
const S_ADDRESS_DEFAULT = '.default-section';
const S_ADDRESS_DEFAULT_FRESH = '#ya-myab-fresh-address-icon';
const S_ADDRESS_DEFAULT_AMAZON = '#ya-myab-default-shipping-address-icon';

// Explicit navigation is disabled: the login URL already returns to the
// address book.
// ------------------------------------------
// // Go to AddressBook
// await Navigate(U_ADDRESSBOOK)
// ------------------------------------------
// Get all addresses

/**
 * Reads the `innerHTML` of the first descendant of `parent` that matches
 * `selector`.
 *
 * @param {object} parent - Element handle to search inside.
 * @param {string} selector - CSS selector of the field.
 * @returns {Promise<string>} The markup, or '' when no element matches, so a
 *   tile that lacks one field does not abort the whole run.
 */
const readInnerHTML = async (parent, selector) => {
  const element = await parent.$(selector);
  if (element === null) {
    return '';
  }
  const property = await element.getProperty('innerHTML');
  return property.jsonValue();
};

/**
 * Drops the label in front of the phone number.
 *
 * @param {string} rawText - Field content, "<label>: <number>" on the tiles seen so far.
 * @returns {string} The trimmed segment after the first ':', or the whole
 *   trimmed text when there is no ':' at all.
 */
const parsePhoneNumber = (rawText) => {
  const parts = rawText.split(':');
  const value = parts.length > 1 ? parts[1] : parts[0];
  return value.trim();
};

/**
 * Extracts one address record from an address tile.
 *
 * @param {object} addressElement - Element handle of the tile.
 * @returns {Promise<object>} Record with the address fields and default flags.
 */
const scrapeAddressTile = async (addressElement) => {
  let defaultAddressforAmazon = false;
  let defaultAddressforFresh = false;
  // The default markers are only looked up inside the tile's default section.
  const defaultSection = await addressElement.$(S_ADDRESS_DEFAULT);
  if (defaultSection !== null) {
    defaultAddressforAmazon = (await defaultSection.$(S_ADDRESS_DEFAULT_AMAZON)) !== null;
    defaultAddressforFresh = (await defaultSection.$(S_ADDRESS_DEFAULT_FRESH)) !== null;
  }
  return {
    FullName: await readInnerHTML(addressElement, S_ADDRESS_FULLNAME),
    AddressLineOne: await readInnerHTML(addressElement, S_ADDRESS_LINEONE),
    AddressLineTwo: await readInnerHTML(addressElement, S_ADDRESS_LINETWO),
    CityStatePostalCode: await readInnerHTML(addressElement, S_ADDRESS_CITYSTATEPOSTALCODE),
    Country: await readInnerHTML(addressElement, S_ADDRESS_COUNTRY),
    PhoneNumber: parsePhoneNumber(await readInnerHTML(addressElement, S_ADDRESS_PHONENUMBER)),
    DefaultAddressforAmazon: defaultAddressforAmazon,
    DefaultAddressforFresh: defaultAddressforFresh,
  };
};

try {
  const allAddressElements = await page.$$(S_ADDRESS_TILE);
  // Tiles are processed concurrently; the fields of one tile are read in order.
  const addresses = await Promise.all(allAddressElements.map(scrapeAddressTile));
  console.log(addresses);
} finally {
  // Close the browser even when scraping throws, so no Chromium process is left behind.
  await browser.close();
}
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment