Created
May 7, 2018 19:50
-
-
Save nimatrueway/3ca139148cc428f0b72997a745357eb8 to your computer and use it in GitHub Desktop.
Puppeteer Shahrzad Link Extractor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import puppeteer from 'puppeteer' | |
import fs from "fs"; | |
import util from "util"; | |
// Page to extract the download link from; may be overridden by the first CLI argument.
let url = 'https://lotusplay.com/episode/533/1574ce';
// Only cookies whose domain ends with this suffix are installed on the page.
const domain = 'lotusplay.com';
// Tab-separated cookie dump read by setCookies().
const cookieDumpFile = '/home/nima/cookies.txt';
// User agent passed to the browser via --user-agent when USE_COOKIE_AND_AGENT is on.
const browserAgent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:58.0) Gecko/20100101 Firefox/58.0';
// When true the browser is launched non-headless with devtools and is left open at the end.
const DEBUG = false;
// When true, cookies come from cookieDumpFile and the user agent is overridden;
// when false, a persistent user data directory is used instead.
const USE_COOKIE_AND_AGENT = true;

if (process.argv.length > 2) {
  url = process.argv[2];
}
/**
 * Reads the cookie dump at `cookieDumpFile` and installs every cookie whose
 * domain ends with the module-level `domain` on the given page.
 *
 * Each data line must hold at least seven tab-separated fields, read in this
 * order: domain, session flag, path, secure flag, expires, name, value.
 * A line prefixed with `#HttpOnly_` is a data line flagged as HTTP-only.
 * Blank lines, other `#` comment lines and lines with fewer than seven fields
 * are skipped instead of being turned into half-empty cookie objects.
 *
 * NOTE(review): the layout looks like the Netscape cookies.txt format, where
 * the second column is the "include subdomains" flag rather than a session
 * flag — confirm that `session` is the intended meaning. The field is kept so
 * the objects handed to `page.setCookie` keep their original shape.
 *
 * @param {object} page - Puppeteer page; only `page.setCookie(cookie)` is used.
 * @returns {Promise<void>} Resolves once all matching cookies have been set.
 * @throws Propagates errors from reading the file or from `page.setCookie`.
 */
async function setCookies(page) {
  const HTTP_ONLY_PREFIX = "#HttpOnly_";
  const FIELD_COUNT = 7;

  const readFile = util.promisify(fs.readFile);
  const dumpedCookies = await readFile(cookieDumpFile, 'utf8');

  const cookies = [];
  // Accept both LF and CRLF so a trailing "\r" never ends up in a cookie value.
  for (const rawLine of dumpedCookies.split(/\r?\n/)) {
    // The HttpOnly marker starts with '#', so strip it before the comment check.
    const httpOnly = rawLine.startsWith(HTTP_ONLY_PREFIX);
    const line = httpOnly ? rawLine.substring(HTTP_ONLY_PREFIX.length) : rawLine;
    if (line.trim() === "" || line.startsWith("#")) {
      continue;
    }

    const fields = line.split("\t");
    if (fields.length < FIELD_COUNT) {
      continue;
    }

    // `cookieDomain` deliberately does not shadow the module-level `domain` filter.
    const [cookieDomain, session, path, secure, expires, name, value] = fields;
    cookies.push({
      name,
      value,
      domain: cookieDomain,
      expires: Number.parseInt(expires, 10),
      path,
      httpOnly,
      secure: secure === "TRUE",
      session: session === "TRUE",
    });
  }

  // Cookies are set one at a time, in file order.
  for (const cookie of cookies) {
    if (cookie.domain.endsWith(domain)) {
      await page.setCookie(cookie);
    }
  }
}
/**
 * Script entry point.
 *
 * Launches the browser, optionally installs cookies via setCookies(), opens
 * `url`, and runs an in-page routine that clicks the download button of the
 * third entry under `#downloads` and waits for the generated link to appear.
 * The link is printed to stdout.
 *
 * Every step, including launch, cookie loading and navigation, runs inside
 * the same try block, so any failure prints its message to stderr and (outside
 * DEBUG) exits with code 1 rather than surfacing as an unhandled rejection.
 * In DEBUG mode the browser is left open and the process is not force-exited.
 */
(async function main() {
  let browser = null;
  let result = null;

  try {
    browser = await puppeteer.launch({
      headless: !DEBUG,
      devtools: DEBUG,
      userDataDir: USE_COOKIE_AND_AGENT ? undefined : "/home/nima/.config/puppeteer",
      ignoreDefaultArgs: true,
      args: [
        ...(USE_COOKIE_AND_AGENT ? ["--user-agent=" + browserAgent] : []),
        "--disable-background-networking",
        "--disable-background-timer-throttling",
        "--disable-client-side-phishing-detection",
        "--disable-default-apps",
        "--disable-extensions",
        "--disable-hang-monitor",
        "--disable-popup-blocking",
        "--disable-prompt-on-repost",
        "--disable-sync",
        "--disable-translate",
        "--metrics-recording-only",
        "--no-first-run",
        "--blink-settings=imagesEnabled=false",
        "--safebrowsing-disable-auto-update",
        "--enable-automation",
        "--password-store=basic",
        // "--use-mock-keychain",
        "--remote-debugging-port=0",
      ],
    });

    const page = await browser.newPage();
    if (USE_COOKIE_AND_AGENT) {
      await setCookies(page);
    }
    await page.goto(url);

    // The function below is serialized and executed inside the page, so it must
    // be self-contained; `$` is whatever the page itself exposes under that name.
    result = await page.evaluate(async function run_inside_page() {
      // 0. helper functions
      // Polls every 500 ms for up to `duration` ms; stops early (returning
      // false) as soon as `keepWaiting()` is falsy, otherwise returns true.
      async function wait(duration, keepWaiting = () => true) {
        const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
        for (let elapsed = 0; elapsed < duration; elapsed += 500) {
          await delay(500);
          if (!keepWaiting()) {
            return false;
          }
        }
        return true;
      }

      // 1. check if logged in
      const isLoggedIn = $(".loggedIn").length;
      if (!isLoggedIn) {
        throw new Error("Not logged in !");
      }

      // 2. find download button and click it
      const linkContainer = $("#downloads ul li:eq(2)");
      const linkHref = linkContainer.find(".download-btn-wrapper a[href='#']");
      linkHref.click();

      // 3. wait (up to 5 s) for the generated link to show up and return it
      const linkMissing = () =>
        linkContainer.find(".download-address-wrapper input:visible").length === 0;
      await wait(5000, linkMissing);
      if (linkMissing()) {
        throw new Error("The link was not generated");
      }
      return linkContainer.find(".download-address-wrapper input:visible").val();
    });
  } catch (e) {
    console.error(e.message);
    if (!DEBUG) {
      process.exit(1);
    }
    return;
  }

  console.log(result);
  if (!DEBUG) {
    await browser.close();
    process.exit(0);
  }
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment