Skip to content

Instantly share code, notes, and snippets.

@magicaltoast
Created July 23, 2021 12:37
Show Gist options
  • Save magicaltoast/1fe097b92272aad8a972a52fe87968c2 to your computer and use it in GitHub Desktop.
Save magicaltoast/1fe097b92272aad8a972a52fe87968c2 to your computer and use it in GitHub Desktop.
Secret-agent recaptcha
import { Agent, LocationStatus } from 'secret-agent'
import { URL } from 'url'
import { ISuperElement } from 'awaited-dom/base/interfaces/super'
import child_process from 'child_process'
import axios from 'axios'
/** A zero-argument asynchronous function that resolves to a value of type `T`. */
type async_func<T> = () => Promise<T>
/**
 * Produces a promise that settles once a `setTimeout` timer of `time`
 * milliseconds has fired.
 *
 * @param time - Delay handed to `setTimeout`, in milliseconds.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
const sleep = async (time: number) => {
  const timer = new Promise(wake => {
    setTimeout(wake, time)
  })
  return timer
}
/**
 * Repeatedly runs a list of async probes, in order, until one of them
 * resolves to a truthy value.
 *
 * Between unsuccessful rounds the function sleeps, doubling the delay each
 * round from `min_sleep_time` up to `max_sleep_time`. After a probe succeeds
 * it sleeps once more (half the current delay, but never less than
 * `min_sleep_time`) before returning.
 *
 * @param selectors - Probes to poll; earlier entries win when several match in the same round.
 * @param min_sleep_time - Initial delay between rounds, in milliseconds.
 * @param max_sleep_time - Upper bound for the delay between rounds, in milliseconds.
 * @returns A tuple of the first truthy probe result and the index of the probe that produced it.
 * @throws Error if `selectors` is empty, since no probe could ever succeed.
 */
const selector_race = async <T>(selectors: async_func<T>[], min_sleep_time: number = 1000, max_sleep_time: number = 5000): Promise<[T, number]> => {
  // Without this guard an empty list would poll forever and never resolve.
  if (selectors.length === 0) {
    throw new Error('selector_race requires at least one selector')
  }

  let current_sleep_time = min_sleep_time
  while (true) {
    for (const [index, probe] of selectors.entries()) {
      const res = await probe()
      if (res) {
        // Plain arithmetic instead of `>> 1`: bit shifts coerce to int32 and
        // would turn very large delays into negative numbers.
        await sleep(Math.max(Math.floor(current_sleep_time / 2), min_sleep_time))
        return [res, index]
      }
    }
    await sleep(current_sleep_time)
    current_sleep_time = Math.min(current_sleep_time * 2, max_sleep_time)
  }
}
/**
 * Extracts the reCAPTCHA site key from the anchor iframe on the page.
 *
 * Waits for the element matched by `CAPTCHA_SELECTOR`, then parses the
 * iframe's `src` as a URL and reads its `k` query parameter.
 *
 * @param page - Agent whose current document is inspected.
 * @returns The value of the `k` query parameter, or `null` when the URL has none.
 */
const getSiteKey = async (page: Agent) => {
  const anchorQuery = () => page.document.querySelector(CAPTCHA_SELECTOR)

  await page.waitForElement(anchorQuery())

  const anchorSrc = await anchorQuery().src
  const anchorUrl = new URL(anchorSrc)
  return anchorUrl.searchParams.get("k")
}
/** CSS selector for an iframe whose `src` starts with the reCAPTCHA api2 "anchor" URL. */
const CAPTCHA_SELECTOR = "iframe[src^='https://www.google.com/recaptcha/api2/anchor']"
/**
 * Attempts to get past a reCAPTCHA v2 widget on a page driven by an `Agent`.
 *
 * The flow is: click the checkbox, and if a challenge frame appears switch it
 * to the audio challenge. If that path does not report success, a token is
 * requested from the configured `CaptchaHarvester` and written into the
 * page's `g-recaptcha-response` element.
 */
export class RecaptchaSolver {
  /** Token source used by `solve` when the checkbox/audio path does not succeed. */
  providers: CaptchaHarvester
  /** Initialised to an empty array; not read by any method of this class. */
  audio_providers: any[]
  /** Stored from the constructor; not read by any method of this class. */
  min_sleep_time: number
  /** Stored from the constructor; not read by any method of this class. */
  max_sleep_time: number

  /**
   * @param provider - Harvester asked for a token when the in-page attempts fail.
   * @param min_sleep_time - Stored on the instance as-is.
   * @param max_sleep_time - Stored on the instance as-is.
   */
  constructor(provider: CaptchaHarvester, min_sleep_time: number = 0, max_sleep_time: number = 0) {
    this.providers = provider
    this.audio_providers = []
    this.max_sleep_time = max_sleep_time
    this.min_sleep_time = min_sleep_time
  }

  /** Placeholder; currently does nothing. */
  add_provider() { }

  /** CSS selector for an iframe whose `src` starts with the reCAPTCHA api2 "bframe" URL. */
  challengeSelector = "iframe[src^='https://www.google.com/recaptcha/api2/bframe']"

  /**
   * Clicks the reCAPTCHA checkbox and waits for one of two outcomes: the
   * challenge iframe shows up in the main document, or the checkbox becomes
   * checked inside the anchor frame.
   *
   * @param agent - Agent whose page contains the reCAPTCHA anchor iframe.
   * @returns The element that matched, and `true` when it was the checked
   *   checkbox (solved by the click alone) or `false` when it was the
   *   challenge iframe.
   * @throws Error if no frame environment exists for the anchor iframe.
   */
  async try_click_solve(agent: Agent): Promise<[ISuperElement, boolean]> {
    const iframeSelector = agent.document.querySelector(CAPTCHA_SELECTOR)
    await agent.waitForElement(iframeSelector)

    const recaptchaFrame = await agent.getFrameEnvironment(iframeSelector)
    if (!recaptchaFrame) throw new Error('Recaptcha frame not found')
    await recaptchaFrame.waitForLoad(LocationStatus.AllContentLoaded);

    const solveButtonSelector = recaptchaFrame.document.querySelector('div.recaptcha-checkbox-border')
    await recaptchaFrame.waitForElement(solveButtonSelector)
    await solveButtonSelector.click()

    // Index 0 = challenge iframe appeared, index 1 = checkbox is checked.
    const [element, result] = await selector_race([
      (async () => await agent.document.querySelector(this.challengeSelector)),
      (async () => await recaptchaFrame.document.querySelector("span#recaptcha-anchor.recaptcha-checkbox-checked")),
    ])
    return [element, Boolean(result)]
  }

  /**
   * Runs the checkbox click and, if a challenge is shown, switches it to the
   * audio challenge.
   *
   * When the audio download link becomes available this method only logs the
   * audio source URL and reports success; it does not transcribe or submit
   * an answer itself.
   *
   * @param agent - Agent whose page contains the reCAPTCHA widget.
   * @returns `true` if the click alone solved the captcha or the audio
   *   download link appeared; `false` if the `rc-doscaptcha-body` element
   *   appeared instead.
   * @throws Error if no frame environment exists for the challenge iframe.
   */
  async try_solve_audio(agent: Agent): Promise<boolean> {
    const [challengeElement, click_result] = await this.try_click_solve(agent)
    if (click_result) return true

    const challengeFrame = await agent.getFrameEnvironment(challengeElement)
    // Explicit guard instead of a non-null assertion so a missing frame
    // fails with a clear message rather than a TypeError further down.
    if (!challengeFrame) throw new Error('Recaptcha challenge frame not found')

    const audioButton = challengeFrame.document.querySelector("button#recaptcha-audio-button")
    await challengeFrame.waitForElement(audioButton)
    await audioButton.click()

    // Index 0 = "rc-doscaptcha-body" element, index 1 = audio download link.
    const [, result] = await selector_race([
      (async () => await challengeFrame.document.querySelector('div.rc-doscaptcha-body')),
      (async () => await challengeFrame.document.querySelector('.rc-audiochallenge-tdownload-link')),
    ])

    const downloadLinkFound = result === 1
    if (downloadLinkFound) {
      console.log(await challengeFrame.document.querySelector('audio#audio-source').src)
      return true
    }

    console.log("Failed")
    return false
  }

  /**
   * Tries the in-page checkbox/audio path first and falls back to a token
   * from the harvester, which is written into the `g-recaptcha-response`
   * element of the page.
   *
   * @param agent - Agent whose page contains the reCAPTCHA widget.
   * @returns `true` when the in-page path reported success; `undefined`
   *   after a harvester token has been written into the page.
   */
  async solve(agent: Agent) {
    if (await this.try_solve_audio(agent)) return true

    const token = await this.providers.solve()
    // JSON.stringify yields a correctly escaped JS string literal, so quotes,
    // backslashes or newlines in the token cannot break out of the script.
    const script = `document.getElementById("g-recaptcha-response").innerHTML = ${JSON.stringify(String(token))}`
    // Awaited so the token is in place (and failures surface) before the
    // caller continues.
    // NOTE(review): the directive below is presumably needed because
    // `executeJs` is missing from the Agent typings — confirm.
    //@ts-expect-error
    await agent.executeJs(script)
    // This path has always produced no value; kept for existing callers.
    return undefined
  }
}
/**
 * Wrapper around the external `harvester` command-line tool and the local
 * HTTP endpoint (`http://127.0.0.1:5000`) that this class polls for tokens.
 */
export class CaptchaHarvester {
  /** Host name passed to the harvester (`-d`) and used in the token URL. */
  domain: string
  /** Site key passed to the harvester (`-k`). */
  dataKey: string
  /** Initial delay between token polls, in milliseconds. */
  min_sleep_time: number
  /** Upper bound for the delay between token polls, in milliseconds. */
  max_sleep_time: number

  /**
   * @param domain - Host name the captcha belongs to.
   * @param dataKey - reCAPTCHA site key.
   * @param min_sleep_time - Initial poll delay in milliseconds.
   * @param max_sleep_time - Maximum poll delay in milliseconds.
   */
  constructor(domain: string, dataKey: string, min_sleep_time: number = 1000, max_sleep_time: number = 20_000) {
    this.dataKey = dataKey
    this.domain = domain
    this.min_sleep_time = min_sleep_time
    this.max_sleep_time = max_sleep_time
  }

  /**
   * Polls `http://127.0.0.1:5000/<domain>/token` until it answers with
   * status 200, then returns the response body.
   *
   * Every unsuccessful attempt (request error or a non-200 status) is
   * followed by a sleep whose length doubles each time, from
   * `min_sleep_time` up to `max_sleep_time`. The loop never gives up.
   *
   * @returns The body of the first 200 response.
   */
  async solve() {
    let timeout = this.min_sleep_time
    while (true) {
      try {
        const response = await axios.get(`http://127.0.0.1:5000/${this.domain}/token`)
        if (response.status === 200) {
          console.log(response.data)
          return response.data
        }
      } catch {
        // Deliberately ignored: a failed request is treated as "no token
        // yet" and simply retried after the back-off below.
      }
      // Back off after ANY unsuccessful attempt, including non-200 answers
      // that do not throw, so the loop can never spin without a delay.
      await sleep(timeout)
      timeout = Math.min(this.max_sleep_time, timeout * 2)
    }
  }

  /**
   * Launches the `harvester` executable for this domain and site key in
   * `recaptcha-v2` mode.
   *
   * The process is started without a shell and with an argument array, so
   * the domain, site key and flag values are passed verbatim and can never
   * be interpreted as shell syntax. The child's stdout/stderr are inherited
   * from this process, so its output is visible and never accumulates in an
   * in-memory buffer.
   *
   * @param additional_flags - Extra flags; each entry becomes `-<key> <value>`
   *   (or just `-<key>` when the value is an empty string).
   * @returns The spawned child process, so callers can observe or stop it.
   */
  start(additional_flags: { [key: string]: string } = {}) {
    const flagArgs = Object.entries(additional_flags).flatMap(
      ([key, value]) => (value === '' ? [`-${key}`] : [`-${key}`, value]),
    )
    const args = ['-k', this.dataKey, '-d', this.domain, ...flagArgs, 'recaptcha-v2']
    const command = ['harvester', ...args].join(' ')

    // 'inherit' replaces the previous 'message' listener, which only fires
    // for IPC messages and therefore never reported any harvester output.
    const harvester = child_process.spawn('harvester', args, { stdio: 'inherit' })
    // Without a listener a failed launch (e.g. executable not found) would
    // surface as an uncaught 'error' event and take the whole process down.
    harvester.once('error', (error) => console.error(`Harvester failed to start using "${command}"`, error))
    harvester.once('spawn', () => console.debug(`Harvester spawned using "${command}"`))
    return harvester
  }

  /**
   * Builds a harvester for the page the agent is currently on, using the
   * page URL's host name and the site key read from the reCAPTCHA iframe.
   *
   * @param page - Agent positioned on a page containing a reCAPTCHA widget.
   * @returns A harvester configured with default poll delays.
   * @throws Error if no site key could be read from the page.
   */
  static async fromPage(page: Agent): Promise<CaptchaHarvester> {
    const domain = (new URL(await page.url)).hostname
    const siteKey = await getSiteKey(page)
    if (!siteKey) { throw new Error('Captcha not found') }
    return new CaptchaHarvester(domain, siteKey)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment