Created
August 19, 2025 20:24
-
-
Save olawanlejoel/fedcd8ed6182c979c8b6f8bb503dbd16 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Now we are more focused on the dev mode and not the AI mode, but here is the issue we are experiencing. When we scrape with headless: true, we get the parameters to be injected, but I don't know whether they are actually injected; either way, the captcha ends up not being solved. When we set headless to false, the scrape works. | |
| Edit | |
| Relevant chats | |
| I see the issue! This is a common problem with Cloudflare Turnstile captchas in headless mode. Let me analyze what's happening: | |
| The Problem | |
| When headless: true, you're getting the Turnstile parameters intercepted correctly, but the captcha isn't actually being solved. When headless: false, it works fine. This suggests a few potential issues: | |
| 1. Headless Detection by Cloudflare | |
| Cloudflare can detect headless browsers through various fingerprinting techniques, even when you think you've masked them. | |
| 2. Callback Timing Issues in Headless Mode | |
| The cfCallback function injection might be happening at the wrong time in headless mode due to different rendering behavior. | |
| 3. Missing Browser Features in Headless | |
| Some browser APIs or behaviors that Turnstile relies on might be missing or behave differently in headless mode. | |
| Debugging Steps | |
| Let's add some debugging to understand what's happening. Here are the changes I'd suggest: | |
| 1. Enhanced Logging in the Injection Script | |
| Update utils/captcha/injectTurnstileParams.ts: | |
| typescript | |
// In-page script: polls until `window.turnstile` exists, then replaces
// `turnstile.render` so the widget parameters are logged for the host process
// and the site's own callback becomes reachable through `window.cfCallback`.

// `turnstile`, `cfCallback` and `originalCallback` are not part of the DOM
// typings, hence the deliberate `any` view of `window`.
const windowWithTurnstile = window as any;

// Turn console.clear into a log line so earlier output is not wiped.
console.clear = () => console.log("Console was cleared");
console.log("π§ Turnstile injection script loaded");

// Poll every 50 ms until the Turnstile object shows up.
const i = setInterval(() => {
  if (!windowWithTurnstile.turnstile) {
    console.log("β³ Waiting for turnstile object...");
    return;
  }

  console.log("β Turnstile object found, setting up interception");
  clearInterval(i);
  // Cancel the watchdog as well; otherwise it would still fire after 10 s and
  // report "object not found" although the hook was installed successfully.
  clearTimeout(giveUpTimer);

  // NOTE(review): the replacement does not call the original render and
  // returns nothing — confirm the page does not rely on the widget id.
  windowWithTurnstile.turnstile.render = (a: any, b: any) => {
    console.log("π― Turnstile render intercepted", { a, b });
    const params = {
      sitekey: b.sitekey,
      pageurl: window.location.href,
      data: b.cData,
      pagedata: b.chlPageData,
      action: b.action,
      userAgent: navigator.userAgent,
      json: 1,
    };
    console.log("π€ Sending intercepted params:", params);
    // Machine-readable line: the host process looks for this exact prefix.
    console.log("intercepted-params:" + JSON.stringify(params));

    // Invoked later with the solved token; forwards it to the site's callback.
    windowWithTurnstile.cfCallback = (token: string) => {
      console.log("π Callback received with token:", token ? "β Token received" : "β No token");
      if (b.callback && typeof b.callback === 'function') {
        console.log("π Calling original callback with token");
        b.callback(token);
      } else {
        console.warn("β οΈ No original callback found");
      }
    };
    // Store original callback for debugging
    windowWithTurnstile.originalCallback = b.callback;
    console.log("π§ Callback setup complete");
  };
}, 50);

// Watchdog: stop polling if the Turnstile object never appears within 10 s.
const giveUpTimer = setTimeout(() => {
  clearInterval(i);
  console.log("β° Turnstile interception timeout - object not found");
}, 10000);
| 2. Enhanced Turnstile Handler with Better Error Handling | |
| Update the setupTurnstileHandler function in services/scrape.service.ts: | |
| typescript | |
/**
 * Waits for the page to log the Turnstile parameters captured by the injected
 * script, asks 2captcha to solve the challenge, and passes the resulting token
 * to the page through `window.cfCallback`.
 *
 * Only the first `intercepted-params:` message is acted on; messages that
 * arrive while a solve is running or after the promise has settled are ignored.
 *
 * @param page - Page-like object. Must provide `on("console", …)` and
 *   `evaluate(…)`; if it also provides `off(…)`, the console listener is
 *   detached as soon as the promise settles.
 * @returns A promise that resolves once the token has been handed to the page.
 *   It rejects when no parameters arrive within 45 seconds, or with the error
 *   raised while parsing, solving or injecting.
 */
function setupTurnstileHandler(page: Page | any): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const marker = "intercepted-params:";
    let resolved = false; // the promise has been resolved or rejected
    let solving = false; // a solve request has already been started

    // Marks the promise as settled exactly once and releases the timer and
    // the console listener. Returns false if it was already settled.
    const settle = (): boolean => {
      if (resolved) return false;
      resolved = true;
      clearTimeout(timeout);
      if (typeof page.off === "function") page.off("console", onConsole);
      return true;
    };

    const timeout = setTimeout(() => {
      if (settle()) {
        reject(new Error("Turnstile handler timeout - no params received"));
      }
    }, 45000); // 45 second timeout

    const onConsole = async (msg: any): Promise<void> => {
      const txt: string = msg.text();
      console.log("π Console message:", txt);

      const markerAt = txt.indexOf(marker);
      if (markerAt === -1) return;

      if (resolved) {
        console.log("β οΈ Received params after already resolved");
        return;
      }
      if (solving) {
        // A widget may render more than once; do not start a second solve.
        console.log("Ignoring duplicate Turnstile params - a solve is already in progress");
        return;
      }
      solving = true;

      try {
        // Parse only what follows the marker so leading text cannot break the JSON.
        const params = JSON.parse(txt.slice(markerAt + marker.length));
        console.log("β οΈ Turnstile params intercepted:", params);

        const apiKey = process.env.CAPTCHA_API_KEY;
        if (!apiKey) {
          throw new Error("CAPTCHA_API_KEY is not set");
        }
        const { Solver } = await import("@2captcha/captcha-solver");
        const solver = new Solver(apiKey);

        console.log("π Sending to 2captcha...");
        const res = await solver.cloudflareTurnstile(params);
        console.log("β Turnstile solved:", res.id);

        console.log("π Injecting token...");
        // Runs inside the page: forward the token to the hook set up by the
        // injected script.
        await page.evaluate((token: string) => {
          console.log("π― Token injection executing:", token ? "β Has token" : "β No token");
          if ((window as any).cfCallback) {
            console.log("π Calling cfCallback");
            (window as any).cfCallback(token);
          } else {
            console.error("β cfCallback not found on window");
          }
        }, res.data);
        console.log("β Token injection complete");

        if (settle()) resolve();
      } catch (e: unknown) {
        console.error("β Turnstile solving error:", e);
        if (settle()) reject(e);
      }
    };

    page.on("console", onConsole);
  });
}
| 3. Better Headless Browser Configuration | |
| Update your browser launch options in utils/getBrowser.ts to better mimic a real browser: | |
| typescript | |
/**
 * Returns the pooled browser for a user, launching a new headless Chromium
 * instance when none is cached or the cached one has not been used for
 * `MAX_BROWSER_AGE` or longer.
 *
 * @param userUuid - Key under which the browser is cached in `browserPool`.
 * @param useProxy - When true, the browser is launched with the proxy taken
 *   from the `PROXY_SERVER`, `PROXY_USER` and `PROXY_PASS` environment variables.
 * @returns The cached or newly launched browser.
 */
export async function getBrowser(
  userUuid: string,
  useProxy = false
): Promise<Browser> {
  const now = Date.now();
  const cached = browserPool.get(userUuid);
  if (cached && now - cached.lastUsed < MAX_BROWSER_AGE) {
    cached.lastUsed = now;
    return cached.browser;
  }
  // Stale entry: shut the old browser down before replacing it.
  if (cached) await cached.browser.close();

  const launchOptions: Parameters<typeof chromium.launch>[0] = {
    headless: true,
    args: [
      "--disable-dev-shm-usage",
      "--no-sandbox",
      // Add these for better headless compatibility
      "--disable-blink-features=AutomationControlled",
      "--disable-features=VizDisplayCompositor",
      "--disable-background-networking",
      "--disable-background-timer-throttling",
      "--disable-backgrounding-occluded-windows",
      "--disable-renderer-backgrounding",
      "--disable-field-trial-config",
      "--disable-ipc-flooding-protection",
      // Canvas fingerprinting
      "--disable-canvas-aa",
      "--disable-2d-canvas-clip-aa",
      // WebGL fingerprinting
      "--disable-gl-drawing-for-tests",
      "--disable-accelerated-2d-canvas"
    ],
  };
  if (useProxy) {
    launchOptions.proxy = {
      server: process.env.PROXY_SERVER!,
      username: process.env.PROXY_USER!,
      password: process.env.PROXY_PASS!,
    };
  }
  const browser = await chromium.launch(launchOptions);

  // Add stealth modifications after launch
  // NOTE(review): this context is neither returned nor stored in the pool, so
  // contexts that callers create from the returned browser will not carry
  // these patches — confirm how the context is meant to be used.
  const context = await browser.newContext({
    userAgent: require("./captcha/userAgents").getRandomUserAgent(),
    viewport: { width: 1366, height: 768 },
    locale: 'en-US',
    timezoneId: 'America/New_York'
  });

  // Override navigator properties to avoid detection. The function below is
  // executed inside the page, so it must not reference anything from this scope.
  await context.addInitScript(() => {
    // `webdriver` is an accessor on Navigator.prototype, so deleting it from
    // the `navigator` instance has no effect. Shadow it on the prototype and
    // report `false`, the value for a browser not under automation.
    Object.defineProperty(Navigator.prototype, 'webdriver', {
      get: () => false,
      configurable: true
    });
    // Override the plugins property
    Object.defineProperty(navigator, 'plugins', {
      get: () => [1, 2, 3, 4, 5]
    });
    // Override the languages property
    Object.defineProperty(navigator, 'languages', {
      get: () => ['en-US', 'en']
    });
    // Mock webgl: spoof the unmasked vendor (37445) and renderer (37446)
    // strings and defer every other query to the native implementation.
    const getParameter = WebGLRenderingContext.prototype.getParameter;
    WebGLRenderingContext.prototype.getParameter = function (
      this: WebGLRenderingContext,
      parameter: number
    ) {
      if (parameter === 37445) {
        return 'Intel Open Source Technology Center';
      }
      if (parameter === 37446) {
        return 'Mesa DRI Intel(R) Ivybridge Mobile ';
      }
      // The native method needs the rendering context as its receiver;
      // calling it unbound throws "Illegal invocation".
      return getParameter.call(this, parameter);
    };
  });

  browserPool.set(userUuid, { browser, lastUsed: now });
  return browser;
}
| 4. Test with New Context Creation | |
| Since you're using a browser pool, try creating a fresh context for each captcha-heavy operation: | |
| typescript | |
| // In scrapePage function, after getting the browser | |
| if (aiMode) { | |
| // AI mode logic stays the same | |
| } else { | |
| // Instead of using context from pool, create fresh one for captcha pages | |
| const freshContext = await browser.newContext({ | |
| userAgent, | |
| viewport: { width: 1366, height: 768 }, | |
| }); | |
| // Add the same stealth scripts as above | |
| await freshContext.addInitScript(/* stealth script */); | |
| const page = await freshContext.newPage(); | |
| // ... rest of the logic | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment