-
-
Save friccobaldi/8abce5d9affcb1b7214d322bf081d9a1 to your computer and use it in GitHub Desktop.
Detect parked or inactive domains using browser automation and pattern matching.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
| const { chromium } = require('playwright'); | |
| const fs = require('fs').promises; | |
| const path = require('path'); | |
/**
 * Registrable domain names of parking providers, registrars and domain
 * marketplaces. A page that issues a network request to one of these domains
 * is treated as parked.
 *
 * Entries are lowercase bare domains (no scheme, no leading dot) so they can
 * be compared directly against URLs. The list is frozen because it is shared,
 * read-only module state.
 */
const PARKING_URL_PATTERNS = Object.freeze([
  'above.com',
  'afternic.com',
  'bodis.com',
  'brandbucket.com',
  'dan.com',
  'domainnamesales.com',
  'dynadot.com',
  'efty.com',
  'enom.com',
  'epik.com',
  'flippa.com',
  'godaddy.com',
  'hover.com',
  'hugedomains.com',
  'name.com',
  'namecheap.com',
  'parklogic.com',
  'parkingcrew.com',
  'porkbun.com',
  'sav.com',
  'sedo.com',
  'sedoparking.com',
  'squadhelp.com',
  'undeveloped.com',
  'uniregistry.com',
  'voodoo.com',
]);
/**
 * Phrases that indicate a parked or for-sale domain when they appear in the
 * visible text of a page.
 *
 * Entries must be lowercase: callers lowercase the page text before searching.
 * Order matters because the first matching phrase is the one reported.
 * The list is frozen because it is shared, read-only module state.
 */
const PARKED_CONTENT_PATTERNS = Object.freeze([
  'this domain is for sale',
  'domain for sale',
  'parked domain',
  'this domain may be for sale',
  'buy this domain',
  'domain parking',
  'expired domain',
]);
/**
 * Scans URLs with a headless Chromium browser and flags the ones that look
 * like parked / for-sale domains.
 *
 * A URL is considered parked when either
 *   1. any network request made while loading it goes to a host listed in
 *      PARKING_URL_PATTERNS, or
 *   2. the page's body text contains a phrase from PARKED_CONTENT_PATTERNS.
 *
 * Navigation failures (including timeouts) are recorded as errors and are
 * never counted as parked.
 */
class ParkedDomainScanner {
  /**
   * @param {number} [concurrency=4] Number of URLs scanned in parallel per batch.
   * @param {boolean} [takeScreenshots=false] Save a screenshot of every page
   *   that is detected as parked.
   */
  constructor(concurrency = 4, takeScreenshots = false) {
    this.browser = null;
    this.results = [];
    this.concurrency = concurrency;
    this.takeScreenshots = takeScreenshots;
  }

  /** Launches the shared headless Chromium instance. Must be called before scanning. */
  async init() {
    this.browser = await chromium.launch({
      headless: true
    });
  }

  /**
   * Creates an isolated browser context plus one page for a single scan.
   * Images, fonts, media and stylesheets are blocked to speed up loading.
   *
   * @returns {Promise<{context: object, page: object}>}
   */
  async createWorkerContext() {
    const blockedResourceTypes = ['image', 'font', 'media', 'stylesheet'];
    const context = await this.browser.newContext({
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    });

    try {
      await context.route('**/*', async (route) => {
        const shouldBlock = blockedResourceTypes.includes(route.request().resourceType());
        try {
          // abort()/continue() return promises; awaiting them here keeps a
          // late failure from surfacing as an unhandled rejection.
          await (shouldBlock ? route.abort() : route.continue());
        } catch (error) {
          // Best effort: the request is usually gone because the page or
          // context was closed while it was still in flight.
        }
      });

      const page = await context.newPage();
      page.setDefaultTimeout(30000);
      return { context, page };
    } catch (error) {
      // Do not leak the context when setting it up fails half-way.
      await context.close().catch(() => {});
      throw error;
    }
  }

  /** Closes the browser if it was launched. */
  async close() {
    if (this.browser) await this.browser.close();
  }

  /**
   * Extracts the hostname from a URL string. Strings without a scheme
   * (e.g. "sedo.com/path") are retried with an "http://" prefix.
   *
   * @param {string} url
   * @returns {string} Hostname without a trailing dot, or '' if none could be parsed.
   */
  extractHostname(url) {
    if (typeof url !== 'string' || url.trim() === '') return '';

    for (const candidate of [url, `http://${url}`]) {
      try {
        return new URL(candidate).hostname.replace(/\.$/, '');
      } catch (error) {
        // Not parseable in this form; try the next candidate.
      }
    }
    return '';
  }

  /**
   * Checks whether a URL points at a known parking provider.
   *
   * The comparison is done on the hostname (exact match or sub-domain), not
   * on the raw URL text, so "jordan.com" does not match the "dan.com" entry.
   *
   * @param {string} url
   * @returns {string|undefined} The matching PARKING_URL_PATTERNS entry, if any.
   */
  isParkedByUrl(url) {
    const hostname = this.extractHostname(url);
    if (!hostname) return undefined;

    return PARKING_URL_PATTERNS.find(
      (pattern) => hostname === pattern || hostname.endsWith(`.${pattern}`)
    );
  }

  /**
   * Checks page text for a parked-domain phrase (case-insensitive).
   *
   * @param {string|null|undefined} content Page text; non-strings never match.
   * @returns {string|undefined} The first matching PARKED_CONTENT_PATTERNS entry, if any.
   */
  isParkedByContent(content) {
    if (typeof content !== 'string' || content === '') return undefined;

    // Collapse whitespace runs so a phrase broken across lines or padded with
    // extra spaces in the markup is still found.
    const normalized = content.toLowerCase().replace(/\s+/g, ' ');
    return PARKED_CONTENT_PATTERNS.find((pattern) => normalized.includes(pattern));
  }

  /**
   * Tells whether an error describes a timeout, judged by its message text.
   *
   * @param {string|Error|null|undefined} error Error message or Error-like object.
   * @returns {boolean}
   */
  isTimeoutError(error) {
    let message = '';
    if (typeof error === 'string') {
      message = error;
    } else if (error && typeof error.message === 'string') {
      message = error.message;
    }

    const lowered = message.toLowerCase();
    // 'navigation timeout' is already covered by 'timeout'.
    return ['timeout', 'timed out'].some((keyword) => lowered.includes(keyword));
  }

  /**
   * Saves a full-page screenshot into ./screenshots when screenshots are enabled.
   * Failures are logged and reported as null; they never fail the scan.
   *
   * @param {object} page Playwright page to capture.
   * @param {string} url URL the page was loaded from (used for the file name).
   * @returns {Promise<string|null>} Path of the written file, or null.
   */
  async takeScreenshot(page, url) {
    if (!this.takeScreenshots) return null;

    try {
      const screenshotsDir = 'screenshots';
      await fs.mkdir(screenshotsDir, { recursive: true });

      // File name: <hostname>_<ISO timestamp with ':' and '.' replaced>.png
      const domain = new URL(url).hostname;
      const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
      const filepath = path.join(screenshotsDir, `${domain}_${timestamp}.png`);

      await page.screenshot({
        path: filepath,
        fullPage: true,
        timeout: 30000
      });
      return filepath;
    } catch (error) {
      console.log(`Failed to take screenshot for ${url}: ${error.message}`);
      return null;
    }
  }

  /**
   * Loads one URL in a fresh browser context and classifies it.
   * Never rejects: every failure is captured in the returned result.
   *
   * @param {string} url Absolute URL to scan.
   * @returns {Promise<object>} Result with url, isParked, reason, title,
   *   statusCode, error, screenshot and timestamp.
   */
  async scanUrl(url) {
    const result = {
      url: url,
      isParked: false,
      reason: null,
      title: null,
      statusCode: null,
      error: null,
      screenshot: null,
      timestamp: new Date().toISOString()
    };

    let context = null;
    let page = null;
    const requestedUrls = [];
    const recordRequest = (request) => {
      requestedUrls.push(request.url());
    };

    try {
      console.log(`Scanning: ${url}`);

      // Created inside the try so a browser failure becomes a per-URL error
      // instead of rejecting the whole batch.
      const worker = await this.createWorkerContext();
      context = worker.context;
      page = worker.page;
      page.on('request', recordRequest);

      const response = await page.goto(url, {
        waitUntil: 'networkidle',
        timeout: 30000
      });
      // goto() can resolve to null (e.g. about:blank or hash-only navigation).
      result.statusCode = response ? response.status() : null;
      result.title = await page.title();

      // 1) Any request to a parking provider marks the page as parked.
      let matchedUrlPattern;
      for (const requestedUrl of requestedUrls) {
        matchedUrlPattern = this.isParkedByUrl(requestedUrl);
        if (matchedUrlPattern) break;
      }
      if (matchedUrlPattern) {
        result.isParked = true;
        result.reason = `Parked domain detected in network requests: "${matchedUrlPattern}"`;
        console.log(`🚨 PARKED DOMAIN DETECTED: ${url} - Found pattern: "${matchedUrlPattern}" in network requests`);
      }

      // 2) Otherwise fall back to looking for tell-tale phrases in the body text.
      if (!result.isParked) {
        const bodyText = await page.textContent('body').catch(() => '');
        const matchedContentPattern = this.isParkedByContent(bodyText);
        if (matchedContentPattern) {
          result.isParked = true;
          result.reason = `Parked domain detected in page content: "${matchedContentPattern}"`;
          console.log(`🚨 PARKED DOMAIN DETECTED: ${url} - Found pattern: "${matchedContentPattern}" in page content`);
        }
      }

      // Screenshots are only taken for pages actually classified as parked.
      if (result.isParked) {
        result.screenshot = await this.takeScreenshot(page, url);
      }
    } catch (error) {
      const message = error && typeof error.message === 'string' ? error.message : String(error);
      result.error = message;
      // Errors (timeouts included) are never treated as a sign of parking.
      result.isParked = false;

      if (this.isTimeoutError(message)) {
        result.reason = `Timeout error: ${message}`;
        console.log(`⏰ TIMEOUT: ${url} - ${message}`);
      } else {
        result.reason = `Error occurred: ${message}`;
        console.log(`❌ ERROR: ${url} - ${message}`);
      }
    } finally {
      if (page) page.off('request', recordRequest);
      if (context) {
        try {
          await context.close();
        } catch (closeError) {
          console.log(`Failed to close browser context for ${url}: ${closeError.message}`);
        }
      }
    }

    return result;
  }

  /**
   * Scans every URL listed in a text file (one per line; blank lines and lines
   * starting with '#' are ignored; "https://" is prepended when no http(s)
   * scheme is present; duplicates are scanned once).
   *
   * When outputFile is given, a report is written after every batch, and an
   * existing report in that file is used to skip URLs that were already
   * scanned (resume support).
   *
   * @param {string} filePath Path of the URL list.
   * @param {string} [outputFile] Path of the JSON report used for progress/resume.
   * @returns {Promise<object[]>} All results, including resumed ones.
   * @throws {Error} If the URL list cannot be read or the report cannot be written.
   */
  async scanUrlsFromFile(filePath, outputFile) {
    let fileContent;
    try {
      fileContent = await fs.readFile(filePath, 'utf8');
    } catch (error) {
      throw new Error(`Failed to read file ${filePath}: ${error.message}`, { cause: error });
    }

    const urls = [...new Set(
      fileContent
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter((line) => line && !line.startsWith('#'))
        .map((entry) => (/^https?:\/\//i.test(entry) ? entry : `https://${entry}`))
    )];
    console.log(`Found ${urls.length} URLs to scan`);

    // Resume: reuse results from a previous run stored in the output file.
    let existingResults = [];
    if (outputFile) {
      try {
        const existingReport = JSON.parse(await fs.readFile(outputFile, 'utf8'));
        if (existingReport && Array.isArray(existingReport.allResults)) {
          existingResults = existingReport.allResults;
        }
        console.log(`Found ${existingResults.length} existing results, resuming from where we left off...`);
      } catch (error) {
        if (error && error.code !== 'ENOENT') {
          console.log(`Could not load existing results from ${outputFile}: ${error.message}`);
        }
        console.log('Starting fresh scan...');
      }
    } else {
      console.log('Starting fresh scan...');
    }

    const processedUrls = new Set(existingResults.map((entry) => entry.url));
    const urlsToProcess = urls.filter((url) => !processedUrls.has(url));
    console.log(`${urlsToProcess.length} URLs remaining to process`);

    this.results = [...existingResults];

    // A non-positive or non-numeric concurrency would never advance the loop.
    const batchSize = Math.max(1, Math.floor(Number(this.concurrency)) || 1);
    const totalBatches = Math.ceil(urlsToProcess.length / batchSize);

    for (let start = 0; start < urlsToProcess.length; start += batchSize) {
      const batch = urlsToProcess.slice(start, start + batchSize);
      console.log(`\nProcessing batch ${Math.floor(start / batchSize) + 1}/${totalBatches} (${batch.length} URLs)`);

      const batchResults = await Promise.all(batch.map((url, offset) => {
        console.log(`[${start + offset + 1}/${urlsToProcess.length}] Starting: ${url}`);
        return this.scanUrl(url);
      }));
      this.results.push(...batchResults);

      // Persist after every batch so an interrupted run can be resumed.
      if (outputFile) {
        await fs.writeFile(outputFile, JSON.stringify(this.generateReport(), null, 2));
        console.log(`Batch completed. Progress saved to ${outputFile} (${this.results.length} total results)`);
      }
    }

    return this.results;
  }

  /** @returns {object[]} Results classified as parked. */
  getParkedDomains() {
    return this.results.filter((result) => result.isParked);
  }

  /** @returns {object[]} Results whose error message describes a timeout. */
  getTimeoutDomains() {
    return this.results.filter((result) => result.error && this.isTimeoutError(result.error));
  }

  /** @returns {object[]} Results that failed with a non-timeout error. */
  getErrorDomains() {
    return this.results.filter((result) => result.error && !this.isTimeoutError(result.error));
  }

  /**
   * Builds the JSON-serialisable report: summary counters, one condensed list
   * per category, and the unabridged results (used for resuming).
   *
   * @returns {object}
   */
  generateReport() {
    const parkedDomains = this.getParkedDomains();
    const timeoutDomains = this.getTimeoutDomains();
    const errorDomains = this.getErrorDomains();

    const toFailureEntry = (domain) => ({
      url: domain.url,
      reason: domain.reason,
      error: domain.error,
      timestamp: domain.timestamp
    });

    return {
      summary: {
        totalScanned: this.results.length,
        totalParked: parkedDomains.length,
        totalTimeouts: timeoutDomains.length,
        totalErrors: errorDomains.length,
        screenshotsEnabled: this.takeScreenshots,
        scanDate: new Date().toISOString()
      },
      parkedDomains: parkedDomains.map((domain) => ({
        url: domain.url,
        reason: domain.reason,
        title: domain.title,
        statusCode: domain.statusCode,
        screenshot: domain.screenshot,
        timestamp: domain.timestamp
      })),
      timeoutDomains: timeoutDomains.map(toFailureEntry),
      errorDomains: errorDomains.map(toFailureEntry),
      allResults: this.results
    };
  }
}
/**
 * Command-line entry point.
 *
 * Usage: node parked-domain-scanner.js <url-file> [output-file] [--screenshots]
 *
 * Options (arguments starting with "--") may appear anywhere on the command
 * line; the remaining arguments are taken positionally. Exits with code 1 on
 * bad usage or a missing input file, and sets exit code 1 if the scan fails.
 */
async function main() {
  const args = process.argv.slice(2);
  // Separate flags from positionals so "urls.txt --screenshots" does not end
  // up using "--screenshots" as the output file name.
  const flags = args.filter((arg) => arg.startsWith('--'));
  const positionals = args.filter((arg) => !arg.startsWith('--'));

  if (positionals.length === 0) {
    console.error('Usage: node parked-domain-scanner.js <url-file> [output-file] [options]');
    console.error('');
    console.error('Arguments:');
    console.error(' url-file Path to file containing URLs (one per line)');
    console.error(' output-file Optional output file for JSON results (default: result.txt)');
    console.error('');
    console.error('Options:');
    console.error(' --screenshots Take screenshots of parked domains only');
    console.error('');
    console.error('Examples:');
    console.error(' node parked-domain-scanner.js urls.txt results.json');
    console.error(' node parked-domain-scanner.js urls.txt results.json --screenshots');
    console.error('');
    console.error('Note: Timeout errors are no longer considered parked domains.');
    console.error(' Screenshots are only taken for actually parked domains.');
    console.error(' Timeout reduced to 30 seconds for faster scanning.');
    process.exit(1);
  }

  const urlFile = positionals[0];
  const outputFile = positionals[1] || 'result.txt';
  const takeScreenshots = flags.includes('--screenshots');

  // Fail early, before a browser is launched, when the input file is missing.
  try {
    await fs.access(urlFile);
  } catch (error) {
    console.error(`Error: Input file '${urlFile}' not found`);
    process.exit(1);
  }

  const scanner = new ParkedDomainScanner(5, takeScreenshots); // 5 concurrent scans

  try {
    console.log('Initializing browser...');
    if (takeScreenshots) {
      console.log('Screenshots enabled - will be saved to ./screenshots/ directory (parked domains only)');
    }
    await scanner.init();

    console.log(`Reading URLs from ${urlFile}...`);
    await scanner.scanUrlsFromFile(urlFile, outputFile);

    const report = scanner.generateReport();
    const { summary, parkedDomains, timeoutDomains } = report;

    console.log('\n=== SCAN COMPLETE ===');
    console.log(`Total URLs scanned: ${summary.totalScanned}`);
    console.log(`Parked domains found: ${summary.totalParked}`);
    console.log(`Timeout errors: ${summary.totalTimeouts}`);
    console.log(`Other errors: ${summary.totalErrors}`);
    if (takeScreenshots) {
      const screenshotsTaken = report.allResults.filter((r) => r.screenshot).length;
      console.log(`Screenshots taken: ${screenshotsTaken} (parked domains only)`);
    }

    // Written even when nothing was left to scan, so the file always reflects
    // the final state.
    await fs.writeFile(outputFile, JSON.stringify(report, null, 2));
    console.log(`\nFinal results saved to: ${outputFile}`);

    if (parkedDomains.length > 0) {
      console.log('\n=== PARKED DOMAINS ===');
      for (const domain of parkedDomains) {
        console.log(`${domain.url} - ${domain.reason}`);
      }
    }

    if (timeoutDomains.length > 0) {
      const previewLimit = 10;
      console.log(`\n=== TIMEOUT DOMAINS (${timeoutDomains.length}) ===`);
      console.log('These domains timed out and are not considered parked:');
      for (const domain of timeoutDomains.slice(0, previewLimit)) {
        console.log(`${domain.url}`);
      }
      if (timeoutDomains.length > previewLimit) {
        console.log(`... and ${timeoutDomains.length - previewLimit} more (see full report for details)`);
      }
    }
  } catch (error) {
    console.error('Error:', error.message);
    // Set the exit code instead of calling process.exit(): exiting here would
    // skip the finally block and leave the browser unclosed.
    process.exitCode = 1;
  } finally {
    await scanner.close();
  }
}
// Run the scanner only when this file is executed directly (not when it is
// required as a module).
if (require.main === module) {
  main().catch((error) => {
    console.error(error);
    // An error that escapes main() must not leave the process exiting with 0.
    process.exitCode = 1;
  });
}

module.exports = { ParkedDomainScanner };
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment