Skip to content

Instantly share code, notes, and snippets.

@crock
Last active January 7, 2024 21:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save crock/cad9d098482d97b54a154bddd244bc93 to your computer and use it in GitHub Desktop.
Save crock/cad9d098482d97b54a154bddd244bc93 to your computer and use it in GitHub Desktop.
Scrape all expiring, pending delete, and exclusive auction lists from NameJet.com and SnapNames.com
.cache
node_modules
.DS_Store
package-lock.json
lists
tmp

Drop List Scraper

Requirements

Installation

  1. Clone the gist

    git clone https://gist.github.com/cad9d098482d97b54a154bddd244bc93.git droplist-scraper
  2. Change directory

    cd droplist-scraper
  3. Install dependencies

    npm install

Usage

These commands generate a series of files in the `lists` directory, sorted by platform and list type.

npm run namejet
npm run snapnames
npm run godaddy
{
"author": {
"name": "Alex Crocker",
"email": "alex@croc.io"
},
"scripts": {
"namejet": "ts-node scrape-namejet.ts",
"snapnames": "ts-node scrape-snapnames.ts",
"godaddy": "ts-node scrape-godaddy.ts"
},
"dependencies": {
"basic-ftp": "^5.0.4",
"csv-parser": "^3.0.0",
"moment": "^2.29.4",
"puppeteer": "^21.6.1",
"yauzl": "^2.10.0"
},
"devDependencies": {
"@types/node": "^18.11.0",
"ts-node": "^10.9.1",
"typescript": "^4.8.4"
}
}
const {join} = require('path');
/**
 * Puppeteer configuration: keeps Puppeteer's cache inside this project
 * (under `.cache/puppeteer`) instead of the default location.
 *
 * @type {import("puppeteer").Configuration}
 */
const puppeteerConfig = {
  cacheDirectory: join(__dirname, '.cache', 'puppeteer'),
};

module.exports = puppeteerConfig;
import fs from 'fs';
import path from 'path';
import { Client } from 'basic-ftp';
import yauzl from 'yauzl';
// Scratch directory that holds the downloaded archive and its extracted contents.
const TMP_DIR = path.join(__dirname, 'tmp');
// Directory that receives the processed GoDaddy auction listing.
const GD_EXCLUSIVE_AUCTION_DIR = path.join(__dirname, 'lists/godaddy/in_auction');

// Create both working directories up front so later writes cannot fail on a missing path.
if (!fs.existsSync(TMP_DIR)) fs.mkdirSync(TMP_DIR);
if (!fs.existsSync(GD_EXCLUSIVE_AUCTION_DIR)) fs.mkdirSync(GD_EXCLUSIVE_AUCTION_DIR, { recursive: true });

// Connection settings handed to the FTP client (empty password, no TLS).
const ftpConfig = {
  host: 'ftp.godaddy.com',
  user: 'auctions',
  password: '',
  secure: false,
};

// NOTE(review): the 0 is presumably the client's timeout argument (no timeout) — confirm against basic-ftp docs.
const client = new Client(0);

// Where the extracted JSON is written locally, and where the archive lives on the server.
const tempLocalFilePath = path.join(TMP_DIR, 'all_expiring_auctions.json');
const remoteFilePath = '/all_expiring_auctions.json.zip';
/**
 * Reads the extracted listing from the temporary directory, writes its `data`
 * property as JSON into the GoDaddy output directory, then removes the whole
 * temporary directory.
 */
function processData() {
    const rawListing = fs.readFileSync(tempLocalFilePath, 'utf-8');
    const listings = JSON.parse(rawListing).data;

    console.log('Writing json file to disk...')
    const outputFile = path.join(GD_EXCLUSIVE_AUCTION_DIR, 'all_expiring_auctions.json');
    fs.writeFileSync(outputFile, JSON.stringify(listings));

    console.log('Deleting temporary files to save space...')
    // Recursive + force: drops the archive and the extracted file in one call.
    fs.rmSync(TMP_DIR, { force: true, recursive: true })
}
/**
 * Extracts the `all_expiring_auctions.json` entry from a zip archive.
 *
 * The returned promise resolves only after the extracted file has been fully
 * flushed to disk, and rejects on any archive, stream or write error (or when
 * the archive does not contain the expected entry).
 *
 * @param zipPath  Path of the zip archive to read.
 * @param destPath Path the extracted JSON file is written to.
 */
function extractListing(zipPath: string, destPath: string): Promise<void> {
    return new Promise((resolve, reject) => {
        yauzl.open(zipPath, { lazyEntries: true }, (openErr, zipfile) => {
            if (openErr) {
                reject(openErr);
                return;
            }

            zipfile.on('error', reject);
            // 'end' is only reached when every entry was skipped, i.e. nothing matched.
            zipfile.on('end', () => {
                reject(new Error(`all_expiring_auctions.json not found in ${zipPath}`));
            });

            zipfile.on('entry', (entry) => {
                if (!/all_expiring_auctions\.json/.test(entry.fileName)) {
                    // With lazyEntries the next entry must be requested explicitly.
                    zipfile.readEntry();
                    return;
                }

                console.log(`Extracting ${entry.fileName}...`)
                zipfile.openReadStream(entry, (streamErr, readStream) => {
                    if (streamErr) {
                        reject(streamErr);
                        return;
                    }

                    const output = fs.createWriteStream(destPath);
                    readStream.on('error', reject);
                    output.on('error', reject);
                    // Wait for the writer's 'finish', not the reader's 'end':
                    // only then is the whole file readable from disk.
                    output.on('finish', () => {
                        zipfile.close();
                        resolve();
                    });
                    readStream.pipe(output);
                });
            });

            zipfile.readEntry();
        });
    });
}

/**
 * Entry point: downloads the auction archive from the GoDaddy FTP server,
 * extracts the listing and hands it to `processData`.
 *
 * Every failure (FTP, unzip, processing) is reported through the single
 * `catch` below; the FTP connection is always closed once the download
 * attempt is over.
 */
(async () => {
    const zipPath = path.join(TMP_DIR, 'all_expiring_auctions.json.zip');

    try {
        try {
            console.log('Connecting to GoDaddy FTP server...')
            await client.access(ftpConfig);

            console.log('Downloading zip file from GoDaddy FTP server...')
            await client.downloadTo(fs.createWriteStream(zipPath), remoteFilePath);
        } finally {
            // Close the FTP connection
            client.close();
        }

        console.log('Unzipping file...')
        await extractListing(zipPath, tempLocalFilePath);
        processData();
    } catch (err) {
        console.error('Error downloading file:', err);
    }
})();
import puppeteer from "puppeteer";
import fs from 'fs';
import path from 'path';
import moment from 'moment';
import csv from "csv-parser";
// Output directories, one per NameJet list type.
const NJ_PENDING_DELETE_DIR = path.join(__dirname, 'lists/namejet/pending_delete');
const NJ_EXPIRING_DIR = path.join(__dirname, 'lists/namejet/expiring');
const NJ_EXCLUSIVE_AUCTION_DIR = path.join(__dirname, 'lists/namejet/in_auction');

// Create any output directory that does not exist yet (parents included).
for (const outputDir of [NJ_EXPIRING_DIR, NJ_PENDING_DELETE_DIR, NJ_EXCLUSIVE_AUCTION_DIR]) {
    if (!fs.existsSync(outputDir)) {
        fs.mkdirSync(outputDir, { recursive: true });
    }
}

// Page that lists the downloadable CSV files.
const basePage = 'https://www.namejet.com/download.action?format=csv';
/** Month-and-day fragment (e.g. "Jan 7") looked for in a download link title. */
const LIST_DATE_PATTERN = /(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d+/;

/**
 * Collects `{href, name}` for every link inside the given child section of
 * the `#list-subscribe` container (`name` is the link's `title` attribute).
 *
 * @param page     Puppeteer page already showing the download page.
 * @param nthChild 1-based index of the section `div` to read.
 */
async function readListLinks(page: any, nthChild: number): Promise<any[]> {
    return page.evaluate(
        `Array.from(document.querySelectorAll("#list-subscribe > div:nth-child(${nthChild}) a")).map(a => ({href: a.href, name: a.title}))`
    );
}

/**
 * Derives a `YYYY-MM-DD` date from the month/day found in a link title.
 *
 * The title carries no year, so the current year is assumed unless the month
 * lies before the current month, in which case the following year is used.
 *
 * @param name Link title to search.
 * @returns The formatted date, or `null` when the title contains no date.
 */
function parseListDate(name: string): string | null {
    const match = name.match(LIST_DATE_PATTERN);
    if (!match) {
        return null;
    }

    const monthDay = match[0];
    const now = moment();
    let closestYear = now.year();
    if (moment(monthDay, 'MMM D').month() < now.month()) {
        closestYear += 1;
    }
    // The format must include the year token, otherwise the year is ignored.
    return moment(`${monthDay}, ${closestYear}`, 'MMM D, YYYY').format('YYYY-MM-DD');
}

/**
 * Maps a link to its list type. "deleting" titles take precedence over
 * "expiring" ones; a "deleting" link whose href contains "live" is an
 * in-progress auction.
 *
 * @returns 'expiring' | 'pending_delete' | 'in_auction', or `undefined` when
 *          the title matches neither keyword.
 */
function classifyList(name: string, href: string): string | undefined {
    if (/deleting/.test(name)) {
        return /live/.test(href) ? 'in_auction' : 'pending_delete';
    }
    if (/expiring/.test(name)) {
        return 'expiring';
    }
    return undefined;
}

/**
 * Converts every `.csv` file in `dir` into a sibling `.json` file.
 *
 * @param dir        Directory to scan.
 * @param csvOptions Options forwarded to csv-parser.
 * @param toRecord   Maps one parsed CSV row to the object that is serialised.
 */
async function convertCsvDirectory(dir: string, csvOptions: any, toRecord: (row: any) => any): Promise<void> {
    const csvNames = fs.readdirSync(dir).filter((fileName) => fileName.endsWith('.csv'));

    for (const fileName of csvNames) {
        const filePath = path.join(dir, fileName);
        const parsed: any[] = await new Promise((resolve, reject) => {
            const results = [];
            fs.createReadStream(filePath)
                // pipe() does not forward source errors, so listen on the file stream too.
                .on('error', reject)
                .pipe(csv(csvOptions))
                .on('data', (data) => results.push(data))
                .on('end', () => resolve(results))
                .on('error', reject);
        });
        console.log(`Parsed ${parsed.length} domains from ${fileName}.`);

        // Anchor on the extension so a ".csv" elsewhere in the path is left alone.
        fs.writeFileSync(filePath.replace(/\.csv$/, '.json'), JSON.stringify(parsed.map(toRecord)));
    }
}

/**
 * Entry point: downloads every dated NameJet list through a browser session,
 * then converts the downloaded CSV files to JSON.
 */
(async () => {
    const browser = await puppeteer.launch({headless: false});
    const entries: any[] = [];

    try {
        const page = await browser.newPage();
        await page.goto(basePage);
        const target = page.target();
        const session = await target.createCDPSession();

        const allDomains = [
            ...await readListLinks(page, 3), // Expiring Domains
            ...await readListLinks(page, 4), // Pending Delete Domains
            ...await readListLinks(page, 2), // Exclusive Auctions
        ];

        for (const item of allDomains) {
            const date = parseListDate(item.name);
            if (!date) {
                continue; // links without a date in their title are not list downloads
            }
            const type = classifyList(item.name, item.href);
            if (!type) {
                // Without a type the file would land in "lists/namejet/undefined/".
                console.warn(`Skipping ${item.name}: unrecognised list type.`);
                continue;
            }
            entries.push({ ...item, date, type });
        }

        for (const { name, href, type } of entries) {
            console.log(`Downloading ${name}...`);
            await session.send('Page.setDownloadBehavior', {
                behavior: 'allow',
                downloadPath: path.join(__dirname, `lists/namejet/${type}/`),
            });
            await page.click(`a[href="${href.split('.com/')[1]}"]`);
            await page.goto(basePage);
        }

        // NOTE(review): presumably a grace period for in-flight downloads — confirm.
        await new Promise((resolve) => {
            setTimeout(resolve, 5000);
        });
        console.log(`Downloaded ${entries.length} files.`)
        await page.close();
    } finally {
        // Always release the browser, even when scraping fails midway.
        await browser.close();
    }

    const toPreorderRecord = (item: any) => ({
        fqdn: item["Domain Name"],
        currentBid: item["Current Bid"],
        preorderByDate: item["Join By Date (ET)"],
    });
    const toAuctionRecord = (item: any) => ({
        fqdn: item["Domain name"],
        currentBid: item["Current bid"],
        endDate: item["Auction end date"],
    });

    await convertCsvDirectory(NJ_EXPIRING_DIR, {}, toPreorderRecord);
    await convertCsvDirectory(NJ_PENDING_DELETE_DIR, {}, toPreorderRecord);
    // Auction exports are parsed with the first two lines skipped.
    await convertCsvDirectory(NJ_EXCLUSIVE_AUCTION_DIR, { skipLines: 2 }, toAuctionRecord);
})().catch((err) => {
    console.error('NameJet scrape failed:', err);
    process.exitCode = 1;
});
import puppeteer from "puppeteer";
import fs from 'fs';
import path from 'path';
import moment from 'moment';
import csv from 'csv-parser';
// Output directories, one per SnapNames list type.
const SN_PENDING_DELETE_DIR = path.join(__dirname, 'lists/snapnames/pending_delete');
const SN_EXPIRING_DIR = path.join(__dirname, 'lists/snapnames/expiring');
const SN_EXCLUSIVE_AUCTION_DIR = path.join(__dirname, 'lists/snapnames/in_auction');

// Create any output directory that does not exist yet (parents included).
for (const outputDir of [SN_EXPIRING_DIR, SN_PENDING_DELETE_DIR, SN_EXCLUSIVE_AUCTION_DIR]) {
    if (!fs.existsSync(outputDir)) {
        fs.mkdirSync(outputDir, { recursive: true });
    }
}

// Page that lists the downloadable CSV files.
const basePage = 'https://www.snapnames.com/download.action?format=csv';
/** Month-and-day fragment (e.g. "Jan 7") looked for in a download link title. */
const LIST_DATE_PATTERN = /(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d+/;

/**
 * Collects `{href, name}` for every link inside the given child section of
 * the `#list-subscribe` container (`name` is the link's `title` attribute).
 *
 * @param page     Puppeteer page already showing the download page.
 * @param nthChild 1-based index of the section `div` to read.
 */
async function readListLinks(page: any, nthChild: number): Promise<any[]> {
    return page.evaluate(
        `Array.from(document.querySelectorAll("#list-subscribe > div:nth-child(${nthChild}) a")).map(a => ({href: a.href, name: a.title}))`
    );
}

/**
 * Derives a `YYYY-MM-DD` date from the month/day found in a link title.
 *
 * The title carries no year, so the current year is assumed unless the month
 * lies before the current month, in which case the following year is used.
 *
 * @param name Link title to search.
 * @returns The formatted date, or `null` when the title contains no date.
 */
function parseListDate(name: string): string | null {
    const match = name.match(LIST_DATE_PATTERN);
    if (!match) {
        return null;
    }

    const monthDay = match[0];
    const now = moment();
    let closestYear = now.year();
    if (moment(monthDay, 'MMM D').month() < now.month()) {
        closestYear += 1;
    }
    // The format must include the year token, otherwise the year is ignored.
    return moment(`${monthDay}, ${closestYear}`, 'MMM D, YYYY').format('YYYY-MM-DD');
}

/**
 * Maps a link to its list type. "deleting" titles take precedence over
 * "expiring" ones; a "deleting" link whose href contains "live" is an
 * in-progress auction.
 *
 * @returns 'expiring' | 'pending_delete' | 'in_auction', or `undefined` when
 *          the title matches neither keyword.
 */
function classifyList(name: string, href: string): string | undefined {
    if (/deleting/.test(name)) {
        return /live/.test(href) ? 'in_auction' : 'pending_delete';
    }
    if (/expiring/.test(name)) {
        return 'expiring';
    }
    return undefined;
}

/**
 * Converts every `.csv` file in `dir` into a sibling `.json` file.
 *
 * @param dir        Directory to scan.
 * @param csvOptions Options forwarded to csv-parser.
 * @param toRecord   Maps one parsed CSV row to the object that is serialised.
 */
async function convertCsvDirectory(dir: string, csvOptions: any, toRecord: (row: any) => any): Promise<void> {
    const csvNames = fs.readdirSync(dir).filter((fileName) => fileName.endsWith('.csv'));

    for (const fileName of csvNames) {
        const filePath = path.join(dir, fileName);
        const parsed: any[] = await new Promise((resolve, reject) => {
            const results = [];
            fs.createReadStream(filePath)
                // pipe() does not forward source errors, so listen on the file stream too.
                .on('error', reject)
                .pipe(csv(csvOptions))
                .on('data', (data) => results.push(data))
                .on('end', () => resolve(results))
                .on('error', reject);
        });
        console.log(`Parsed ${parsed.length} domains from ${fileName}.`);

        // Anchor on the extension so a ".csv" elsewhere in the path is left alone.
        fs.writeFileSync(filePath.replace(/\.csv$/, '.json'), JSON.stringify(parsed.map(toRecord)));
    }
}

/**
 * Entry point: downloads every dated SnapNames list through a browser session,
 * then converts the downloaded CSV files to JSON.
 */
(async () => {
    const browser = await puppeteer.launch({headless: false});
    const entries: any[] = [];

    try {
        const page = await browser.newPage();
        await page.goto(basePage);
        const target = page.target();
        const session = await target.createCDPSession();

        const allDomains = [
            ...await readListLinks(page, 3), // Expiring Domains
            ...await readListLinks(page, 4), // Pending Delete Domains
            ...await readListLinks(page, 2), // Exclusive Auctions
        ];

        for (const item of allDomains) {
            const date = parseListDate(item.name);
            if (!date) {
                continue; // links without a date in their title are not list downloads
            }
            const type = classifyList(item.name, item.href);
            if (!type) {
                // Without a type the file would land in "lists/snapnames/undefined/".
                console.warn(`Skipping ${item.name}: unrecognised list type.`);
                continue;
            }
            entries.push({ ...item, date, type });
        }

        for (const { name, href, type } of entries) {
            console.log(`Downloading ${name}...`);
            await session.send('Page.setDownloadBehavior', {
                behavior: 'allow',
                downloadPath: path.join(__dirname, `lists/snapnames/${type}/`),
            });
            await page.click(`a[href="${href.split('.com/')[1]}"]`);
            await page.goto(basePage);
        }

        // NOTE(review): presumably a grace period for in-flight downloads — confirm.
        await new Promise((resolve) => {
            setTimeout(resolve, 5000);
        });
        console.log(`Downloaded ${entries.length} files.`)
        await page.close();
    } finally {
        // Always release the browser, even when scraping fails midway.
        await browser.close();
    }

    const toPreorderRecord = (item: any) => ({
        fqdn: item["Domain Name"],
        currentBid: item["Current Bid"],
        preorderByDate: item["Join By Date (ET)"],
    });
    const toAuctionRecord = (item: any) => ({
        fqdn: item["Domain name"],
        currentBid: item["Current bid"],
        endDate: item["Auction end date"],
    });

    await convertCsvDirectory(SN_EXPIRING_DIR, {}, toPreorderRecord);
    await convertCsvDirectory(SN_PENDING_DELETE_DIR, {}, toPreorderRecord);
    // Auction exports are parsed with the first two lines skipped.
    await convertCsvDirectory(SN_EXCLUSIVE_AUCTION_DIR, { skipLines: 2 }, toAuctionRecord);
})().catch((err) => {
    console.error('SnapNames scrape failed:', err);
    process.exitCode = 1;
});
{
"compilerOptions": {
"target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
"module": "commonjs", /* Specify what module code is generated. */
"rootDir": ".", /* Specify the root folder within your source files. */
"outDir": "./dist", /* Specify an output folder for all emitted files. */
"allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
"strict": false, /* Enable all strict type-checking options. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
},
"exclude": [
"node_modules",
".cache",
"dist"
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment