Created
November 26, 2024 04:46
-
-
Save Makermed/dd0c7e525a394000430fc3ad5af32237 to your computer and use it in GitHub Desktop.
A Scraper that turns Bluesky Starter Packs into Lists
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * @fileoverview Script to merge Bluesky starter pack members into a list
 * @requires dotenv
 * @requires node-fetch
 */
import dotenv from 'dotenv'; | |
import { promises as fs } from 'fs'; | |
import fetch from 'node-fetch'; | |
import path from 'path'; | |
import { fileURLToPath } from 'url'; | |
// Resolve this module's directory; ES modules do not provide __dirname.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Load the .env file that lives one directory above this script.
const result = dotenv.config({
  path: path.resolve(__dirname, '..', '.env'),
  debug: true
});
if (result.error) {
  console.error("Error loading .env file:", result.error);
  process.exit(1);
}

// Report only whether the variables this script needs are present.
// Printing the whole of process.env would write every unrelated secret
// (tokens, API keys, ...) of the calling environment to the log.
console.log("Required environment variables:", {
  BLUESKY_USERNAME: process.env.BLUESKY_USERNAME ? 'set' : 'missing',
  BLUESKY_PASSWORD: process.env.BLUESKY_PASSWORD ? 'set' : 'missing'
});

// Credentials are read from the names used in the .env file.
const BSKY_HANDLE = process.env.BLUESKY_USERNAME;
const BSKY_PASSWORD = process.env.BLUESKY_PASSWORD;
console.log("Loaded credentials:", {
  handle: BSKY_HANDLE,
  password: BSKY_PASSWORD ? '***' : undefined
});

// Abort early when either credential is missing; nothing below can work without them.
if (!BSKY_HANDLE || !BSKY_PASSWORD) {
  console.error("Missing required environment variables. Please check your .env file:");
  console.error("BLUESKY_USERNAME:", BSKY_HANDLE ? "✓" : "✗");
  console.error("BLUESKY_PASSWORD:", BSKY_PASSWORD ? "✓" : "✗");
  process.exit(1);
}
/**
 * Minimal client for the Bluesky XRPC endpoints this script uses:
 * session creation, starter pack lookup, list reading and list record writes.
 */
class BlueskyAPI {
  /**
   * @param {string} handle - Bluesky handle
   * @param {string} password - App password
   */
  constructor(handle, password) {
    this.apiEndpoint = "https://bsky.social/xrpc/";
    this.handle = handle;
    this.password = password;
    this.accessJwt = null;
    this.did = null;
  }

  /**
   * Build the URL of an XRPC method with properly encoded query parameters.
   * Parameters that are undefined, null or empty strings are omitted.
   * @param {string} nsid - XRPC method name, e.g. "app.bsky.graph.getList"
   * @param {Object<string, string>} [params] - Query parameters
   * @returns {string} Absolute request URL
   */
  buildUrl(nsid, params = {}) {
    const url = new URL(`${this.apiEndpoint}${nsid}`);
    for (const [key, value] of Object.entries(params)) {
      if (value !== undefined && value !== null && value !== "") {
        // searchParams encodes AT-URIs and opaque cursors, which may contain
        // characters such as ':', '/', '+' or '='.
        url.searchParams.set(key, value);
      }
    }
    return url.toString();
  }

  /**
   * Request headers carrying the current access token.
   * @param {Object<string, string>} [extra] - Additional headers to include
   * @returns {Object<string, string>}
   */
  authHeaders(extra = {}) {
    return { ...extra, Authorization: `Bearer ${this.accessJwt}` };
  }

  /**
   * Read the JSON body of a failed response without throwing when the body
   * is not JSON (for example an HTML gateway error page).
   * @param {{json: Function, statusText: string}} response - Failed fetch response
   * @returns {Promise<Object>} Parsed error body or a placeholder object
   */
  static async readErrorBody(response) {
    try {
      return await response.json();
    } catch (parseError) {
      return { error: "UnparseableResponse", message: response.statusText };
    }
  }

  /**
   * Perform an authenticated GET and return the parsed JSON body.
   * @param {string} nsid - XRPC method name
   * @param {Object<string, string>} params - Query parameters
   * @param {string} logLabel - Prefix for the logged error body
   * @param {string} errorLabel - Prefix for the thrown error message
   * @returns {Promise<Object>} Parsed response body
   * @throws {Error} When the response status is not OK
   */
  async getJson(nsid, params, logLabel, errorLabel) {
    const response = await fetch(this.buildUrl(nsid, params), {
      headers: this.authHeaders()
    });
    if (!response.ok) {
      const errorData = await BlueskyAPI.readErrorBody(response);
      console.error(`${logLabel}:`, errorData);
      throw new Error(`${errorLabel}: ${response.statusText}`);
    }
    return response.json();
  }

  /**
   * Perform an authenticated JSON POST and return the raw response, because
   * callers treat non-OK statuses differently.
   * @param {string} nsid - XRPC method name
   * @param {Object} payload - Request body, serialized as JSON
   * @returns {Promise<Object>} The fetch response
   */
  async postJson(nsid, payload) {
    return fetch(this.buildUrl(nsid), {
      method: "POST",
      headers: this.authHeaders({ "Content-Type": "application/json" }),
      body: JSON.stringify(payload)
    });
  }

  /**
   * Initialize API connection and get authentication token
   * @returns {Promise<void>}
   * @throws {Error} When authentication fails
   */
  async init() {
    try {
      console.log(`Initializing Bluesky API connection for ${this.handle}...`);
      const response = await fetch(this.buildUrl("com.atproto.server.createSession"), {
        method: "POST",
        headers: {
          "Content-Type": "application/json"
        },
        body: JSON.stringify({
          "identifier": this.handle,
          "password": this.password
        })
      });
      if (!response.ok) {
        const errorData = await BlueskyAPI.readErrorBody(response);
        console.error("Authentication failed:", errorData);
        throw new Error(`Authentication failed: ${errorData.message || response.statusText}`);
      }
      const data = await response.json();
      if (!data.accessJwt || !data.did) {
        throw new Error("Authentication failed: response did not include accessJwt and did");
      }
      this.accessJwt = data.accessJwt;
      this.did = data.did;
      console.log("Successfully authenticated with Bluesky");
      console.log("DID:", this.did);
    } catch (error) {
      console.error("Failed to initialize Bluesky API:", error);
      throw error;
    }
  }

  /**
   * Get the AT-URI of the list backing a starter pack.
   * Pages through the actor's starter packs until one whose record key
   * equals packId is found, then reads that pack's details.
   * @param {string} userHandle - User's handle
   * @param {string} packId - Starter pack ID
   * @returns {Promise<string>} AT-URI of the starter pack's list
   * @throws {Error} When the pack is not found or a request fails
   */
  async getStarterPackUri(userHandle, packId) {
    try {
      console.log(`Getting starter pack URI for handle: ${userHandle}, packId: ${packId}`);
      let pack;
      let cursor = "";
      do {
        const page = await this.getJson(
          "app.bsky.graph.getActorStarterPacks",
          { actor: userHandle, cursor },
          "Failed to get starter packs",
          "Failed to get starter packs"
        );
        console.log("Got starter packs:", page);
        pack = (page.starterPacks ?? []).find(p => p.uri.split('/').pop() === packId);
        const nextCursor = page.cursor || "";
        // Stop if the server hands back the same cursor, to avoid looping forever.
        cursor = nextCursor === cursor ? "" : nextCursor;
      } while (!pack && cursor);
      if (!pack) {
        throw new Error("Starter pack not found");
      }
      const packData = await this.getJson(
        "app.bsky.graph.getStarterPack",
        { starterPack: pack.uri },
        "Failed to get starter pack details",
        "Failed to get starter pack details"
      );
      console.log("Got starter pack details:", packData);
      const listUri = packData.starterPack?.list?.uri;
      if (!listUri) {
        throw new Error("Starter pack has no associated list");
      }
      console.log("Found list URI:", listUri);
      return listUri;
    } catch (error) {
      console.error("Failed to get starter pack URI:", error);
      throw error;
    }
  }

  /**
   * Read every member of a list, following pagination cursors.
   * @param {string} listUri - List AT-URI
   * @returns {Promise<Array<{did: string, handle: string}>>} All list members
   * @throws {Error} When a page request fails
   */
  async fetchListMembers(listUri) {
    const members = [];
    let cursor = "";
    do {
      const data = await this.getJson(
        "app.bsky.graph.getList",
        { list: listUri, cursor },
        "Failed to get list",
        "Failed to get list"
      );
      console.log("Got list data:", {
        items: data.items?.length || 0,
        cursor: data.cursor || 'none'
      });
      for (const item of data.items ?? []) {
        members.push({ did: item.subject.did, handle: item.subject.handle });
      }
      const nextCursor = data.cursor || "";
      // Stop if the server hands back the same cursor, to avoid looping forever.
      cursor = nextCursor === cursor ? "" : nextCursor;
    } while (cursor);
    return members;
  }

  /**
   * Get list members and add them to target list.
   * Members already present in the target list are skipped, so running the
   * merge repeatedly does not create duplicate list items. The target list
   * record itself (name, description, creation date) is left untouched; only
   * list item records are created.
   * @param {string} sourceListUri - Source list AT-URI
   * @param {string} targetListUri - Target list AT-URI
   * @returns {Promise<void>}
   * @throws {Error} When the source list cannot be read
   */
  async mergeListMembers(sourceListUri, targetListUri) {
    const requestDelayMs = 100;
    try {
      console.log(`Merging members from ${sourceListUri} to ${targetListUri}`);
      const members = await this.fetchListMembers(sourceListUri);
      console.log(`Found ${members.length} members in source list`);

      // Best effort: a target list that cannot be read yet must not block the
      // merge, it only disables the duplicate check.
      const knownDids = new Set();
      try {
        for (const existing of await this.fetchListMembers(targetListUri)) {
          knownDids.add(existing.did);
        }
      } catch (error) {
        console.error("Could not read existing target list members, continuing without duplicate check:", error);
      }

      // Add each missing member to the target list
      for (const member of members) {
        if (knownDids.has(member.did)) {
          console.log(`Member ${member.handle} already in list, skipping`);
          continue;
        }
        try {
          const response = await this.postJson("com.atproto.repo.createRecord", {
            collection: "app.bsky.graph.listitem",
            repo: this.did,
            record: {
              subject: member.did,
              list: targetListUri,
              createdAt: new Date().toISOString(),
              "$type": "app.bsky.graph.listitem"
            }
          });
          if (!response.ok) {
            const errorData = await BlueskyAPI.readErrorBody(response);
            if (String(errorData.error ?? "").includes("duplicate")) {
              console.log(`Member ${member.handle} already in list, skipping`);
            } else {
              console.error(`Failed to add ${member.handle}:`, errorData);
            }
          } else {
            knownDids.add(member.did);
            console.log(`Added ${member.handle} to target list`);
          }
        } catch (error) {
          // One failed member must not abort the remaining ones.
          console.error(`Failed to add ${member.handle}:`, error);
        }
        // Add a small delay between requests to avoid rate limiting
        await new Promise(resolve => setTimeout(resolve, requestDelayMs));
      }

      // Verify the final count across all pages; purely informational.
      try {
        const finalMembers = await this.fetchListMembers(targetListUri);
        console.log(`Final list member count: ${finalMembers.length}`);
      } catch (error) {
        console.error("Could not verify final list member count:", error);
      }
      console.log(`Successfully processed ${members.length} members`);
    } catch (error) {
      console.error("Failed to merge list members:", error);
      throw error;
    }
  }

  /**
   * Overwrite the record of one of the authenticated user's lists.
   * WARNING: this replaces the whole record, including its creation date.
   * Failures are logged, not thrown.
   * @param {string} listUri - AT-URI of the list; its last segment is the record key
   * @param {{name?: string, description?: string}} [fields] - Values to write
   * @returns {Promise<void>}
   */
  async updateListRecord(listUri, { name = "Test", description = "Updated list" } = {}) {
    try {
      const response = await this.postJson("com.atproto.repo.putRecord", {
        repo: this.did,
        collection: "app.bsky.graph.list",
        rkey: listUri.split('/').pop(),
        record: {
          name,
          purpose: "app.bsky.graph.defs#curatelist",
          createdAt: new Date().toISOString(),
          description
        }
      });
      if (!response.ok) {
        console.error("Failed to update list record:", await BlueskyAPI.readErrorBody(response));
      }
    } catch (error) {
      console.error("Error updating list record:", error);
    }
  }

  /**
   * Create a new list
   * @param {string} listName - Name of the new list
   * @param {string} description - Description of the new list
   * @returns {Promise<string>} The AT-URI of the newly created list
   * @throws {Error} When the list cannot be created
   */
  async createNewList(listName, description) {
    try {
      const response = await this.postJson("com.atproto.repo.createRecord", {
        repo: this.did,
        collection: "app.bsky.graph.list",
        record: {
          name: listName,
          purpose: "app.bsky.graph.defs#curatelist",
          description: description,
          createdAt: new Date().toISOString(),
          "$type": "app.bsky.graph.list"
        }
      });
      if (!response.ok) {
        const errorData = await BlueskyAPI.readErrorBody(response);
        console.error("Failed to create new list:", errorData);
        throw new Error(`Failed to create new list: ${response.statusText}`);
      }
      const data = await response.json();
      const newListUri = data.uri;
      console.log("Created new list with URI:", newListUri);
      return newListUri;
    } catch (error) {
      console.error("Error creating new list:", error);
      throw error;
    }
  }
}
/**
 * Expand a short URL to the full URL of the page it points to.
 * Fetches the page (redirects are followed by fetch) and returns the content
 * of its og:url meta tag. When the page has no such tag but the request was
 * redirected, the final response URL is returned instead.
 * @param {string} url - Short URL to expand
 * @returns {Promise<string>} Full URL, or '' when it cannot be determined
 */
async function curlGetFullUrl(url) {
  try {
    console.log("Fetching URL:", url);
    const response = await fetch(url, {
      method: 'GET',
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
      }
    });
    const html = await response.text();
    console.log("Got HTML response");

    // Inspect each <meta> tag instead of relying on a fixed character offset,
    // so attribute order, quote style and extra whitespace do not matter.
    const metaTags = html.match(/<meta\b[^>]*>/gi) ?? [];
    for (const tag of metaTags) {
      if (!/\sproperty\s*=\s*(["'])og:url\1/i.test(tag)) {
        continue;
      }
      const content = tag.match(/\scontent\s*=\s*(["'])(.*?)\1/i);
      if (content && content[2]) {
        // Attribute values are HTML-escaped; undo the ampersand escaping.
        const fullUrl = content[2].replace(/&amp;/g, "&");
        console.log("Extracted full URL:", fullUrl);
        return fullUrl;
      }
    }

    if (response.redirected && response.url) {
      console.log("No og:url found, using redirect target:", response.url);
      return response.url;
    }
    console.log("No og:url found in response");
    return '';
  } catch (error) {
    // Best effort by contract: callers treat '' as "could not expand".
    console.error("Error in curlGetFullUrl:", error);
    return '';
  }
}
/**
 * Extract pack ID and handle from a Bluesky starter pack URL.
 * Supported forms:
 *   - .../starter-pack/<handle>/<packId>
 *   - .../start/<did>/<packId> (the DID is resolved to a handle)
 *   - bsky.app/starter-pack-short/<code> and go.bsky.app/<code>, which are
 *     expanded first
 * Query strings, fragments and trailing slashes are ignored.
 * @param {string} url - Full Bluesky starter pack URL
 * @returns {Promise<{handle: string, packId: string}>}
 * @throws {Error} When the URL is not a recognized starter pack URL or a
 *   lookup fails; the original error is attached as `cause`
 */
async function parseStarterPackUrl(url) {
  // Drops the query string and fragment, which never carry pack information.
  const stripSuffix = (value) => value.split(/[?#]/)[0];
  try {
    console.log("Parsing URL:", url);
    let target = stripSuffix(url);

    // Handle short URLs
    const isLegacyShort = target.includes("starter-pack-short");
    if (isLegacyShort) {
      console.log("Converting short URL to go.bsky.app format");
      target = target.replace("bsky.app/starter-pack-short", "go.bsky.app");
      console.log("Converted URL:", target);
    }
    if (isLegacyShort || target.includes("go.bsky.app/")) {
      const fullUrl = await curlGetFullUrl(target);
      console.log("Expanded URL:", fullUrl);
      if (!fullUrl) {
        throw new Error("Failed to expand short URL");
      }
      target = stripSuffix(fullUrl);
    }
    console.log("Final URL to parse:", target);

    // Empty segments (from "//" and trailing slashes) are dropped so that
    // positions are counted relative to the marker segment, not the string end.
    const segments = target.split("/").filter(Boolean);
    console.log("URL parts:", segments);

    const packIndex = segments.indexOf("starter-pack");
    if (packIndex !== -1 && segments.length >= packIndex + 3) {
      const [handle, packId] = segments.slice(packIndex + 1, packIndex + 3);
      console.log("Extracted handle:", handle, "packId:", packId);
      return { handle, packId };
    }

    const startIndex = segments.indexOf("start");
    if (startIndex !== -1 && segments.length >= startIndex + 3) {
      const [did, packId] = segments.slice(startIndex + 1, startIndex + 3);
      // Get the handle from the DID using the public Bluesky API
      const profileUrl = new URL("https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile");
      profileUrl.searchParams.set("actor", did);
      const response = await fetch(profileUrl.toString(), {
        method: 'GET',
        headers: {
          'Accept': 'application/json'
        }
      });
      if (!response.ok) {
        throw new Error(`Failed to resolve DID: ${response.statusText}`);
      }
      const data = await response.json();
      const handle = data.handle;
      if (!handle) {
        throw new Error("Failed to resolve DID: profile has no handle");
      }
      console.log("Extracted handle from DID:", handle, "packId:", packId);
      return { handle, packId };
    }

    throw new Error("Invalid starter pack URL format");
  } catch (error) {
    console.error("Error in parseStarterPackUrl:", error);
    throw new Error(`Failed to parse starter pack URL: ${error.message}`, { cause: error });
  }
}
/**
 * Extract list URI from a Bluesky list URL of the form
 * bsky.app/profile/<handle-or-did>/lists/<listId>.
 * A handle is resolved to its DID through the public Bluesky API; a DID in
 * the URL is used as is. Query strings, fragments and trailing slashes are
 * ignored.
 * @param {string} url - Full Bluesky list URL
 * @returns {Promise<string>} List AT-URI
 * @throws {Error} When the URL is not a list URL or the profile lookup
 *   fails; the original error is attached as `cause`
 */
async function getListUriFromUrl(url) {
  try {
    // Remove any query parameters and fragment
    const cleanUrl = url.split(/[?#]/)[0];
    // Empty segments are dropped so a trailing slash does not shift positions.
    const segments = cleanUrl.split("/").filter(Boolean);
    const profileIndex = segments.indexOf("profile");
    const isListUrl =
      cleanUrl.includes("bsky.app/profile/") &&
      profileIndex !== -1 &&
      segments[profileIndex + 2] === "lists" &&
      Boolean(segments[profileIndex + 3]);
    if (!isListUrl) {
      throw new Error("Invalid list URL format");
    }

    const handle = segments[profileIndex + 1];
    const listId = segments[profileIndex + 3];
    console.log("Getting list URI for:", {handle, listId});

    let did = handle;
    if (!handle.startsWith("did:")) {
      // Get user's DID first
      const profileUrl = new URL("https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile");
      profileUrl.searchParams.set("actor", handle);
      const profileResponse = await fetch(profileUrl.toString());
      if (!profileResponse.ok) {
        throw new Error(`Failed to get profile for ${handle}: ${profileResponse.statusText}`);
      }
      const profileData = await profileResponse.json();
      if (!profileData.did) {
        // Without this check the result would be "at://undefined/...".
        throw new Error(`Profile for ${handle} has no DID`);
      }
      did = profileData.did;
    }

    // Construct AT-URI
    const listUri = `at://${did}/app.bsky.graph.list/${listId}`;
    console.log("Constructed list URI:", listUri);
    return listUri;
  } catch (error) {
    throw new Error(`Failed to parse list URL: ${error.message}`, { cause: error });
  }
}
/**
 * Entry point of the merge: authenticates, resolves (or creates) the
 * destination list, then copies the members of every starter pack into it.
 * Packs are handled one after another; a pack that fails is logged and the
 * remaining packs are still processed.
 * @param {string} targetListUrl - Target list URL; when empty a new list is created
 * @param {Array<string>} starterPackUrls - Array of starter pack URLs
 * @returns {Promise<void>}
 */
async function mergeStarterPacks(targetListUrl, starterPackUrls) {
  const { BLUESKY_USERNAME, BLUESKY_PASSWORD } = process.env;
  const client = new BlueskyAPI(BLUESKY_USERNAME, BLUESKY_PASSWORD);
  await client.init();

  // Use the given list when there is one, otherwise start from a fresh list.
  const resolveDestination = async () => {
    if (targetListUrl) {
      return getListUriFromUrl(targetListUrl);
    }
    console.log("No target list URL specified. Creating a new list...");
    return client.createNewList("New List", "A new list created for merging starter packs");
  };
  const destinationUri = await resolveDestination();

  for (const packUrl of starterPackUrls) {
    try {
      console.log(`Processing starter pack from ${packUrl}...`);
      // Starter pack URL -> owner handle and pack id
      const { handle, packId } = await parseStarterPackUrl(packUrl);
      // Owner handle and pack id -> AT-URI of the list behind the pack
      const packListUri = await client.getStarterPackUri(handle, packId);
      // Copy that list's members into the destination list
      await client.mergeListMembers(packListUri, destinationUri);
      console.log(`Completed processing pack from ${packUrl}`);
    } catch (error) {
      console.error(`Failed to process pack from ${packUrl}:`, error);
    }
  }
}
// Example input: the starter packs to merge, given as full URLs.
const starterPackUrls = [
  "https://bsky.app/starter-pack/kashprime.bsky.social/3larb4ahvoy2c",
];

// URL of the list to merge into; leave blank to have a new list created.
const targetListUrl = "";

// The password is replaced by a fixed mask so it never reaches the log.
const redactedCredentials = {
  handle: BSKY_HANDLE,
  password: "********"
};
console.log("Starting with credentials:", redactedCredentials);

// Any error that escapes the merge is fatal: report it and exit non-zero.
function abortOnFatalError(error) {
  console.error("Fatal error:", error);
  process.exit(1);
}

// Run the merge with the inputs configured above.
mergeStarterPacks(targetListUrl, starterPackUrls).catch(abortOnFatalError);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment