Skip to content

Instantly share code, notes, and snippets.

@myusuf3
Created November 16, 2024 20:26
Show Gist options
  • Save myusuf3/ff7e46c6f851536a4c844ca83aa6ac85 to your computer and use it in GitHub Desktop.
Save myusuf3/ff7e46c6f851536a4c844ca83aa6ac85 to your computer and use it in GitHub Desktop.
import csv
from atproto import Client
import os
from dotenv import load_dotenv
from loguru import logger
from collections import defaultdict
from time import sleep
from dataclasses import dataclass
import pprint
from typing import Optional
# Load environment variables from the .env file that sits next to this script,
# so the credentials below resolve regardless of the current working directory.
dotenv_path = os.path.join(os.path.dirname(__file__), ".env")
load_dotenv(dotenv_path)
# Meta Config
# Shared pretty-printer used by the script entry point to dump results.
pp = pprint.PrettyPrinter(indent=4)
# Bluesky credentials; os.getenv returns None for a variable that is not set.
# BSKY_USER is also the actor whose followers are listed further down.
BSKY_USER = os.getenv("BSKY_USER")
BSKY_PWD = os.getenv("BSKY_PWD")
@dataclass
class PaginationConfig:
    """Tunable settings that control how a paginated listing is walked."""

    # Largest number of items requested in a single page.
    batch_size: int = 100
    # Pause, in seconds, inserted between consecutive page requests.
    rate_limit_delay: float = 0.5
    # Overall cap on the number of items collected; None means no cap.
    max_items: Optional[int] = None
def get_bsky_client(user: str, pwd: str):
    """
    Log in to Bluesky and return the login result together with the client.

    Set your env variables in your .env file (your Bluesky login); the
    script entry point reads them and passes them in here.

    Args:
        user: Login identifier of the Bluesky account.
        pwd: Password for that account.

    Returns:
        A ``(profile, client)`` tuple: whatever ``Client.login`` returned and
        the ``Client`` instance that performed the login.

    Raises:
        ValueError: If ``user`` or ``pwd`` is missing or empty, which usually
            means the environment variables were never set.
    """
    # Credentials come from os.getenv and may be None; fail early with a
    # clear message instead of an obscure error from deep inside the SDK.
    if not user or not pwd:
        raise ValueError(
            "Bluesky credentials are missing; set BSKY_USER and BSKY_PWD "
            "in your .env file"
        )
    load_dotenv()
    client = Client()
    profile = client.login(user, pwd)
    return profile, client
def fetch_user_profiles(client, user_ids: list[str]) -> dict[str, dict]:
    """
    Look up each user ID through ``client.get_profile`` and collect a summary.

    Returns a dict keyed by the user ID that was requested; every value holds
    the "handle", "display_name", "bio" and "created_at" of that profile.
    A lookup that raises is logged and left out of the result, so one bad
    account does not abort the whole run.
    """
    # Output key -> attribute read from the profile object, in read order.
    field_map = (
        ("handle", "handle"),
        ("display_name", "display_name"),
        ("bio", "description"),
        ("created_at", "created_at"),
    )
    collected = {}
    for user_id in user_ids:
        try:
            account = client.get_profile(actor=user_id)
            collected[user_id] = {
                key: getattr(account, attribute) for key, attribute in field_map
            }
        except Exception as e:
            logger.error(f"Failed to fetch profile for {user_id}: {e}")
    return collected
def bsky_get_followers_with_profiles(client: Client, config=None) -> dict[str, dict]:
    """
    Gets all the accounts that have followed you, including bio and display name

    Pages through ``client.get_followers`` for the account named by BSKY_USER,
    collecting follower DIDs, then resolves each DID to a profile summary via
    ``fetch_user_profiles``.

    Args:
        client: Logged-in client used for every request.
        config: Optional PaginationConfig; a default one is created when None.

    Returns:
        Dict keyed by follower DID, as produced by ``fetch_user_profiles``.
        Empty when ``config.max_items`` is zero or negative.
    """
    if config is None:
        config = PaginationConfig()

    max_items = config.max_items
    # A non-positive cap means nothing should be fetched. Asking the API for
    # limit=0 (or a negative limit) would be an invalid request.
    if max_items is not None and max_items <= 0:
        return {}

    # The getFollowers endpoint documents a per-request limit of 1..100, so
    # keep the page size inside that range whatever batch_size says.
    page_size = max(1, min(config.batch_size, 100))

    follower_ids = []
    cursor = None
    while True:
        if max_items is None:
            batch_limit = page_size
        else:
            # Never request more than what is still needed to reach the cap.
            batch_limit = min(page_size, max_items - len(follower_ids))

        # Fetch the current page
        response = client.get_followers(
            actor=BSKY_USER, cursor=cursor, limit=batch_limit
        )
        follower_ids.extend(follower.did for follower in response.followers)
        cursor = response.cursor

        reached_cap = max_items is not None and len(follower_ids) >= max_items
        if not cursor or reached_cap:
            break
        # Pause only between pages, never after the final one.
        sleep(config.rate_limit_delay)

    # Fetch detailed profiles for all followers
    return fetch_user_profiles(client, follower_ids)
def save_followers_to_csv(followers: dict[str, dict], file_name: str):
    """
    Export every follower record to a CSV file, one row per DID.

    The file is overwritten. Each row holds the DID followed by the record's
    handle, display name, bio and creation timestamp; a key that is absent
    from a record is written as an empty cell.
    """
    # Record keys in the same order as the header columns after "DID".
    columns = ("handle", "display_name", "bio", "created_at")
    with open(file_name, mode="w", newline="", encoding="utf-8") as file:
        out = csv.writer(file)
        out.writerow(["DID", "Handle", "Display Name", "Bio", "Created At"])
        out.writerows(
            [did, *(record.get(column, "") for column in columns)]
            for did, record in followers.items()
        )
    logger.info(f"Followers data saved to {file_name}")
def save_bios_to_csv(followers: dict[str, dict], file_name: str):
    """
    Save only the bios of followers to a CSV file

    The file is overwritten and gets a "Handle", "Bio" header followed by one
    row per follower that actually has a bio.

    Args:
        followers: Mapping of follower ID to a record with "handle" and "bio".
        file_name: Path of the CSV file to write.
    """
    with open(file_name, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        # Write the header
        writer.writerow(["Handle", "Bio"])
        # Write follower bios
        for data in followers.values():
            bio = data.get("bio")
            # A profile without a description is stored as None, not "", so
            # comparing against "" alone let handle-only rows through. Skip
            # None, missing, empty and whitespace-only bios alike.
            if not bio or not bio.strip():
                continue
            writer.writerow([data.get("handle", ""), bio])
    logger.info(f"Bios saved to {file_name}")
def clean_bios_csv(file_name: str):
    """
    Delete rows from bios.csv that only have a handle and no bio.
    Rewrites the file in place.

    The first row is treated as the header and always kept. A data row is
    kept only when it has a second column with non-whitespace content.
    An empty file is left untouched.

    Args:
        file_name: Path of the CSV file to clean.
    """
    with open(file_name, mode="r", newline="", encoding="utf-8") as file:
        reader = csv.reader(file)
        # A default avoids StopIteration when the file has no header row.
        header = next(reader, None)
        if header is None:
            logger.warning(f"{file_name} is empty - nothing to clean")
            return
        rows_to_keep = [header]
        # Keep rows whose bio column exists and has content.
        rows_to_keep.extend(
            row for row in reader if len(row) >= 2 and row[1].strip()
        )
    with open(file_name, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerows(rows_to_keep)
    logger.info(f"Cleaned {file_name} - removed rows with empty bios")
if __name__ == "__main__":
    # Authenticate with the credentials read from the environment and greet
    # the account to confirm the session works.
    profile, bsky = get_bsky_client(BSKY_USER, BSKY_PWD)
    logger.info(f"Welcome {profile.display_name}")

    # Collect every follower together with its profile details (bio included).
    paging = PaginationConfig()
    follower_table = bsky_get_followers_with_profiles(bsky, config=paging)
    pp.pprint(follower_table)

    # Export: the bios-only file first, then the full follower table.
    exports = (
        (save_bios_to_csv, "bios.csv"),
        (save_followers_to_csv, "followers.csv"),
    )
    for export, target in exports:
        export(follower_table, target)
annotated-types==0.7.0
anyio==4.6.2.post1
atproto==0.0.55
blusky @ file:///Users/myusuf3/workspace/blusky
certifi==2024.8.30
cffi==1.17.1
charset-normalizer==3.4.0
click==8.1.7
click-default-group==1.2.4
cryptography==43.0.3
distro==1.9.0
dnspython==2.7.0
h11==0.14.0
httpcore==1.0.7
httpx==0.27.2
idna==3.10
jiter==0.7.1
libipld==3.0.0
llm==0.17.1
loguru==0.7.2
openai==1.54.4
pluggy==1.5.0
puremagic==1.28
pycparser==2.22
pydantic==2.9.2
pydantic_core==2.23.4
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-ulid==3.0.0
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.3
ruff==0.7.4
setuptools==75.5.0
six==1.16.0
sniffio==1.3.1
sqlite-fts4==1.0.3
sqlite-migrate==0.1b0
sqlite-utils==3.37
tabulate==0.9.0
tiktoken==0.8.0
tqdm==4.67.0
ttok==0.3
typing_extensions==4.12.2
urllib3==2.2.3
websockets==13.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment