-
-
Save myusuf3/ff7e46c6f851536a4c844ca83aa6ac85 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
from atproto import Client | |
import os | |
from dotenv import load_dotenv | |
from loguru import logger | |
from collections import defaultdict | |
from time import sleep | |
from dataclasses import dataclass | |
import pprint | |
from typing import Optional | |
# Load environment variables from the .env file located next to this script.
dotenv_path = os.path.join(
    os.path.dirname(__file__),
    ".env",
)
load_dotenv(dotenv_path)

# Meta Config
# Shared pretty-printer used for dumping fetched data to the console.
pp = pprint.PrettyPrinter(indent=4)

# Bluesky credentials; each is None when the variable is not set.
BSKY_USER = os.environ.get("BSKY_USER")
BSKY_PWD = os.environ.get("BSKY_PWD")
@dataclass
class PaginationConfig:
    """Settings that control how paginated results are fetched."""

    # Maximum number of items requested per page.
    batch_size: int = 100
    # Seconds to pause between consecutive page requests.
    rate_limit_delay: float = 0.5
    # Cap on the total number of items to fetch; None means no cap.
    max_items: Optional[int] = None
def get_bsky_client(user: str, pwd: str):
    """
    Log in to Bluesky and return the login profile together with the client.

    Set your env variables in your .env file (your Bluesky login).

    Args:
        user: Login identifier for the Bluesky account.
        pwd: Password for that account.

    Returns:
        A ``(profile, client)`` tuple: the value returned by ``client.login``
        and the logged-in ``Client`` instance.

    Raises:
        ValueError: If ``user`` or ``pwd`` is missing or empty.
    """
    # os.getenv() yields None for unset variables, so callers that forward
    # BSKY_USER / BSKY_PWD may pass None here. Fail early with a clear message
    # instead of sending empty credentials to the login call.
    if not user or not pwd:
        raise ValueError(
            "Bluesky credentials are missing: set BSKY_USER and BSKY_PWD in your .env file"
        )

    load_dotenv()
    client = Client()
    profile = client.login(user, pwd)
    return profile, client
def fetch_user_profiles(client, user_ids: list[str]) -> dict[str, dict]:
    """
    Fetch detailed profiles for a list of user IDs.

    Each distinct ID is looked up once via ``client.get_profile``. IDs whose
    lookup fails are logged and left out of the result, so one bad profile
    does not abort the whole run.

    Args:
        client: Object exposing ``get_profile(actor=...)``.
        user_ids: Identifiers of the users to look up; duplicates are ignored.

    Returns:
        A dict keyed by user ID whose values hold the ``handle``,
        ``display_name``, ``bio`` and ``created_at`` of each profile.
    """
    profiles = {}
    # dict.fromkeys() drops duplicates while keeping first-seen order, so a
    # repeated ID does not trigger a redundant network request.
    for user_id in dict.fromkeys(user_ids):
        try:
            user_profile = client.get_profile(actor=user_id)
            profiles[user_id] = {
                "handle": user_profile.handle,
                "display_name": user_profile.display_name,
                "bio": user_profile.description,
                "created_at": user_profile.created_at,
            }
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going.
            logger.error(f"Failed to fetch profile for {user_id}: {e}")
    return profiles
def bsky_get_followers_with_profiles(client: Client, config=None) -> dict[str, dict]:
    """
    Gets all the accounts that have followed you, including bio and display name.

    Pages through ``client.get_followers`` for the actor named by the
    module-level ``BSKY_USER``, collecting follower DIDs, then resolves each
    DID to a detailed profile with ``fetch_user_profiles``.

    Args:
        client: Logged-in client used for the follower and profile requests.
        config: Optional ``PaginationConfig``; a default one is used when None.

    Returns:
        A dict keyed by follower DID, as produced by ``fetch_user_profiles``.
    """
    if config is None:
        config = PaginationConfig()

    # A non-positive cap means there is nothing to fetch. Returning here also
    # avoids sending a request with limit <= 0 to the API.
    if config.max_items is not None and config.max_items <= 0:
        return {}

    items_from_api = 0
    cursor = None
    follower_ids = []

    while True:
        # Never request more than what is still allowed by max_items.
        if config.max_items is None:
            batch_limit = config.batch_size
        else:
            batch_limit = min(config.batch_size, config.max_items - items_from_api)

        # Fetch the current page
        response = client.get_followers(
            actor=BSKY_USER, cursor=cursor, limit=batch_limit
        )
        follower_ids.extend(follower.did for follower in response.followers)
        items_from_api += len(response.followers)
        cursor = response.cursor

        reached_cap = config.max_items is not None and items_from_api >= config.max_items
        if not cursor or reached_cap:
            break

        # Pause only when another page is going to be requested.
        sleep(config.rate_limit_delay)

    # Fetch detailed profiles for all followers
    return fetch_user_profiles(client, follower_ids)
def save_followers_to_csv(followers: dict[str, dict], file_name: str):
    """
    Write every follower to a CSV file, one row per DID.

    Args:
        followers: Mapping of follower DID to a dict of profile fields.
        file_name: Path of the CSV file to create or overwrite.
    """
    column_titles = ["DID", "Handle", "Display Name", "Bio", "Created At"]
    profile_keys = ("handle", "display_name", "bio", "created_at")

    with open(file_name, mode="w", newline="", encoding="utf-8") as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(column_titles)
        # Missing profile fields are written as empty cells.
        for follower_did, details in followers.items():
            csv_writer.writerow(
                [follower_did, *(details.get(key, "") for key in profile_keys)]
            )

    logger.info(f"Followers data saved to {file_name}")
def save_bios_to_csv(followers: dict[str, dict], file_name: str):
    """
    Save only the bios of followers to a CSV file.

    Followers without a usable bio (missing key, None, empty or
    whitespace-only) are skipped, so the file never contains rows that
    have a handle but no bio.

    Args:
        followers: Mapping of follower DID to a dict of profile fields.
        file_name: Path of the CSV file to create or overwrite.
    """
    with open(file_name, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        # Write the header
        writer.writerow(["Handle", "Bio"])
        # Write follower bios
        for data in followers.values():
            bio = data.get("bio")
            # A profile without a description yields None rather than "",
            # so test for "no content" instead of comparing against "".
            if not bio or not bio.strip():
                continue
            writer.writerow([data.get("handle", ""), bio])

    logger.info(f"Bios saved to {file_name}")
def clean_bios_csv(file_name: str):
    """
    Delete rows from bios.csv that only have a handle and no bio.
    Rewrites the file in place.

    The first row is treated as the header and always kept. A completely
    empty file is left untouched.

    Args:
        file_name: Path of the CSV file to clean.
    """
    with open(file_name, mode="r", newline="", encoding="utf-8") as file:
        reader = csv.reader(file)
        # next() with a default avoids StopIteration on an empty file.
        header = next(reader, None)
        if header is None:
            rows_to_keep = None
        else:
            rows_to_keep = [header]
            # Keep a row only if the bio column exists and has content.
            rows_to_keep.extend(
                row for row in reader if len(row) >= 2 and row[1].strip()
            )

    if rows_to_keep is None:
        logger.warning(f"{file_name} is empty - nothing to clean")
        return

    with open(file_name, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerows(rows_to_keep)

    logger.info(f"Cleaned {file_name} - removed rows with empty bios")
if __name__ == "__main__":
    # Authenticate with the credentials read from the environment.
    profile, client = get_bsky_client(BSKY_USER, BSKY_PWD)

    # Greeting the logged-in account confirms that the connection works.
    logger.info(f"Welcome {profile.display_name}")

    # Collect every follower together with its detailed profile (bio included).
    pagination = PaginationConfig()
    followers = bsky_get_followers_with_profiles(client, config=pagination)
    pp.pprint(followers)

    # Export: bios only first, then the full follower table.
    save_bios_to_csv(followers, "bios.csv")
    save_followers_to_csv(followers, "followers.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
annotated-types==0.7.0 | |
anyio==4.6.2.post1 | |
atproto==0.0.55 | |
blusky @ file:///Users/myusuf3/workspace/blusky | |
certifi==2024.8.30 | |
cffi==1.17.1 | |
charset-normalizer==3.4.0 | |
click==8.1.7 | |
click-default-group==1.2.4 | |
cryptography==43.0.3 | |
distro==1.9.0 | |
dnspython==2.7.0 | |
h11==0.14.0 | |
httpcore==1.0.7 | |
httpx==0.27.2 | |
idna==3.10 | |
jiter==0.7.1 | |
libipld==3.0.0 | |
llm==0.17.1 | |
loguru==0.7.2 | |
openai==1.54.4 | |
pluggy==1.5.0 | |
puremagic==1.28 | |
pycparser==2.22 | |
pydantic==2.9.2 | |
pydantic_core==2.23.4 | |
python-dateutil==2.9.0.post0 | |
python-dotenv==1.0.1 | |
python-ulid==3.0.0 | |
PyYAML==6.0.2 | |
regex==2024.11.6 | |
requests==2.32.3 | |
ruff==0.7.4 | |
setuptools==75.5.0 | |
six==1.16.0 | |
sniffio==1.3.1 | |
sqlite-fts4==1.0.3 | |
sqlite-migrate==0.1b0 | |
sqlite-utils==3.37 | |
tabulate==0.9.0 | |
tiktoken==0.8.0 | |
tqdm==4.67.0 | |
ttok==0.3 | |
typing_extensions==4.12.2 | |
urllib3==2.2.3 | |
websockets==13.1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment