Skip to content

Instantly share code, notes, and snippets.

@usrbinkat
Last active June 21, 2024 19:30
Show Gist options
  • Save usrbinkat/a20ceacf3f9756095396c5e31c49f22a to your computer and use it in GitHub Desktop.
Save usrbinkat/a20ceacf3f9756095396c5e31c49f22a to your computer and use it in GitHub Desktop.

DLX.py

Simple python script to download personal tweet history.

  1. Export credentials
export XUSERNAME=$username
export XPASSWORD=$password
  2. Install requirements
pip install twikit pandas
  3. Execute the script
./dlx2.py
  4. Logs & CSV

All tweets are saved to the CSV file `${username}_tweets.csv`. All logs are saved to `dlx2.log`.

Credentials cookie is stored in cookies.json and auth only executes when the cookies file is not found.

#!/usr/bin/env python3
"""Download a user's full tweet history to CSV via twikit.

Reads credentials from the XUSERNAME / XPASSWORD environment variables,
reuses a cached ``cookies.json`` session when present (login only runs
when the cookies file is missing), and appends tweets in batches to
``<username>_tweets.csv``.  All activity is logged to ``dlx2.log``.
"""
import os
import sys
import logging
import time
from twikit import Client
import pandas as pd

# Enable or disable debug mode
debug = True

# Load environment variables (credentials for the X/Twitter account)
username = os.getenv('XUSERNAME')
password = os.getenv('XPASSWORD')

# Parameters for fetching tweets
batch_size = 60      # Number of tweets requested per batch
delay_seconds = 60   # Delay in seconds between batches (rate-limit safety)

output_file = f'{username}_tweets.csv'
cookies_file = 'cookies.json'

# Column order for the output CSV; rows appended later must match this.
CSV_COLUMNS = ['username', 'user_id', 'tweet_id', 'tweet_date',
               'tweet_likes', 'tweet_views', 'tweet_comments', 'tweet_text']

# Module-level logger instead of the root logger, so other libraries'
# log records are not affected by our handler configuration.
logger = logging.getLogger(__name__)


def _setup_logging() -> None:
    """Attach a file handler (dlx2.log) and a console handler to the logger."""
    level = logging.DEBUG if debug else logging.INFO
    logger.setLevel(level)
    file_handler = logging.FileHandler('dlx2.log')
    file_handler.setLevel(level)
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logger.addHandler(file_handler)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(level)
    console_handler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(console_handler)


def _get_client() -> Client:
    """Return an authenticated twikit Client.

    Loads the cached session from ``cookies.json`` when it exists;
    otherwise performs a fresh login and saves the cookies for next time.
    """
    client = Client('en-US')
    logger.debug('Twikit client initialized')
    if os.path.exists(cookies_file):
        logger.debug('Cookies file %s exists. Loading cookies.', cookies_file)
        client.load_cookies(path=cookies_file)
    else:
        logger.debug('Cookies file %s does not exist. Logging in.', cookies_file)
        client.login(
            auth_info_1=username,
            password=password,
        )
        client.save_cookies(cookies_file)
        logger.debug('Cookies saved to %s', cookies_file)
    return client


def _tweet_row(tweet) -> dict:
    """Flatten a twikit tweet object into a dict matching CSV_COLUMNS."""
    return {
        'username': tweet.user.name,
        'user_id': tweet.user.id,
        'tweet_id': tweet.id,
        'tweet_date': tweet.created_at,
        'tweet_likes': tweet.favorite_count,
        'tweet_views': tweet.view_count,
        'tweet_comments': tweet.reply_count,
        'tweet_text': tweet.full_text,
    }


def main() -> None:
    """Fetch all tweets for `username` and append them to the output CSV."""
    # Fail fast with a clear message instead of logging in as 'None'
    # and writing to 'None_tweets.csv'.
    if not username or not password:
        sys.exit('Error: XUSERNAME and XPASSWORD environment variables must be set.')

    _setup_logging()
    # Do NOT log the password: credentials must never end up in dlx2.log.
    logger.debug('Environment variables loaded - XUSERNAME: %s, '
                 'XPASSWORD: %s', username, '<set>' if password else '<missing>')

    client = _get_client()

    # Retrieve the user object for the account being archived.
    user = client.get_user_by_screen_name(username)
    logger.debug('User object retrieved for username: %s', username)

    # Create a new CSV file with headers on first run only; subsequent
    # runs append to the existing file.
    if not os.path.exists(output_file):
        pd.DataFrame(columns=CSV_COLUMNS).to_csv(output_file, index=False)
        logger.debug('Created new CSV file: %s', output_file)

    # Fetch the initial batch; batch_size is passed so the configured
    # batch size is actually honored (twikit defaults to 40 otherwise).
    tweets = user.get_tweets('Tweets', count=batch_size)
    logger.debug('Initial batch of tweets fetched')

    while tweets:
        logger.debug('Processing batch of %d tweets', len(tweets))
        batch_data = [_tweet_row(t) for t in tweets]
        for row in batch_data:
            logger.debug('Processed tweet data: %s', row)

        # Append the batch to the CSV without rewriting the whole file.
        pd.DataFrame(batch_data, columns=CSV_COLUMNS).to_csv(
            output_file, mode='a', header=False, index=False)
        logger.debug('Appended batch of %d tweets to %s',
                     len(batch_data), output_file)

        try:
            tweets = tweets.next()
            logger.debug('Fetched next batch of tweets')
            # Delay between batches to avoid hitting the rate limit.
            logger.debug('Waiting for %d seconds before fetching the next batch...',
                         delay_seconds)
            time.sleep(delay_seconds)
        except Exception as e:
            # twikit raises when pagination is exhausted; treat as end-of-data.
            logger.debug('No more tweets to fetch: %s', e)
            break

    print(f"Successfully scraped tweets for {username}. Check the CSV file for results.")
    logger.info(f'Successfully scraped tweets for {username}. Check the CSV file for results.')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment