|
#!/usr/bin/env python3 |
|
import os |
|
import logging |
|
import time |
|
from twikit import Client |
|
import pandas as pd |
|
|
|
# Enable or disable debug mode (controls verbosity of both log handlers below)
debug = True

# Load credentials from environment variables.
username = os.getenv('XUSERNAME')
password = os.getenv('XPASSWORD')

# Fail fast with a clear message: without this, missing env vars surface later
# as a confusing twikit login error and an output file named 'None_tweets.csv'.
if not username or not password:
    raise SystemExit('XUSERNAME and XPASSWORD environment variables must be set.')

# Initialize parameters for fetching tweets
batch_size = 60     # Number of tweets per batch (NOTE(review): currently unused below — get_tweets paginates on its own)
delay_seconds = 60  # Delay in seconds between batches, to stay under the rate limit
output_file = f'{username}_tweets.csv'
|
|
|
# Setup logging: the root logger feeds a file handler (timestamped records in
# dlx2.log) and a console handler (bare messages). Level follows the debug flag.
log_level = logging.DEBUG if debug else logging.INFO

logger = logging.getLogger()
logger.setLevel(log_level)

# Create file handler — persistent, timestamped record of the run
file_handler = logging.FileHandler('dlx2.log')
file_handler.setLevel(log_level)
file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)

# Create console handler — same records, without the timestamp noise
console_handler = logging.StreamHandler()
console_handler.setLevel(log_level)
console_formatter = logging.Formatter('%(message)s')
console_handler.setFormatter(console_formatter)
logger.addHandler(console_handler)

# SECURITY: never log the password itself — a plaintext credential in dlx2.log
# (or on screen) is a leak. Record only whether one was provided.
logger.debug(
    'Environment variables loaded - XUSERNAME: %s, XPASSWORD: %s',
    username,
    'set' if password else 'missing',
)
|
|
|
# Set up the twikit client and authenticate: reuse a saved session from
# cookies.json when one exists, otherwise log in with credentials and save
# the session for subsequent runs.
client = Client('en-US')
logger.debug('Twikit client initialized')

cookies_file = 'cookies.json'
if not os.path.exists(cookies_file):
    # First run (or cookies were deleted): perform a fresh credential login.
    logger.debug(f'Cookies file {cookies_file} does not exist. Logging in.')
    client.login(
        auth_info_1=username,
        password=password,
    )
    # Persist the session so future runs skip the login step.
    client.save_cookies(cookies_file)
    logger.debug(f'Cookies saved to {cookies_file}')
else:
    # Resume the previous session instead of logging in again.
    logger.debug(f'Cookies file {cookies_file} exists. Loading cookies.')
    client.load_cookies(path=cookies_file)

# Resolve our own account to a user object we can pull a timeline from.
user = client.get_user_by_screen_name(username)
logger.debug(f'User object retrieved for username: {username}')
|
|
|
# Seed the output CSV with a header row on first run. Later batches are
# appended with header=False, so the header must exist before the loop starts.
csv_columns = [
    'username', 'user_id', 'tweet_id', 'tweet_date',
    'tweet_likes', 'tweet_views', 'tweet_comments', 'tweet_text',
]
if not os.path.exists(output_file):
    pd.DataFrame(columns=csv_columns).to_csv(output_file, index=False)
    logger.debug(f'Created new CSV file: {output_file}')

# Pull the first page of the account's own timeline.
tweets = user.get_tweets('Tweets')
logger.debug('Initial batch of tweets fetched')
|
|
|
# Page through the timeline, appending each batch to the CSV as it arrives so
# an interruption loses at most the current (unwritten) batch.
while tweets:
    logger.debug(f'Processing batch of {len(tweets)} tweets')
    batch_data = []
    for tweet in tweets:
        # Flatten the tweet object into the flat CSV schema created above.
        tweet_data = {
            'username': tweet.user.name,
            'user_id': tweet.user.id,
            'tweet_id': tweet.id,
            'tweet_date': tweet.created_at,
            'tweet_likes': tweet.favorite_count,
            'tweet_views': tweet.view_count,
            'tweet_comments': tweet.reply_count,
            'tweet_text': tweet.full_text,
        }
        logger.debug(f'Processed tweet data: {tweet_data}')
        batch_data.append(tweet_data)

    # Append without a header row — the header was written at file creation.
    df_batch = pd.DataFrame(batch_data)
    df_batch.to_csv(output_file, mode='a', header=False, index=False)
    logger.debug(f'Appended batch of {len(batch_data)} tweets to {output_file}')

    try:
        tweets = tweets.next()
        logger.debug('Fetched next batch of tweets')

        # Introduce a delay to avoid hitting the rate limit
        logger.debug(f'Waiting for {delay_seconds} seconds before fetching the next batch...')
        time.sleep(delay_seconds)

    except Exception as e:
        # Deliberate best-effort stop: pagination exhaustion surfaces here.
        # NOTE(review): a genuine network/auth failure also lands here and is
        # only visible at DEBUG level — consider logger.warning if that matters.
        logger.debug(f"No more tweets to fetch: {e}")
        break

# The console StreamHandler already echoes logger.info to the terminal, so a
# separate print() here showed the final message twice; log it exactly once
# (it still reaches both the console and dlx2.log).
logger.info(f'Successfully scraped tweets for {username}. Check the CSV file for results.')