Created
June 12, 2023 19:16
-
-
Save abhishekmishragithub/a24ce0d462ec62a72c19804f7878ec9a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
import pandas as pd | |
import numpy as np | |
from datetime import datetime, timedelta | |
import random | |
from rich.progress import Progress | |
import time | |
def create_dataset(file_name, num_rows, start_date, end_date):
    """Generate a synthetic OHLCV stock dataset and write it to a CSV file.

    Each row gets a date drawn uniformly from [start_date, end_date], a
    ticker drawn from a fixed symbol list, random Open/High/Low/Close
    prices (with Low <= Close <= High guaranteed by construction), and a
    random Volume.

    Args:
        file_name: Path of the CSV file to write (no index column).
        num_rows: Number of rows to generate.
        start_date: First candidate date (inclusive), a ``datetime``.
        end_date: Last candidate date (inclusive), a ``datetime``.
    """
    with Progress() as progress:
        task = progress.add_task("[cyan]Creating dataset...", total=num_rows)
        # Draw all dates in one vectorized call instead of a per-row
        # Python loop with a progress update per iteration -- orders of
        # magnitude faster for the default 10M rows, same distribution.
        date_range = pd.date_range(start_date, end_date)
        dates = np.random.choice(date_range, num_rows)
        # Generate random tickers
        tickers = np.random.choice(['AAPL', 'GOOG', 'MSFT', 'AMZN', 'FB'], num_rows)
        # Generate random open, high, low, close prices and volumes.
        # High/Low are offset from Open so High >= Open >= Low, and Close
        # is sampled per-row inside [Low, High].
        opens = np.random.uniform(100, 200, num_rows)
        highs = opens + np.random.uniform(0, 10, num_rows)
        lows = opens - np.random.uniform(0, 10, num_rows)
        closes = np.random.uniform(lows, highs, num_rows)
        volumes = np.random.randint(5000, 10000, num_rows)
        # Create a DataFrame
        df = pd.DataFrame({
            'Date': dates,
            'Ticker': tickers,
            'Open': opens,
            'High': highs,
            'Low': lows,
            'Close': closes,
            'Volume': volumes
        })
        # Write the DataFrame to a CSV file
        df.to_csv(file_name, index=False)
        # Generation is now a handful of vectorized steps; complete the
        # bar in one update rather than once per row.
        progress.update(task, advance=num_rows)
def upload_dataset_to_s3(file_name, bucket_name='', region_name='us-west-2'):
    """Upload a local file to the ``stock_data_monthly/`` prefix of an S3 bucket.

    Credentials are resolved through boto3's default chain (environment
    variables, shared credentials file, instance/role profile) rather
    than being hard-coded in source.

    Args:
        file_name: Local path of the file to upload; also used as the
            trailing part of the S3 object key.
        bucket_name: Target S3 bucket. Defaults to '' for backward
            compatibility; an empty value now raises a clear error
            instead of failing inside boto3.
        region_name: AWS region for the session (default 'us-west-2',
            matching the previous hard-coded value).

    Raises:
        ValueError: If ``bucket_name`` is empty.
    """
    # Fail fast with an actionable message; the old empty default could
    # only ever error deep inside boto3 with an opaque exception.
    if not bucket_name:
        raise ValueError('bucket_name must be set (e.g. "your-bucket-name")')
    with Progress() as progress:
        task = progress.add_task("[green]Uploading dataset to S3...", total=100)
        # SECURITY: never embed AWS access keys in source code. The
        # session picks up credentials from the standard chain instead.
        session = boto3.Session(region_name=region_name)
        # Create an S3 client using the session
        s3 = session.client('s3')
        # Upload the file into the 'stock_data_monthly' folder. Note:
        # upload_file blocks, so the bar can only be completed afterwards.
        s3.upload_file(file_name, bucket_name, 'stock_data_monthly/' + file_name)
        progress.update(task, advance=100)
if __name__ == '__main__':
    # Collect run parameters, substituting defaults on empty input.
    file_name = input(f'Enter the file name (default: stock_data_monthly.csv): ') or 'stock_data_monthly.csv'
    if not file_name.endswith('.csv'):
        file_name += '.csv'
    num_rows = int(input(f'Enter the number of rows (default: 10000000): ') or 10000000)
    start_date = datetime(2023, 1, 1)
    end_date = datetime(2023, 12, 31)

    # Run each stage in order and report its wall-clock duration.
    stages = (
        ('create_dataset',
         lambda: create_dataset(file_name=file_name, num_rows=num_rows,
                                start_date=start_date, end_date=end_date)),
        ('upload_dataset_to_s3',
         lambda: upload_dataset_to_s3(file_name=file_name)),
    )
    for label, stage in stages:
        started = time.time()
        stage()
        elapsed = time.time() - started
        print(f'Execution time of {label}: {elapsed:.2f} seconds')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment