Get Nasdaq-100 stocks trade data for the trailing 2 years from polygon.io
from bs4 import BeautifulSoup
from datetime import timedelta
import pandas as pd
import pandas_market_calendars as mcal
import requests
import time
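# Dependencies (assumed install command, matching the imports above):
#   pip install beautifulsoup4 pandas pandas_market_calendars requests lxml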
# ----------------------------------------------
# Scrape NASDAQ 100 stock ticker names from wiki
# ----------------------------------------------
def get_nasdaq_100_tickers():
    # Create a soup object of the target HTML page
    url = 'https://en.wikipedia.org/wiki/Nasdaq-100'
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'lxml')
    # Find the table of rows containing the NASDAQ 100 constituents
    table = soup.find('table', {'id': 'constituents'})
    rows = table.tbody.find_all('tr')
    # Create a list to store tickers
    tickers = []
    for row in rows[1:]:  # Skip the header row
        # Access the ticker from the row data (in the 2nd column)
        td = row.find_all('td')[1]
        # Append the ticker to the list
        tickers.append(td.text.strip())
    # Remove the class-A ticker for Alphabet since class-C (GOOG) already exists
    tickers.remove('GOOGL')
    return tickers
# Get NASDAQ 100 tickers
tickers = get_nasdaq_100_tickers()
# Print number of tickers collected
print(f'Tickers Found: {len(tickers)}') ## Tickers Found: 100
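# Note: the scrape assumes Wikipedia keeps the constituents table's id
# attribute set to 'constituents'; update the selector if the page changes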
# -------------------------------------------
# Get valid trading days for trailing 2 years
# -------------------------------------------
# Get the trading calendar for the New York Stock Exchange (NYSE)
nyse = mcal.get_calendar('NYSE')
# Set the latest end date in the US Eastern time zone (ET)
end_date = pd.Timestamp.now(tz='US/Eastern')
# Set the start date 2 years behind the end date (the timezone is preserved)
start_date = end_date - pd.DateOffset(years=2)
# Get valid trading days for the last 2 years, but don't pass timezone-aware dates here
trading_days = nyse.valid_days(start_date=start_date.tz_localize(None), end_date=end_date.tz_localize(None))
# Then convert the timezone-aware result to 'US/Eastern'
trading_days = trading_days.tz_convert('US/Eastern')
# Print the number of trading days in the trailing 2 years
print(f'No. of trading days in trailing 2 years: {len(trading_days)}')
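# For reference: the NYSE has roughly 252 trading days per year, so a
# trailing-2-year window should yield on the order of 500 valid days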
# -------------------------------
# Define a function to fetch data
# -------------------------------
# Function to fetch minute-bar aggregates for a ticker within a given date range
def fetch_data(ticker, start_date, end_date):
    url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/minute/{start_date}/{end_date}"
    params = {"sort": "asc", "limit": 50000}
    # Use your own polygon.io API key here
    headers = {"Authorization": "Bearer <YOUR_POLYGON_API_KEY>"}
    response = requests.get(url, params=params, headers=headers)
    # 'results' is absent when a request fails or returns no data, so fall
    # back to an empty list instead of raising a KeyError
    data = response.json().get('results', [])
    df = pd.DataFrame(data)
    return df
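# The raw 'results' records use Polygon's short column keys, so the returned
# DataFrame columns are: v (volume), vw (volume-weighted average price),
# o (open), c (close), h (high), l (low), t (Unix timestamp in milliseconds),
# and n (number of transactions).
# If a window were ever to exceed the 50,000-row limit, Polygon's response
# includes a 'next_url' field for pagination. A minimal sketch of a paginated
# variant, assuming that field name from Polygon's v2 aggregates docs (note
# that each followed page counts as an extra API call):
def fetch_data_paginated(ticker, start_date, end_date):
    url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/minute/{start_date}/{end_date}"
    params = {"sort": "asc", "limit": 50000}
    headers = {"Authorization": "Bearer <YOUR_POLYGON_API_KEY>"}
    frames = []
    while url:
        payload = requests.get(url, params=params, headers=headers).json()
        frames.append(pd.DataFrame(payload.get('results', [])))
        # 'next_url' is only present when more pages remain
        url = payload.get('next_url')
        # next_url already carries the query parameters, so drop them
        params = None
    return pd.concat(frames, ignore_index=True)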
# --------------------------------------------------
# Get data for each ticker and save it to a csv file
# --------------------------------------------------
# Initialize variables to track progress
start_time = time.time()
total_api_calls = 0
for t in range(len(tickers)):
    # Select a ticker
    ticker = tickers[t]
    # Initialize an empty DataFrame for the ticker
    df_all = pd.DataFrame()
    # Loop through trading days in windows of 52 days because the aggregates
    # endpoint caps each request at 50,000 data points
    for day in range(0, len(trading_days), 52):
        # Select the date range for a span of 52 trading days; the window ends
        # at day + 51 so consecutive windows don't overlap on their boundary
        # day (Polygon treats the from/to date range as inclusive)
        start_date = trading_days[day].date().isoformat()
        end_date = trading_days[min(day + 51, len(trading_days) - 1)].date().isoformat()
        # Fetch data for the ticker in that date range
        df = fetch_data(ticker, start_date, end_date)
        # Increment the total API call count by 1
        total_api_calls += 1
        # Append the data to the ticker's DataFrame
        df_all = pd.concat([df_all, df], ignore_index=True)
        # Sleep for 12 seconds to stay within the API rate limit (5 requests per minute)
        time.sleep(12)
        # Print a progress update
        elapsed_time = time.time() - start_time
        print(f"\rProcessing ticker {t+1}/100: {ticker} || API Calls: {total_api_calls}/1000 || Elapsed Time: {str(timedelta(seconds=int(elapsed_time)))}", end="")
    # Write the ticker's data to a CSV file
    df_all.to_csv(f"{ticker}.csv", index=False)
# Each ticker spans 10 request windows, so total_api_calls // 10 approximates
# the number of fully fetched tickers
print(f'\nTrailing 2 years trade data was successfully fetched for {total_api_calls//10} out of 100 stocks and total api calls made were: {total_api_calls}')
## OUTPUT: Processing ticker 100/100: ZS || API Calls: 960/1000 || Elapsed Time: 7:43:31
## Trailing 2 years trade data was successfully fetched for 96 out of 100 stocks and total api calls made were: 960
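# Reading a saved file back (a minimal sketch; 't' holds the Unix timestamp
# in milliseconds, per the column notes above):
# df = pd.read_csv(f"{tickers[0]}.csv")
# df['t'] = pd.to_datetime(df['t'], unit='ms', utc=True).dt.tz_convert('US/Eastern')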
# ----------------------------------------------------- E N D -----------------------------------------------------