Created
August 5, 2023 17:59
-
-
Save narenmanoharan/cc9487643f4ba9ef50d7ba97bce4a289 to your computer and use it in GitHub Desktop.
Script for the top 10K wealth managers in the US
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
from sec_api import FormAdvApi | |
from loguru import logger | |
from tqdm import tqdm | |
import plotly.express as px | |
DATA_FILE_PATH = "wealth_managers.csv" | |
RESULTS_PER_PAGE = 50 | |
logger.info('Reading API Key from environment variables...') | |
API_KEY = os.getenv("SEC_API_KEY") | |
logger.info('Initializing API...') | |
formAdvApi = FormAdvApi(API_KEY) | |
if not os.path.exists(DATA_FILE_PATH) or os.path.getsize(DATA_FILE_PATH) == 0: | |
pd.DataFrame(columns=['CRD', 'Name', 'AUM ($)', 'Accounts', 'Employees', 'Street1', 'City', 'State', 'Phone']).to_csv(DATA_FILE_PATH, index=False) | |
df_existing = pd.read_csv(DATA_FILE_PATH) | |
current_page = len(df_existing) // RESULTS_PER_PAGE | |
logger.info(f'Resuming from page {current_page}...') | |
required_columns = ['Info.FirmCrdNb', 'Info.BusNm', 'FormInfo.Part1A.Item5F.Q5F2C', 'FormInfo.Part1A.Item5F.Q5F2F', 'FormInfo.Part1A.Item5A.TtlEmp', 'MainAddr.Strt1', 'MainAddr.City', 'MainAddr.State', 'MainAddr.PhNb'] | |
for i in tqdm(range(current_page, current_page + 100)): | |
query = { | |
"query": { | |
"bool": { | |
"must": [ | |
{"query_string": {"query": "FormInfo.Part1A.Item5F.Q5F2C:[1000000 TO *]"}}, | |
] | |
} | |
}, | |
"from": str(i * RESULTS_PER_PAGE), | |
"size": str(RESULTS_PER_PAGE), | |
"sort": [{"FormInfo.Part1A.Item5F.Q5F2C": {"order": "desc"}}] | |
} | |
try: | |
response = formAdvApi.get_firms(query) | |
except Exception as e: | |
logger.error(f"An error occurred: {e}") | |
exit(1) | |
firm_advs = response['filings'] | |
temp_df = pd.json_normalize(firm_advs) | |
temp_df = temp_df[required_columns] | |
temp_df.columns = ['CRD', 'Name', 'AUM ($)', 'Accounts', 'Employees', 'Street1', 'City', 'State', 'Phone'] | |
temp_df['Employees'] = temp_df['Employees'].fillna(0).astype(int) | |
temp_df.dropna(subset=['AUM ($)'], inplace=True) | |
temp_df.to_csv(DATA_FILE_PATH, mode='a', header=False, index=False) | |
df = pd.read_csv(DATA_FILE_PATH) | |
df.drop_duplicates(inplace=True) | |
total_market_size = df['AUM ($)'].sum() | |
logger.info('Total market size: ${:,.0f}'.format(total_market_size)) | |
state_aum = df.groupby('State')['AUM ($)'].sum() | |
state_aum_formatted = state_aum.apply(lambda x: f"${x:,.0f}") | |
logger.info(f'AUM by state:\n{state_aum_formatted}') | |
fig = px.bar(x=state_aum.index, y=state_aum.values, title='Assets Under Management by State') | |
fig.update_xaxes(title_text='State') | |
fig.update_yaxes(title_text='Assets Under Management ($)') | |
fig.update_layout(yaxis_tickprefix='$') | |
fig.write_image("wealth_managers.png") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment