Skip to content

Instantly share code, notes, and snippets.

@narenmanoharan
Created August 5, 2023 17:59
Show Gist options
  • Save narenmanoharan/cc9487643f4ba9ef50d7ba97bce4a289 to your computer and use it in GitHub Desktop.
Save narenmanoharan/cc9487643f4ba9ef50d7ba97bce4a289 to your computer and use it in GitHub Desktop.
Script for the top 10K wealth managers in the US
import os
import pandas as pd
from sec_api import FormAdvApi
from loguru import logger
from tqdm import tqdm
import plotly.express as px
DATA_FILE_PATH = "wealth_managers.csv"
RESULTS_PER_PAGE = 50
logger.info('Reading API Key from environment variables...')
API_KEY = os.getenv("SEC_API_KEY")
logger.info('Initializing API...')
formAdvApi = FormAdvApi(API_KEY)
if not os.path.exists(DATA_FILE_PATH) or os.path.getsize(DATA_FILE_PATH) == 0:
pd.DataFrame(columns=['CRD', 'Name', 'AUM ($)', 'Accounts', 'Employees', 'Street1', 'City', 'State', 'Phone']).to_csv(DATA_FILE_PATH, index=False)
df_existing = pd.read_csv(DATA_FILE_PATH)
current_page = len(df_existing) // RESULTS_PER_PAGE
logger.info(f'Resuming from page {current_page}...')
required_columns = ['Info.FirmCrdNb', 'Info.BusNm', 'FormInfo.Part1A.Item5F.Q5F2C', 'FormInfo.Part1A.Item5F.Q5F2F', 'FormInfo.Part1A.Item5A.TtlEmp', 'MainAddr.Strt1', 'MainAddr.City', 'MainAddr.State', 'MainAddr.PhNb']
for i in tqdm(range(current_page, current_page + 100)):
query = {
"query": {
"bool": {
"must": [
{"query_string": {"query": "FormInfo.Part1A.Item5F.Q5F2C:[1000000 TO *]"}},
]
}
},
"from": str(i * RESULTS_PER_PAGE),
"size": str(RESULTS_PER_PAGE),
"sort": [{"FormInfo.Part1A.Item5F.Q5F2C": {"order": "desc"}}]
}
try:
response = formAdvApi.get_firms(query)
except Exception as e:
logger.error(f"An error occurred: {e}")
exit(1)
firm_advs = response['filings']
temp_df = pd.json_normalize(firm_advs)
temp_df = temp_df[required_columns]
temp_df.columns = ['CRD', 'Name', 'AUM ($)', 'Accounts', 'Employees', 'Street1', 'City', 'State', 'Phone']
temp_df['Employees'] = temp_df['Employees'].fillna(0).astype(int)
temp_df.dropna(subset=['AUM ($)'], inplace=True)
temp_df.to_csv(DATA_FILE_PATH, mode='a', header=False, index=False)
df = pd.read_csv(DATA_FILE_PATH)
df.drop_duplicates(inplace=True)
total_market_size = df['AUM ($)'].sum()
logger.info('Total market size: ${:,.0f}'.format(total_market_size))
state_aum = df.groupby('State')['AUM ($)'].sum()
state_aum_formatted = state_aum.apply(lambda x: f"${x:,.0f}")
logger.info(f'AUM by state:\n{state_aum_formatted}')
fig = px.bar(x=state_aum.index, y=state_aum.values, title='Assets Under Management by State')
fig.update_xaxes(title_text='State')
fig.update_yaxes(title_text='Assets Under Management ($)')
fig.update_layout(yaxis_tickprefix='$')
fig.write_image("wealth_managers.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment