Skip to content

Instantly share code, notes, and snippets.

@bot-unit
Last active December 6, 2023 16:34
Show Gist options
  • Save bot-unit/ae757e68fc3616f8c35c8521fad51d83 to your computer and use it in GitHub Desktop.
Save bot-unit/ae757e68fc3616f8c35c8521fad51d83 to your computer and use it in GitHub Desktop.
Get ETF holdings from the Financial Times (markets.ft.com)
import requests
import bs4
import pandas as pd
headers = {'User-agent': 'Mozilla/5.0'}
def search_etf(ticker, region='UK', is_etf=True, timeout=30):
    """Search the FT markets fund-search endpoint for a ticker.

    Args:
        ticker: Search text, sent as the ``query`` parameter.
        region: Value sent as the ``region`` parameter.
        is_etf: Sent as ``isETF``, encoded as the string ``'true'`` or
            ``'false'``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response. The script entry point in this
        file expects a dict with a non-empty ``'data'`` list whose items carry
        a ``'url'`` key; the exact schema is defined by the remote service.
    """
    search_site = "https://markets.ft.com/data/funds/ajax/search"
    params = {
        'region': region,
        'isETF': 'true' if is_etf else 'false',
        'query': ticker,
    }
    r = requests.get(search_site, params=params, headers=headers, timeout=timeout)
    return r.json()
def get_holders(holders_url, timeout=30):
    """Scrape the top-holdings table from an FT fund "holdings" page.

    The page is located by walking
    ``section.mod-main-content`` -> ``div[data-f2-app-id="mod-top-ten"]`` ->
    the second ``table.mod-ui-table`` inside that div.

    Args:
        holders_url: Full URL of the holdings page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``pandas.DataFrame`` with one row per holding. Columns are the
        table's header texts with an extra ``'Ticker'`` column inserted at
        position 1; the ``'Long allocation'`` column is removed when present.
        An empty DataFrame is returned when the expected page structure is
        not found.
    """
    r = requests.get(holders_url, headers=headers, timeout=timeout)
    soup = bs4.BeautifulSoup(r.text, 'html.parser')

    top_holding_section = soup.find_all('section', {"class": "mod-main-content"})
    if not top_holding_section:
        return pd.DataFrame()
    top_holding_div = top_holding_section[0].find_all('div', {"data-f2-app-id": "mod-top-ten"})
    if not top_holding_div:
        return pd.DataFrame()
    top_holding_table = top_holding_div[0].find_all('table', {"class": "mod-ui-table"})
    if len(top_holding_table) < 2:
        return pd.DataFrame()
    # The holdings are in the second matching table of the widget.
    table = top_holding_table[1]

    heads = [head.text.strip() for head in table.find_all('th')]
    # The first cell holds both name (<a>) and ticker (<span>); split it into
    # two columns, so the header list needs a matching extra entry.
    heads.insert(1, 'Ticker')

    records = []
    for row in table.find_all('tr'):
        cols = row.find_all('td')
        # Header rows contain only <th> cells; indexing cols[0] on them would
        # raise IndexError.
        if not cols:
            continue
        name_tag = cols[0].find('a')
        # Rows without a link in the first cell are not individual holdings.
        if name_tag is None:
            continue
        ticker_tag = cols[0].find('span')
        # Tolerate a missing ticker span instead of raising AttributeError.
        ticker = ticker_tag.text if ticker_tag is not None else ''
        records.append([name_tag.text, ticker] + [cell.text for cell in cols[1:]])

    result = pd.DataFrame(columns=heads, data=records)
    # errors='ignore' keeps this from raising KeyError if the page layout
    # does not include a 'Long allocation' column.
    result.drop(columns=['Long allocation'], inplace=True, errors='ignore')
    return result
if __name__ == '__main__':
    # Demo: look up one ETF and print its top holdings.
    search_result = search_etf('EUNL.DE')
    has_hits = (
        isinstance(search_result, dict)
        and 'data' in search_result
        and len(search_result['data']) > 0
    )
    if has_hits:
        first_hit = search_result['data'][0]
        # Expand the '~' prefix to the site's data root, then point the
        # summary URL at the holdings page instead.
        holdings_url = (
            first_hit['url']
            .replace('~', 'https://markets.ft.com/data')
            .replace('summary', 'holdings')
        )
        print(get_holders(holdings_url))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment