Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Parse HTML tables using pandas
import io
import urllib

import pandas as pd
import requests
from lxml import etree
# Download an SEC proxy statement, locate the beneficial-ownership table,
# and write it to ``table.csv`` in the current working directory.
url = 'https://www.sec.gov/Archives/edgar/data/1090872/000156459018001685/a-def14a_20180321.htm#BENEFICIAL_OWNERSHIP'

# SEC EDGAR asks automated clients to declare a User-Agent identifying the
# requester. The value below is a placeholder -- replace it with your own
# organisation name and contact e-mail before running.
headers = {'User-Agent': 'Sample Company Name admin@example.com'}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as if it
# were the filing.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html = etree.HTML(response.content)

# Select every <table> whose text content (including nested elements)
# contains the column heading. Only the first match in document order is used.
tr_nodes = html.xpath('.//table[contains(.,"Name and Address of Beneficial Owner")]')
if tr_nodes:
    print("Found matching table")
    # Serialise just the matched <table> element back to markup (bytes).
    table_data = etree.tostring(tr_nodes[0])
    # pandas deprecates passing literal markup to read_html; wrapping the
    # bytes in a file-like object is the supported form.
    df = pd.read_html(io.BytesIO(table_data))
    # read_html returns a list of DataFrames; the first one is the table.
    df[0].to_csv('table.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment