# robsalasco/corona.py

## corona.py

from requests import get
from lxml import html, etree
import camelot
import pandas as pd
import numpy as np

# Fetch the MINSAL confirmed-cases page, take the link from the first data row
# of its report table, read that document with camelot, tidy the first
# extracted table and print it to stdout as JSON records.
#
# NOTE: this script previously appeared twice in the file; the second copy was
# a tab-indented duplicate that made the module fail to compile
# (IndentationError) and has been removed.

url = 'https://www.minsal.cl/nuevo-coronavirus-2019-ncov/casos-confirmados-en-chile-covid-19/'
# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops early on an HTTP error instead of trying to
# parse an error page further down.
response = get(url, timeout=30)
response.raise_for_status()

file = response.content

# For offline debugging, parse a saved copy of the page instead:
#with open(r'/Users/robsalasco/Downloads/covid.html', "r") as f:
#    file = f.read()
source_code = html.fromstring(file)
# The XPath is tied to the page's current layout; [0] raises IndexError if the
# table is not found at that position.
tree = source_code.xpath('//*[@id="main"]/div[2]/div[5]/div/table[1]')[0]

# One [first cell text, first link href] pair per table row, skipping the
# first (header) row.
info = [[row.xpath(".//td/text()")[0], row.xpath(".//td/a/@href")[0]] for row in tree.xpath(".//tr")[1:]]

# Only the link from the first data row is processed.
tables = camelot.read_pdf(info[0][1])

tablespd = tables[0].df

for x in [0, 1, 4]:
    # Blank cells become NaN so that forward-fill copies the value from the
    # row above. A single .loc assignment is used because chained indexing
    # (df[x][mask] = ...) may write to a temporary copy and do nothing.
    tablespd.loc[tablespd[x] == "", x] = np.nan
    tablespd[x] = tablespd[x].ffill()
    # Flatten multi-line cells and collapse the double spaces that can
    # result. regex=False with a real newline character behaves the same on
    # every pandas version (the default for `regex` changed in pandas 2.0,
    # after which the pattern r'\n' no longer matches a newline).
    tablespd[x] = (
        tablespd[x]
        .str.replace('\n', ' ', regex=False)
        .str.replace('  ', ' ', regex=False)
    )

# The row at position 1 holds the column headers: promote it, then drop the
# first two rows so only data rows remain.
tablespd.columns = tablespd.iloc[1]
tablespd.drop(tablespd.index[:2], inplace=True)

pd.set_option('display.max_rows', None)

print(tablespd.to_json(orient='records', force_ascii=False))