@boyank
Last active November 7, 2017 13:57
Download death record search results from http://bdmhistoricalrecords.dia.govt.nz and save them to a CSV file.
import csv

import requests
from bs4 import BeautifulSoup

# Send a browser-like User-Agent; the site may reject requests without one.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0'}
url = 'http://bdmhistoricalrecords.dia.govt.nz/Search/Search?Path=querySubmit.m%3fReportName%3dDeathSearch%26recordsPP%3d100000'

# Death search form fields: surname, first name and the lower bound of the date range.
search_data = {'dsur': 'Jones', 'dfirst': 'Robert', 'ddate_lower': '01/01/1901',
               'current_tab': 'tab1', 'switch_tab': 'Submit'}

resp = requests.post(url, data=search_data, headers=headers)
soup = BeautifulSoup(resp.text, 'lxml')

# The results are rendered in a table with class "inner_table_left".
table = soup.find('table', {'class': 'inner_table_left'})
tbl_header = [th.text for th in table.find_all('th') if th.text]

# Data rows carry a class starting with "Cell_Search_Field"; guard against rows with no class.
tbl_data_rows = table.find_all('tr', {'class': lambda x: x and x.startswith('Cell_Search_Field')})

with open('output.csv', 'w', newline='', encoding='utf-8') as fout:
    wrtr = csv.DictWriter(fout, fieldnames=tbl_header)
    wrtr.writeheader()
    for row in tbl_data_rows:
        # The last cell has no matching header column, so drop it before zipping.
        data = [td.text.strip() for td in row.find_all('td')][:-1]
        wrtr.writerow(dict(zip(tbl_header, data)))
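
If the same search needs to be run for several people, the request-and-parse steps can be pulled into a function. A minimal sketch, assuming the form fields (dsur, dfirst, ddate_lower) behave as in the script above; the function name and its parameters are illustrative, not part of any documented API of the site.

import requests
from bs4 import BeautifulSoup

HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0'}
URL = ('http://bdmhistoricalrecords.dia.govt.nz/Search/Search'
       '?Path=querySubmit.m%3fReportName%3dDeathSearch%26recordsPP%3d100000')


def search_deaths(surname, first_name, date_lower):
    """POST a death search and return the parsed results table (or None if not found)."""
    data = {'dsur': surname, 'dfirst': first_name, 'ddate_lower': date_lower,
            'current_tab': 'tab1', 'switch_tab': 'Submit'}
    resp = requests.post(URL, data=data, headers=HEADERS)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'lxml')
    return soup.find('table', {'class': 'inner_table_left'})


# Same query as above, but reusable for other names or dates.
table = search_deaths('Jones', 'Robert', '01/01/1901')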