Created
May 5, 2020 10:07
-
-
Save matthew-brett/1ef815c0de606fdbbfea6a74abb31b24 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Analyzes output from NZ death registry search. | |
Input is some death records from printable (all) version of search on | |
https://bdmhistoricalrecords.dia.govt.nz | |
""" | |
import re | |
from datetime import datetime | |
import calendar | |
FIRST_RE = re.compile(r'^Mary$') | |
BIRTH_YEAR = 1864 | |
JITTER = 5 | |
# Select all, copy of some death records from printable (all) of search on | |
# bdmhistoricalrecords.dia.govt.nz | |
lines = """\ | |
1936/19448 Dunn Mary Ann 62Y | |
1940/19380 Dunn Helen Mary Hamilton 68Y | |
1941/20389 Dunn Mary Jane 81Y | |
1937/20726 Dunn Mary 77Y | |
1937/24283 Dunn Julia Mary 44Y | |
1942/26860 Dunn Mary 86Y | |
1943/22614 Dunn Mary 72Y | |
1939/24680 Dunn Mary Ellen 82Y | |
1943/29192 Dunn Florence Mary 49Y | |
1945/27823 Dunn Mary 78Y | |
1946/17607 Dunn Mary Ann 24H | |
1946/32686 Dunn Mary Stewart 78Y | |
1922/8348 Dunn Mary Allen 80Y | |
1933/3677 Dunn Mary Sophia Louisa 83Y | |
1922/1413 Dunn Mary Josephine 65Y | |
1929/8816 Dunn Mary 89Y | |
1927/2011 Dunn Mary Ellen 82Y | |
1930/992 Dunn Mary 86Y | |
1931/6814 Dunn Mary Elizabeth 64Y | |
1920/168 Dunn Mary 54Y | |
1923/4753 Dunn Mary 75Y | |
1926/6870 Dunn Mary 79Y | |
1926/7114 Dunn Mary 86Y | |
1915/2600 Dunn Beatrice Mary 54Y | |
1923/9775 Dunn Mary Blanche 78Y | |
1909/6801 Dunn Mary Elizabeth 44Y | |
1876/2474 Dunn Mary 68Y | |
1908/7403 Dunn Mary 24Y | |
1900/4082 Dunn Mary Elizabeth 66Y | |
1906/4155 Dunn Mary 77Y | |
1895/2328 Dunn Mary 20Y | |
1903/4159 Dunn Mary 33Y | |
1882/750 Dunn Mary Elizabeth 42Y | |
1901/7171 Dunn Rose Mary 10Y | |
1905/8278 Dunn Mary 58Y | |
1894/4050 Dunn Mary 8D | |
1881/335 Dunn Mary Mabel 15M | |
1890/1904 Dunn Mary 34Y | |
1892/767 Dunn Mary 27Y | |
1890/5410 Dunn Mary Elizabeth 31Y | |
1889/4266 Dunn Mary 54Y | |
1891/5170 Dunn Mary 2M | |
1891/6173 Dunn Mary 40Y | |
1886/3713 Dunn Mary 62Y | |
1885/625 Dunn Mary 56Y | |
1879/2569 Dunn Mary Ann 76Y | |
1879/4012 Dunn Mary Elizabeth 6W | |
1884/2339 Dunn Emma Mary Elizabeth 18Y | |
1884/5153 Dunn Mary 1H | |
1933/19544 Dunn Mary 15Y | |
1916/1599 Dunn Mary Pauline 10Y | |
1906/8279 Dunn Myrtle Mary 5M | |
1936/19849 Dunn Mary Ann 65Y | |
1926/9323 Dunn Mary Allan 50Y | |
1942/31208 Dunn Edith Mary 11M | |
1941/20187 Dunn Mary Caroline 65Y | |
1922/3267 Dunn Mary 57Y | |
1946/34848 Dunn Catherine Mary 64Y | |
1949/31163 Dunn Mary Ann 75Y | |
1873/4795 Dunn Mary 30Y | |
1874/11089 Dunn Mary Agnes Somerville 10M | |
1872/7297 Dunn Isabella Mary 16M | |
1869/5529 Dunn Mary 1H | |
1905/4432 Dunn Mary Somerville 71Y | |
2005/12496 Miles Mary 14 January 1935 | |
1922/244 Dunn Mary 66Y""".strip().splitlines() | |
YEAR_DIVS = dict(H=365.25 * 24, D=365.25, M=12, Y=1) # Units to years conversion. | |
# Select records with chosen first name. | |
# Change age records to estimated year. | |
records = [] | |
for line in lines: | |
reg_no, last, firsts, age = [s.strip() for s in line.split('\t')] | |
if not re.search(FIRST_RE, firsts): | |
continue | |
death_yr, no = reg_no.split('/') | |
try: | |
# Age is date of birth. | |
dt = datetime.strptime(age, '%d %B %Y') | |
birth_yr = (dt.year + | |
dt.timetuple().tm_yday / (365 + calendar.isleap(dt.year))) | |
except ValueError: | |
# Age is something of form: 10H, 5D, 3M, 66Y | |
age_years = float(age[:-1]) / YEAR_DIVS[age[-1]] | |
birth_yr = round(int(death_yr) - age_years, 1) | |
if abs(birth_yr - BIRTH_YEAR) < JITTER: | |
records.append((reg_no, death_yr, age, firsts, last, birth_yr)) | |
for record in sorted(records, key=lambda r: r[-1]): | |
print(*record) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment