Created
March 18, 2021 23:08
-
-
Save thomasforth/669cbe55c78ed2a0c190e07268111cb6 to your computer and use it in GitHub Desktop.
Calculating excess death by national origin of name from French open mortality data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CsvHelper; | |
using System.Collections.Generic; | |
using System.Globalization; | |
using System.IO; | |
using System.Linq; | |
using System.Text; | |
namespace FrenchDeaths | |
{ | |
/// <summary>
/// Counts deaths per family name in the 2019 and 2020 French open mortality files,
/// rolls those counts up by the country recorded alongside each name, and writes both
/// tables as CSV files.
/// </summary>
class Program
{
    // get these files from https://www.data.gouv.fr/fr/datasets/fichier-des-personnes-decedees/
    private const string Deaths2019Path = "Assets/deces-2019.txt";
    private const string Deaths2020Path = "Assets/deces-2020.txt";

    private const string NamesOutputPath = "FrenchDeaths.csv";

    // NOTE(review): the original code deleted "FrenchDeathsByCountry.csv" but wrote to
    // "FrenchByCountry.csv". The name that was actually written is kept so existing
    // consumers of the output are unaffected -- confirm which name was intended.
    private const string CountriesOutputPath = "FrenchByCountry.csv";

    // Position of the country column inside each fixed-width input line.
    // Presumably the country-of-birth field of the INSEE layout -- TODO confirm
    // against the dataset documentation.
    private const int CountryFieldStart = 124;
    private const int CountryFieldLength = 30;

    /// <summary>
    /// Program entry point: reads both yearly files, aggregates, and writes the two CSVs.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Dictionary<string, NamesAndDeath> namesAndDeaths = new Dictionary<string, NamesAndDeath>();

        // File.ReadLines streams the file instead of loading every line into memory first.
        foreach (string line in File.ReadLines(Deaths2019Path))
        {
            NamesAndDeath entry = RegisterLine(namesAndDeaths, line);
            if (entry != null)
            {
                entry.Deaths2019++;
            }
        }

        foreach (string line in File.ReadLines(Deaths2020Path))
        {
            NamesAndDeath entry = RegisterLine(namesAndDeaths, line);
            if (entry != null)
            {
                entry.Deaths2020++;
            }
        }

        List<CountryAndDeath> countriesAndDeaths = AggregateByCountry(namesAndDeaths.Values);

        CountryAndDeath all = new CountryAndDeath()
        {
            Country = "All",
            Deaths2019 = namesAndDeaths.Values.Sum(x => x.Deaths2019),
            Deaths2020 = namesAndDeaths.Values.Sum(x => x.Deaths2020)
        };
        all.PercentIncreaseIn2020 = RelativeIncrease(all.Deaths2019, all.Deaths2020);
        countriesAndDeaths.Add(all);

        WriteCsv(NamesOutputPath, namesAndDeaths.Values.OrderByDescending(x => x.Deaths2020));
        WriteCsv(CountriesOutputPath, countriesAndDeaths.OrderByDescending(x => x.PercentIncreaseIn2020));
    }

    /// <summary>
    /// Finds or creates the tally entry for the family name on <paramref name="line"/>
    /// and records the line's country (when present) against it.
    /// </summary>
    /// <param name="namesAndDeaths">Tally keyed by family name; updated in place.</param>
    /// <param name="line">One raw line from a mortality file.</param>
    /// <returns>
    /// The entry for the line's family name so the caller can bump the right year's
    /// counter, or <c>null</c> when the line is blank and should not be counted.
    /// </returns>
    private static NamesAndDeath RegisterLine(Dictionary<string, NamesAndDeath> namesAndDeaths, string line)
    {
        // A blank (e.g. trailing) line carries no record; previously it crashed the run.
        if (string.IsNullOrWhiteSpace(line))
        {
            return null;
        }

        // The family name is everything before the first '*'.
        string name = line.Split('*')[0];

        NamesAndDeath entry;
        if (!namesAndDeaths.TryGetValue(name, out entry))
        {
            entry = new NamesAndDeath() { FamilyName = name, Deaths2019 = 0, Deaths2020 = 0, Countries = new List<string>() };
            namesAndDeaths.Add(name, entry);
        }

        string country = ExtractCountry(line);
        if (country != "")
        {
            entry.Countries.Add(country);
        }

        return entry;
    }

    /// <summary>
    /// Returns the trimmed country column of <paramref name="line"/>, or an empty string
    /// when the line is too short to contain it.
    /// </summary>
    private static string ExtractCountry(string line)
    {
        if (line.Length <= CountryFieldStart)
        {
            return "";
        }

        // Clamp the slice so a truncated line yields what is there instead of throwing.
        int available = line.Length - CountryFieldStart;
        int length = available < CountryFieldLength ? available : CountryFieldLength;
        return line.Substring(CountryFieldStart, length).Trim();
    }

    /// <summary>
    /// Fills in <see cref="NamesAndDeath.CountriesToWrite"/> for every name and builds the
    /// per-country totals.
    /// </summary>
    /// <remarks>
    /// Every death of a family name is attributed to each distinct country that has been
    /// seen with that name, so the per-country totals can add up to more than the overall
    /// total. Countries appear in the order they are first encountered.
    /// </remarks>
    private static List<CountryAndDeath> AggregateByCountry(IEnumerable<NamesAndDeath> names)
    {
        List<CountryAndDeath> countriesAndDeaths = new List<CountryAndDeath>();
        Dictionary<string, CountryAndDeath> byCountry = new Dictionary<string, CountryAndDeath>();

        // Single pass over the names: each name adds its counts once to every distinct
        // country it was seen with. Same totals as scanning all names per country, without
        // the repeated linear searches through duplicate-heavy lists.
        foreach (NamesAndDeath nad in names)
        {
            List<string> distinctCountries = nad.Countries.Distinct().ToList();
            nad.CountriesToWrite = string.Join(", ", distinctCountries);

            foreach (string country in distinctCountries)
            {
                CountryAndDeath cad;
                if (!byCountry.TryGetValue(country, out cad))
                {
                    cad = new CountryAndDeath() { Country = country };
                    byCountry.Add(country, cad);
                    countriesAndDeaths.Add(cad);
                }

                cad.Deaths2019 += nad.Deaths2019;
                cad.Deaths2020 += nad.Deaths2020;
            }
        }

        foreach (CountryAndDeath cad in countriesAndDeaths)
        {
            cad.PercentIncreaseIn2020 = RelativeIncrease(cad.Deaths2019, cad.Deaths2020);
        }

        return countriesAndDeaths;
    }

    /// <summary>
    /// Change from 2019 to 2020 relative to 2019, as a fraction (0.1 means +10%).
    /// Returns 0 when there were no 2019 deaths, since the ratio is undefined.
    /// </summary>
    private static double RelativeIncrease(int deaths2019, int deaths2020)
    {
        if (deaths2019 == 0)
        {
            return 0;
        }

        return (deaths2020 - deaths2019) / (double)deaths2019;
    }

    /// <summary>
    /// Writes <paramref name="records"/> to <paramref name="path"/> as CSV, replacing any
    /// existing file.
    /// </summary>
    private static void WriteCsv<T>(string path, IEnumerable<T> records)
    {
        // this method of writing the CSV is long -- but it creates CSVs better suited for opening in Excel and PowerBI:
        // UTF8Encoding(true) emits a byte-order mark and the current culture is used for formatting.
        // append: false truncates an existing file, so no stale bytes survive a shorter rewrite.
        using (StreamWriter textWriter = new StreamWriter(path, false, new UTF8Encoding(true)))
        using (CsvWriter csvWriter = new CsvWriter(textWriter, CultureInfo.CurrentCulture))
        {
            csvWriter.WriteRecords(records);
        }
    }
}
/// <summary>
/// Death counts for one country label (or the "All" total) in 2019 and 2020.
/// Property names and their order are kept as-is because the CSV columns are
/// derived from them.
/// </summary>
internal class CountryAndDeath
{
    /// <summary>Country label, or "All" for the overall total row.</summary>
    public string Country { get; set; }

    /// <summary>Number of deaths counted for this row in the 2019 file.</summary>
    public int Deaths2019 { get; set; }

    /// <summary>Number of deaths counted for this row in the 2020 file.</summary>
    public int Deaths2020 { get; set; }

    /// <summary>
    /// Relative change from 2019 to 2020, stored as a fraction of the 2019 count
    /// (it is not multiplied by 100 despite the name).
    /// </summary>
    public double PercentIncreaseIn2020 { get; set; }
}
/// <summary>
/// Death counts for one family name in 2019 and 2020, together with the countries
/// that were recorded on lines carrying that name.
/// Property names and their order are kept as-is because the CSV columns are
/// derived from them.
/// </summary>
internal class NamesAndDeath
{
    /// <summary>Family name this tally belongs to.</summary>
    public string FamilyName { get; set; }

    /// <summary>Number of lines with this family name in the 2019 file.</summary>
    public int Deaths2019 { get; set; }

    /// <summary>Number of lines with this family name in the 2020 file.</summary>
    public int Deaths2020 { get; set; }

    /// <summary>
    /// Every non-empty country value seen with this name; may contain duplicates.
    /// Defaults to an empty list so that adding to it or enumerating it never hits a
    /// null reference when the caller did not assign one explicitly.
    /// </summary>
    public List<string> Countries { get; set; } = new List<string>();

    /// <summary>Comma-separated distinct countries, prepared for the CSV output.</summary>
    public string CountriesToWrite { get; set; }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment