Created
November 7, 2018 22:17
-
-
Save yellis/58508290fa28064a231d1a5433bc22ed to your computer and use it in GitHub Desktop.
LINQPad script that retrieves names from the Izkor.gov.il site and exports them to a CSV file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<Query Kind="Program"> | |
<NuGetReference>CsvHelper</NuGetReference> | |
<NuGetReference Prerelease="true">StackExchange.Utils.Http</NuGetReference> | |
<Namespace>CsvHelper</Namespace> | |
<Namespace>StackExchange.Utils</Namespace> | |
<Namespace>System.Threading.Tasks</Namespace> | |
<Namespace>System.Globalization</Namespace> | |
</Query> | |
/// <summary>
/// Entry point of the query: downloads the records for the configured date range
/// via <c>GetResponse</c> and writes them to a timestamped, fully quoted CSV file.
/// </summary>
async Task Main()
{
    var start = new DateTime(1873, 1, 1, 0, 0, 0, DateTimeKind.Utc); // earliest record is from 1873
    //var start = new DateTime(2018,1,1,0,0,0,DateTimeKind.Utc);
    var end = DateTime.UtcNow.Date;

    var data = await GetResponse(start, end, 500);
    $"Total Rows Retrieved: {data.Count}".Dump();

    // Nothing to export: avoid creating a useless file and avoid the
    // InvalidOperationException that Last() throws on an empty sequence.
    if (data.Count == 0)
    {
        "No rows retrieved; no file generated.".Dump();
        return;
    }

    // "HH" (24-hour clock) keeps the file name unambiguous; "hh" would produce the
    // same name for runs twelve hours apart.
    var path = $@"PATH\izkor_{DateTime.UtcNow:yyyy-MM-dd_HH-mm-ss}.csv";

    using (TextWriter textWriter = new StreamWriter(path, false, Encoding.Unicode))
    using (var csv = new CsvWriter(textWriter))
    {
        csv.Configuration.QuoteAllFields = true;
        csv.WriteRecords(data);
    }

    $"File Generated! Last date: {data[data.Count - 1].death_date_final}".Dump();
}
/// <summary>
/// Pages through the Izkor search API for the given date range and collects the
/// returned records.
/// </summary>
/// <param name="start">First date of the range (sent as dd-MM-yyyy).</param>
/// <param name="end">Last date of the range (sent as dd-MM-yyyy).</param>
/// <param name="batchSize">Page size requested from the API.</param>
/// <returns>
/// The records collected so far. Paging stops on a failed request, an empty page,
/// a missing "next" link, or once 10,000 records have been collected.
/// </returns>
async Task<List<IzkorData>> GetResponse(DateTime start, DateTime end, int batchSize = 500)
{
    const string baseUrl = "http://izkorapi.mod.gov.il/search";
    // their api doesnt let you go over the 10,000th item.
    // So if this isn't the end, need to do another batch starting from the last date of prev batch
    // can try to do this in one go here in a future version
    const int maxItems = 10000;
    const int progressInterval = 10;

    var results = new List<IzkorData>();

    // Format the dates with the invariant culture so the URL does not change with
    // the calendar of the machine's current culture.
    var startText = start.ToString("dd-MM-yyyy", CultureInfo.InvariantCulture);
    var endText = end.ToString("dd-MM-yyyy", CultureInfo.InvariantCulture);

    // Relative path of the page to fetch; subsequent values come from metadata.next.
    string nextPath = $"/date/{startText}/{endText}/0/{batchSize}/d";
    int round = 0;

    while (!string.IsNullOrEmpty(nextPath) && results.Count < maxItems)
    {
        // Report progress every few requests. The counter is incremented only here,
        // so the reported round always matches the number of requests issued.
        if (++round % progressInterval == 0)
        {
            $"{round}: Total So Far {results.Count}".Dump();
        }

        var rawResponse = await Http.Request(baseUrl + nextPath).ExpectJson<IzkorResponse>().GetAsync();
        if (!rawResponse.Success)
        {
            Console.WriteLine("Uh oh");
            rawResponse.Dump();
            break;
        }

        var response = rawResponse.Data;
        var page = response?.data;
        if (page == null || page.Count == 0)
        {
            break;
        }

        //$"Adding {page.Count} records".Dump();
        results.AddRange(page);

        // A missing "next" link ends the loop; falling back to the first-page URL
        // here would download the same records again.
        nextPath = response.metadata?.next;
    }

    return results;
}
/// <summary>
/// Shape of one JSON page returned by the search endpoint. Property names are
/// lower-case so they match the names used in the payload.
/// </summary>
public class IzkorResponse
{
    /// <summary>Paging information attached to a response.</summary>
    public class IzkorMetadata
    {
        /// <summary>Relative path of the following page; appended to the base URL by the caller.</summary>
        public string next { get; set; }

        /// <summary>Presumably the total number of matching records (not used by this script).</summary>
        public int total { get; set; }
    }

    /// <summary>Records contained in this page.</summary>
    public List<IzkorData> data { get; set; }

    /// <summary>Paging information for this page.</summary>
    public IzkorMetadata metadata { get; set; }

    /// <summary>Free-text field sent by the API; not used by this script.</summary>
    public string instructions { get; set; }
}
// Culture and format used to parse the "death_date" values delivered by the API.
static readonly CultureInfo he = new CultureInfo("he-IL");
static readonly string df = "d-M-yyyy";

/// <summary>
/// One record of the search result. Property names are lower-case so they match
/// the names used in the JSON payload; the public getters are what gets exported.
/// </summary>
public class IzkorData
{
    public string birth_city { get; set; }
    public string gender { get; set; }
    public string default_search_field { get; set; }
    public string mother_name { get; set; }
    public string last_name { get; set; }
    public string uuid { get; set; }

    // Raw date string as delivered by the API. The getter is private, so only the
    // normalized death_date_final is publicly readable.
    public string death_date { private get; set; }

    /// <summary>
    /// The death date normalized to ISO format (yyyy-MM-dd) so that it is
    /// consistently parseable by any system. When the raw value is missing or does
    /// not match the expected format, the raw value (or an empty string for null)
    /// is returned instead of throwing, so one bad record cannot abort an export.
    /// </summary>
    public string death_date_final
    {
        get
        {
            DateTime parsed;
            if (DateTime.TryParseExact(death_date, df, he, DateTimeStyles.None, out parsed))
            {
                // The invariant culture guarantees Gregorian years and ASCII digits
                // regardless of the current culture of the machine.
                return parsed.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
            }

            return death_date ?? string.Empty;
        }
    }

    public string unit { get; set; }
    public string father_name { get; set; }
    public string nickname { get; set; }
    public string rank { get; set; }
    public string legacy_id { get; set; }
    public string cemetery_name { get; set; }
    public string death_date_hebrew { get; set; }
    public string first_name { get; set; }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment