Created
February 2, 2021 20:12
-
-
Save damieng/9930a497fe6496f8047cb57ad483d278 to your computer and use it in GitHub Desktop.
Throwaway script that created all the necessary metadata for uploading ST Report to archive.org
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Globalization; | |
using System.IO; | |
using System.Linq; | |
using System.Text.RegularExpressions; | |
namespace ConsoleApp8 | |
{ | |
class Program | |
{ | |
// Month-name lookup tables. Index i in every table refers to the same calendar
// month (0 = January), so a hit in any table converts to a month number as index + 1.
static readonly string[] months =
{
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
};

// Lower-cased copy of the full names.
static readonly string[] lmonths = Array.ConvertAll(months, name => name.ToLowerInvariant());

// Three-letter abbreviations ("Jan", "Feb", ...) and their lower-cased copy.
static readonly string[] smonths = Array.ConvertAll(months, name => name.Substring(0, 3));
static readonly string[] lsmonths = Array.ConvertAll(smonths, name => name.ToLowerInvariant());

// Abbreviations of at most four letters ("Janu", "Sept", "May", ...) and their lower-cased copy.
static readonly string[] xmonths = Array.ConvertAll(months, name => name.Length > 4 ? name.Substring(0, 4) : name);
static readonly string[] lxmonths = Array.ConvertAll(xmonths, name => name.ToLowerInvariant());

// Regex alternation of every spelling above: full names first, then 3-letter, then 4-letter forms.
static readonly string anyMonth = String.Join("|", months.Concat(smonths).Concat(xmonths));

// Group 1 = month spelling, group 2 = optional one- or two-digit day, group 3 = four-digit year.
static readonly Regex regexDate = new Regex("(" + anyMonth + @")\W*[.]?\W*(\d{1,2})?[,]?\W*(\d{4})", RegexOptions.IgnoreCase);

// "Issue <digits>": group 1 = the digits.
static readonly Regex issueRegex = new Regex(@"Issue\W*(\d+)", RegexOptions.IgnoreCase);

// "No. <digits>[.<digits>]": group 1 = the number, optionally with a fractional part.
static readonly Regex noRegex = new Regex(@"No[.]?\W*(\d+[.]?\d*)", RegexOptions.IgnoreCase);

// "Vol. <digits> No.<digits>": group 1 = volume, group 2 = number.
static readonly Regex volRegex = new Regex(@"Vol[.]?\W*(\d+)\W*No[.]?(\d+)", RegexOptions.IgnoreCase);

// Culture passed to date parsing so the result does not depend on the machine's locale.
static readonly CultureInfo provider = CultureInfo.InvariantCulture;
/// <summary>
/// Reads every <c>*.TXT</c> file in the hard-coded input folder, extracts a date and
/// an issue label from each one, and writes one metadata row per file to
/// <c>upload.csv</c> in the current directory.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var files = Directory.GetFiles(@"C:\src\legacy-to-utf8\test", "*.TXT");
    var csv = new List<string>();

    // Values shared by every row; per-file entries are overlaid on top of this.
    var defaultEntry = new Entry
    {
        subjects = new[] { "Atari ST", "Atari", "e-zine" },
        creator = "STR Publishing Inc."
    };

    // Header row: one "subject[i]" column per default subject.
    var fields = new string[] { "identifier", "file", "description" }
        .Concat(defaultEntry.subjects.Select((s, i) => "subject[" + i + "]"))
        .Concat(new[] { "title", "creator", "date" });
    csv.Add(MakeCsvRow(fields));

    foreach (var file in files)
    {
        var entry = new Entry
        {
            file = Path.GetFileName(file)
        };

        // The digits in "STRnnn.TXT" become the numeric id. The "*.TXT" search pattern
        // can match lower-case names on case-insensitive file systems, so the prefix
        // and extension are stripped without regard to case.
        var stem = Path.GetFileNameWithoutExtension(file);
        if (stem.StartsWith("STR", StringComparison.OrdinalIgnoreCase))
        {
            stem = stem.Substring(3);
        }
        if (!int.TryParse(stem, NumberStyles.Integer, provider, out int id))
        {
            Console.Error.WriteLine("Skipping " + file + ": file name does not contain an issue number.");
            continue;
        }

        // Split on '\n' only, so the set of lines scanned matches the original script.
        var lines = File.ReadAllText(file).Split('\n');

        entry.date = FindDate(lines);
        var issue = FindIssue(lines, id.ToString(provider));
        var published = DescribePublished(entry.date);

        entry.identifier = "st-report-" + id.ToString(provider);
        entry.title = "ST Report " + issue;
        entry.description = (id < 924 ? "ST Report" : "Silicon Times Report (formerly STReport)") + " " + issue + " published " + published + ".";

        csv.Add(MakeCsvRow(OverlayEntries(defaultEntry, entry).AsFields()));
        Console.WriteLine(file);
    }

    File.WriteAllText("upload.csv", String.Join("\n", csv));
}

/// <summary>
/// Looks for a date in the first 15 lines, then in the last 9 lines, of a file.
/// Both scans are clamped to the actual line count so short files do not throw.
/// </summary>
/// <returns>"yyyy-MM-dd" or "yyyy-MM" (when no day was present), or null if no date was found.</returns>
static string FindDate(string[] lines)
{
    var headCount = Math.Min(15, lines.Length);
    for (var i = 0; i < headCount; i++)
    {
        var found = DateFromLine(lines[i]);
        if (found != null)
        {
            return found;
        }
    }

    // Walk backwards from the final element of the split.
    var tailCount = Math.Min(9, lines.Length);
    for (var i = 1; i <= tailCount; i++)
    {
        var found = DateFromLine(lines[lines.Length - i]);
        if (found != null)
        {
            return found;
        }
    }

    return null;
}

/// <summary>
/// Applies <c>regexDate</c> to one line and formats the first match as an ISO-style date.
/// </summary>
/// <returns>The formatted date, or null when the line contains no date.</returns>
static string DateFromLine(string line)
{
    var match = regexDate.Match(line);
    if (!match.Success)
    {
        return null;
    }

    var iso = match.Groups[3].Value + "-" + MonthNumber(match.Groups[1].Value).ToString("00", provider);
    if (match.Groups[2].Success)
    {
        iso += "-" + int.Parse(match.Groups[2].Value, provider).ToString("00", provider);
    }
    return iso;
}

/// <summary>
/// Maps a month spelling (full name, 3-letter or 4-letter abbreviation, any case) to 1..12.
/// </summary>
/// <returns>The month number, or 0 when the spelling is in none of the tables.</returns>
static int MonthNumber(string name)
{
    // Ordinal, case-insensitive comparison avoids culture-specific lower-casing surprises.
    foreach (var table in new[] { months, smonths, xmonths })
    {
        var index = Array.FindIndex(table, m => m.Equals(name, StringComparison.OrdinalIgnoreCase));
        if (index >= 0)
        {
            return index + 1;
        }
    }
    return 0;
}

/// <summary>
/// Searches the first 15 lines (or fewer, for short files) for an issue label.
/// </summary>
/// <param name="lines">The file content split into lines.</param>
/// <param name="fallback">Label to use when no pattern matches.</param>
/// <returns>The full matched text (e.g. "Issue 12"), or <paramref name="fallback"/>.</returns>
static string FindIssue(string[] lines, string fallback)
{
    // NOTE(review): any line volRegex matches is also matched by noRegex, which is tried
    // first, so the "Vol. x No.y" form never wins. Order kept as in the original script
    // because reordering would change generated titles - confirm intent before swapping.
    var patterns = new[] { issueRegex, noRegex, volRegex };

    var limit = Math.Min(15, lines.Length);
    for (var i = 0; i < limit; i++)
    {
        foreach (var pattern in patterns)
        {
            var match = pattern.Match(lines[i]);
            if (match.Success)
            {
                return match.Value;
            }
        }
    }
    return fallback;
}

/// <summary>
/// Turns the machine-readable date into the human-readable text used in the description.
/// </summary>
/// <param name="isoDate">"yyyy-MM-dd", "yyyy-MM", or null.</param>
/// <returns>
/// "d MMMM yyyy" for a full date, "MMMM yyyy" for a month-only date, and the
/// script's original fallback text "June 1987" when the date is missing or invalid.
/// </returns>
static string DescribePublished(string isoDate)
{
    if (DateTime.TryParseExact(isoDate, "yyyy-MM-dd", provider, DateTimeStyles.None, out DateTime fullDate))
    {
        return fullDate.ToString("d MMMM yyyy", provider);
    }

    // Month-only dates used to fall through to the fallback text even though a valid
    // month and year had been extracted.
    if (DateTime.TryParseExact(isoDate, "yyyy-MM", provider, DateTimeStyles.None, out DateTime monthOnly))
    {
        return monthOnly.ToString("MMMM yyyy", provider);
    }

    return "June 1987";
}
/// <summary>
/// Joins the values into a single CSV record.
/// </summary>
/// <param name="values">Field values in column order; a null value becomes an empty field.</param>
/// <returns>
/// The comma-separated row. Fields containing a comma, double quote, CR or LF are
/// wrapped in double quotes with embedded quotes doubled, so such values cannot
/// shift columns; all other fields are emitted unchanged.
/// </returns>
private static string MakeCsvRow(IEnumerable<string> values)
{
    var special = new[] { ',', '"', '\r', '\n' };
    return String.Join(",", values.Select(value =>
    {
        if (value == null)
        {
            return "";
        }
        return value.IndexOfAny(special) >= 0
            ? "\"" + value.Replace("\"", "\"\"") + "\""
            : value;
    }));
}
/// <summary>
/// One row of upload metadata. Fields left null on a per-file entry are filled in
/// from the shared defaults when the two are merged.
/// </summary>
struct Entry
{
    /// <summary>Value for the "identifier" column.</summary>
    public string identifier;

    /// <summary>Value for the "file" column.</summary>
    public string file;

    /// <summary>Value for the "description" column.</summary>
    public string description;

    /// <summary>Values for the "subject[i]" columns, in order.</summary>
    public string[] subjects;

    /// <summary>Value for the "title" column.</summary>
    public string title;

    /// <summary>Value for the "creator" column.</summary>
    public string creator;

    /// <summary>Value for the "date" column.</summary>
    public string date;

    /// <summary>
    /// Lists the field values in column order: identifier, file, description,
    /// every subject, then title, creator and date.
    /// </summary>
    public IEnumerable<string> AsFields()
    {
        var leading = new[] { identifier, file, description };
        var trailing = new[] { title, creator, date };
        return leading.Concat(subjects).Concat(trailing);
    }
}
/// <summary>
/// Merges a per-file entry over the shared defaults.
/// </summary>
/// <param name="shared">Defaults used for any scalar field the overlay leaves null.</param>
/// <param name="overlay">Per-file values; non-null scalar fields win over the defaults.</param>
/// <returns>
/// A new entry. Scalar fields come from <paramref name="overlay"/> when set, otherwise
/// from <paramref name="shared"/>. Subjects are the shared subjects followed by the
/// overlay's subjects (if any).
/// </returns>
static Entry OverlayEntries(Entry shared, Entry overlay)
{
    var merged = new Entry();

    merged.identifier = overlay.identifier != null ? overlay.identifier : shared.identifier;
    merged.creator = overlay.creator != null ? overlay.creator : shared.creator;
    merged.date = overlay.date != null ? overlay.date : shared.date;
    merged.description = overlay.description != null ? overlay.description : shared.description;
    merged.file = overlay.file != null ? overlay.file : shared.file;

    // Subjects accumulate, not replace: defaults first, then per-file additions.
    var extraSubjects = overlay.subjects ?? new string[] { };
    merged.subjects = shared.subjects.Concat(extraSubjects).ToArray();

    merged.title = overlay.title != null ? overlay.title : shared.title;

    return merged;
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment