// Throwaway script that created all the metadata needed to upload ST Report issues to archive.org.
using System; | |
using System.Collections.Generic; | |
using System.Globalization; | |
using System.IO; | |
using System.Linq; | |
using System.Text.RegularExpressions; | |
namespace ConsoleApp8
{
    /// <summary>
    /// Scans the ST Report text issues in a fixed directory, extracts a date and an
    /// issue label from each file, and writes one CSV row per file to <c>upload.csv</c>.
    /// </summary>
    class Program
    {
        /// <summary>Directory that is scanned for <c>*.TXT</c> issue files.</summary>
        private const string SourceDirectory = @"C:\src\legacy-to-utf8\test";

        /// <summary>Name of the CSV file that is written to the working directory.</summary>
        private const string OutputFile = "upload.csv";

        /// <summary>Number of lines at the top of a file searched for date and issue label.</summary>
        private const int HeaderLineCount = 15;

        /// <summary>Number of lines at the bottom of a file searched for a date when the top has none.</summary>
        private const int FooterLineCount = 9;

        /// <summary>Text used in the description when no parseable date was found.</summary>
        private const string FallbackPublished = "June 1987";

        /// <summary>Files with an id at or above this value are described as "Silicon Times Report (formerly STReport)".</summary>
        private const int SiliconTimesFirstId = 924;

        private static readonly string[] months = { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" };

        // Three-letter ("Jan") and four-letter ("Sept") abbreviations of the month names.
        private static readonly string[] smonths = months.Select(m => m.Substring(0, 3)).ToArray();
        private static readonly string[] xmonths = months.Select(m => m.Substring(0, Math.Min(m.Length, 4))).ToArray();

        // Case-insensitive lookup from any accepted month spelling to its 1-based month number.
        private static readonly Dictionary<string, int> monthNumbers = BuildMonthNumbers();

        // Full names come first in the alternation so "March" is preferred over "Mar".
        private static readonly string anyMonth = String.Join("|", months.Concat(smonths).Concat(xmonths));

        // Groups: 1 = month name, 2 = optional day, 3 = four-digit year.
        private static readonly Regex regexDate = new Regex("(" + anyMonth + @")\W*[.]?\W*(\d{1,2})?[,]?\W*(\d{4})", RegexOptions.IgnoreCase);
        private static readonly Regex issueRegex = new Regex(@"Issue\W*(\d+)", RegexOptions.IgnoreCase);
        private static readonly Regex noRegex = new Regex(@"No[.]?\W*(\d+[.]?\d*)", RegexOptions.IgnoreCase);
        private static readonly Regex volRegex = new Regex(@"Vol[.]?\W*(\d+)\W*No[.]?(\d+)", RegexOptions.IgnoreCase);

        // Patterns are tried in this order on every header line.
        // NOTE(review): every line matched by volRegex is also matched by noRegex, which is
        // tried first, so volRegex never decides the result. Order kept to preserve output.
        private static readonly Regex[] issuePatterns = { issueRegex, noRegex, volRegex };

        private static readonly char[] csvSpecialChars = { ',', '"', '\r', '\n' };

        private static readonly CultureInfo provider = CultureInfo.InvariantCulture;

        /// <summary>
        /// Builds the CSV: a header row followed by one row per <c>*.TXT</c> file found in
        /// <see cref="SourceDirectory"/>. Files whose name yields no numeric id are reported
        /// on standard error and skipped.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var files = Directory.GetFiles(SourceDirectory, "*.TXT");

            var defaultEntry = new Entry
            {
                subjects = new[] { "Atari ST", "Atari", "e-zine" },
                creator = "STR Publishing Inc."
            };

            // One "subject[i]" column per default subject, between the fixed columns.
            var header = new[] { "identifier", "file", "description" }
                .Concat(defaultEntry.subjects.Select((s, i) => "subject[" + i + "]"))
                .Concat(new[] { "title", "creator", "date" });

            var csv = new List<string> { MakeCsvRow(header) };

            foreach (var file in files)
            {
                var fileName = Path.GetFileName(file);
                if (!TryParseIssueId(fileName, out int id))
                {
                    Console.Error.WriteLine("Skipping " + file + ": file name does not contain a numeric issue id.");
                    continue;
                }

                var lines = File.ReadAllText(file).Split('\n');
                var entry = BuildEntry(fileName, id, lines);
                csv.Add(MakeCsvRow(OverlayEntries(defaultEntry, entry).AsFields()));
                Console.WriteLine(file);
            }

            File.WriteAllText(OutputFile, String.Join("\n", csv));
        }

        /// <summary>Maps every full, three-letter and four-letter month spelling to its month number (1-12).</summary>
        private static Dictionary<string, int> BuildMonthNumbers()
        {
            var map = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
            for (int i = 0; i < months.Length; i++)
            {
                // Indexer assignment: "May", "June" and "July" appear in more than one list.
                map[months[i]] = i + 1;
                map[smonths[i]] = i + 1;
                map[xmonths[i]] = i + 1;
            }
            return map;
        }

        /// <summary>
        /// Extracts the numeric id from a file name such as <c>STR123.TXT</c>.
        /// The "STR" prefix and the extension are matched case-insensitively.
        /// </summary>
        /// <param name="fileName">File name without directory.</param>
        /// <param name="id">Receives the parsed id, or 0 when parsing fails.</param>
        /// <returns><c>true</c> when the remainder of the name is an integer.</returns>
        internal static bool TryParseIssueId(string fileName, out int id)
        {
            var stem = Path.GetFileNameWithoutExtension(fileName) ?? "";
            if (stem.StartsWith("STR", StringComparison.OrdinalIgnoreCase))
            {
                stem = stem.Substring(3);
            }
            return int.TryParse(stem, NumberStyles.Integer, provider, out id);
        }

        /// <summary>Assembles the per-file entry (identifier, file, title, description, date).</summary>
        private static Entry BuildEntry(string fileName, int id, string[] lines)
        {
            var date = FindDate(lines);
            var issue = FindIssue(lines, id.ToString(provider));
            var publication = id < SiliconTimesFirstId
                ? "ST Report"
                : "Silicon Times Report (formerly STReport)";

            return new Entry
            {
                identifier = "st-report-" + id.ToString(provider),
                file = fileName,
                title = "ST Report " + issue,
                description = publication + " " + issue + " published " + FormatPublished(date) + ".",
                date = date
            };
        }

        /// <summary>
        /// Searches the first <see cref="HeaderLineCount"/> lines, then the last
        /// <see cref="FooterLineCount"/> lines (bottom-up), for a date.
        /// Files shorter than either window are searched as far as they go.
        /// </summary>
        /// <returns>"yyyy-MM-dd" or "yyyy-MM" for the first date found, or <c>null</c>.</returns>
        internal static string FindDate(string[] lines)
        {
            int headEnd = Math.Min(HeaderLineCount, lines.Length);
            for (int i = 0; i < headEnd; i++)
            {
                var found = ParseDate(lines[i]);
                if (found != null)
                {
                    return found;
                }
            }

            int tailStop = Math.Max(0, lines.Length - FooterLineCount);
            for (int i = lines.Length - 1; i >= tailStop; i--)
            {
                var found = ParseDate(lines[i]);
                if (found != null)
                {
                    return found;
                }
            }

            return null;
        }

        /// <summary>
        /// Parses the first "Month [day][,] year" occurrence in a line.
        /// </summary>
        /// <returns>"yyyy-MM-dd" when a day is present, "yyyy-MM" otherwise, or <c>null</c> when the line has no date.</returns>
        internal static string ParseDate(string line)
        {
            var match = regexDate.Match(line);
            if (!match.Success)
            {
                return null;
            }
            if (!monthNumbers.TryGetValue(match.Groups[1].Value, out int month))
            {
                return null;
            }

            var date = match.Groups[3].Value + "-" + month.ToString("00", provider);
            if (match.Groups[2].Success)
            {
                date += "-" + int.Parse(match.Groups[2].Value, provider).ToString("00", provider);
            }
            return date;
        }

        /// <summary>
        /// Returns the text of the first issue-label match ("Issue 12", "No. 3.05", ...)
        /// in the first <see cref="HeaderLineCount"/> lines.
        /// </summary>
        /// <param name="lines">Lines of the issue file.</param>
        /// <param name="fallback">Value returned when no pattern matches.</param>
        internal static string FindIssue(string[] lines, string fallback)
        {
            int headEnd = Math.Min(HeaderLineCount, lines.Length);
            for (int i = 0; i < headEnd; i++)
            {
                foreach (var pattern in issuePatterns)
                {
                    var match = pattern.Match(lines[i]);
                    if (match.Success)
                    {
                        return match.Value;
                    }
                }
            }
            return fallback;
        }

        /// <summary>
        /// Turns a "yyyy-MM-dd" or "yyyy-MM" date into "d MMMM yyyy" or "MMMM yyyy"
        /// (invariant culture). Anything else, including <c>null</c>, yields
        /// <see cref="FallbackPublished"/>.
        /// </summary>
        internal static string FormatPublished(string date)
        {
            if (DateTime.TryParseExact(date, "yyyy-MM-dd", provider, DateTimeStyles.None, out DateTime fullDate))
            {
                return fullDate.ToString("d MMMM yyyy", provider);
            }
            if (DateTime.TryParseExact(date, "yyyy-MM", provider, DateTimeStyles.None, out DateTime monthOnly))
            {
                return monthOnly.ToString("MMMM yyyy", provider);
            }
            return FallbackPublished;
        }

        /// <summary>
        /// Joins values into one CSV row. Fields containing a comma, double quote or line
        /// break are wrapped in double quotes with embedded quotes doubled; <c>null</c>
        /// becomes an empty field.
        /// </summary>
        internal static string MakeCsvRow(IEnumerable<string> values)
        {
            return String.Join(",", values.Select(v => EscapeCsvField(v)));
        }

        /// <summary>Quotes a single CSV field when it contains a character that needs it.</summary>
        private static string EscapeCsvField(string value)
        {
            if (value == null)
            {
                return "";
            }
            if (value.IndexOfAny(csvSpecialChars) < 0)
            {
                return value;
            }
            return "\"" + value.Replace("\"", "\"\"") + "\"";
        }

        /// <summary>One CSV row's worth of metadata; field names match the CSV column names.</summary>
        struct Entry
        {
            public string identifier;
            public string file;
            public string description;
            public string[] subjects;
            public string title;
            public string creator;
            public string date;

            /// <summary>Returns the values in header order; a null <c>subjects</c> contributes no columns.</summary>
            public IEnumerable<string> AsFields()
            {
                return new[] { identifier, file, description }
                    .Concat(subjects ?? Array.Empty<string>())
                    .Concat(new[] { title, creator, date });
            }
        }

        /// <summary>
        /// Combines two entries: scalar fields come from <paramref name="overlay"/> when set,
        /// otherwise from <paramref name="shared"/>; subjects are the shared subjects followed
        /// by the overlay subjects (either may be null).
        /// </summary>
        static Entry OverlayEntries(Entry shared, Entry overlay)
        {
            var sharedSubjects = shared.subjects ?? Array.Empty<string>();
            var overlaySubjects = overlay.subjects ?? Array.Empty<string>();

            return new Entry
            {
                identifier = overlay.identifier ?? shared.identifier,
                creator = overlay.creator ?? shared.creator,
                date = overlay.date ?? shared.date,
                description = overlay.description ?? shared.description,
                file = overlay.file ?? shared.file,
                subjects = sharedSubjects.Concat(overlaySubjects).ToArray(),
                title = overlay.title ?? shared.title
            };
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment