Skip to content

Instantly share code, notes, and snippets.

@damieng
Created February 2, 2021 20:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save damieng/9930a497fe6496f8047cb57ad483d278 to your computer and use it in GitHub Desktop.
Save damieng/9930a497fe6496f8047cb57ad483d278 to your computer and use it in GitHub Desktop.
Throwaway script that created all the necessary metadata for uploading ST Report to archive.org
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
namespace ConsoleApp8
{
/// <summary>
/// Console tool that scans ST Report text issues in a fixed directory and writes
/// "upload.csv", one row per issue, with identifier, file, description, subjects,
/// title, creator and date columns.
/// </summary>
class Program
{
    // Full English month names; (index + 1) is the calendar month number.
    static string[] months = new[] { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" };
    // Lower-cased full names, used for case-insensitive lookup of a matched month.
    static string[] lmonths = months.Select(m => m.ToLowerInvariant()).ToArray();
    // Three-letter abbreviations ("Jan", "Feb", ...).
    static string[] smonths = months.Select(m => m.Substring(0, 3)).ToArray();
    static string[] lsmonths = smonths.Select(m => m.ToLowerInvariant()).ToArray();
    // Prefixes of up to four letters ("Janu", "Sept", "May", ...).
    static string[] xmonths = months.Select(m => m.Substring(0, Math.Min(m.Length, 4))).ToArray();
    static string[] lxmonths = xmonths.Select(m => m.ToLowerInvariant()).ToArray();
    // Regex alternation of every accepted month spelling.
    static string anyMonth = String.Join('|', months.Concat(smonths).Concat(xmonths));
    // Group 1 = month spelling, group 2 = optional day of month, group 3 = four-digit year.
    static Regex regexDate = new Regex("(" + anyMonth + @")\W*[.]?\W*(\d{1,2})?[,]?\W*(\d{4})", RegexOptions.IgnoreCase);
    static Regex issueRegex = new Regex(@"Issue\W*(\d+)", RegexOptions.IgnoreCase);
    static Regex noRegex = new Regex(@"No[.]?\W*(\d+[.]?\d*)", RegexOptions.IgnoreCase);
    static Regex volRegex = new Regex(@"Vol[.]?\W*(\d+)\W*No[.]?(\d+)", RegexOptions.IgnoreCase);
    // Culture used for all parsing and formatting so the CSV does not depend on the machine locale.
    static CultureInfo provider = CultureInfo.InvariantCulture;

    // Number of lines inspected from the top of each file (date and issue label).
    const int HeadLineCount = 15;
    // Number of lines inspected from the bottom of each file when the top has no date.
    const int TailLineCount = 9;

    /// <summary>
    /// Reads every "*.TXT" file in the source directory, derives its metadata and
    /// writes all rows (header first) to "upload.csv" in the working directory.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    /// <exception cref="FormatException">
    /// A file name does not reduce to an integer after removing "STR" and ".TXT".
    /// </exception>
    static void Main(string[] args)
    {
        var files = Directory.GetFiles(@"C:\src\legacy-to-utf8\test", "*.TXT");
        var csv = new List<string>();

        var defaultEntry = new Entry
        {
            subjects = new[] { "Atari ST", "Atari", "e-zine" },
            creator = "STR Publishing Inc."
        };

        // One "subject[n]" column per default subject, between the fixed leading and trailing columns.
        var fields = new string[] { "identifier", "file", "description" }
            .Concat(defaultEntry.subjects.Select((s, i) => "subject[" + i + "]"))
            .Concat(new[] { "title", "creator", "date" });
        csv.Add(MakeCsvRow(fields));

        foreach (var file in files)
        {
            var entry = new Entry
            {
                file = Path.GetFileName(file)
            };
            var lines = File.ReadAllText(file).Split('\n');

            // Prefer a date near the top of the file; otherwise search upward from the last line.
            // Take() keeps both scans in range for files shorter than the scan window.
            entry.date = FindDate(lines.Take(HeadLineCount))
                ?? FindDate(Enumerable.Reverse(lines).Take(TailLineCount));

            var id = int.Parse(entry.file.Replace("STR", "").Replace(".TXT", ""), provider);

            // The numeric id doubles as the issue label when the header names none.
            var issue = FindIssue(lines.Take(HeadLineCount), id.ToString(provider));
            var published = DescribePublished(entry.date);

            entry.identifier = "st-report-" + id.ToString(provider);
            entry.title = "ST Report " + issue;
            entry.description = (id < 924 ? "ST Report" : "Silicon Times Report (formerly STReport)") + " " + issue + " published " + published + ".";

            csv.Add(MakeCsvRow(OverlayEntries(defaultEntry, entry).AsFields()));
            Console.WriteLine(file);
        }

        File.WriteAllText("upload.csv", String.Join("\n", csv));
    }

    /// <summary>
    /// Returns the first date found in <paramref name="candidates"/> as "yyyy-MM" or
    /// "yyyy-MM-dd" (the day is included only when the text has one), or null when
    /// no line contains a date.
    /// </summary>
    private static string FindDate(IEnumerable<string> candidates)
    {
        foreach (var line in candidates)
        {
            var match = regexDate.Match(line);
            if (!match.Success)
            {
                continue;
            }

            var date = match.Groups[3].Value + "-" + ResolveMonth(match.Groups[1].Value).ToString("00", provider);
            if (match.Groups[2].Success)
            {
                date += "-" + int.Parse(match.Groups[2].Value, provider).ToString("00", provider);
            }
            return date;
        }
        return null;
    }

    /// <summary>
    /// Maps a month spelling (full name, three-letter or four-letter prefix, any case)
    /// to its month number 1-12; returns 0 when the text is in none of the tables.
    /// </summary>
    private static int ResolveMonth(string name)
    {
        var key = name.ToLowerInvariant();
        foreach (var table in new[] { lmonths, lsmonths, lxmonths })
        {
            var index = Array.IndexOf(table, key);
            if (index >= 0)
            {
                return index + 1;
            }
        }
        return 0;
    }

    /// <summary>
    /// Returns the matched text of the first issue label found in
    /// <paramref name="candidates"/>, or <paramref name="fallback"/> when none matches.
    /// </summary>
    private static string FindIssue(IEnumerable<string> candidates, string fallback)
    {
        // NOTE(review): any line volRegex matches is also matched by noRegex, which is
        // tried first, so volRegex never decides the result. The order is kept as-is so
        // that generated titles do not change.
        var patterns = new[] { issueRegex, noRegex, volRegex };
        foreach (var line in candidates)
        {
            foreach (var pattern in patterns)
            {
                var match = pattern.Match(line);
                if (match.Success)
                {
                    return match.Value;
                }
            }
        }
        return fallback;
    }

    /// <summary>
    /// Turns the ISO-style date built by <see cref="FindDate"/> into description text:
    /// "d MMMM yyyy" for a full date, "MMMM yyyy" for a month-only date.
    /// </summary>
    private static string DescribePublished(string isoDate)
    {
        if (DateTime.TryParseExact(isoDate, "yyyy-MM-dd", provider, DateTimeStyles.AssumeLocal, out DateTime full))
        {
            return full.ToString("d MMMM yyyy", provider);
        }
        if (DateTime.TryParseExact(isoDate, "yyyy-MM", provider, DateTimeStyles.AssumeLocal, out DateTime monthOnly))
        {
            return monthOnly.ToString("MMMM yyyy", provider);
        }
        // Hard-coded text used when no usable date was found.
        // NOTE(review): presumably the known date of the undated issue(s) - confirm.
        return "June 1987";
    }

    /// <summary>
    /// Joins <paramref name="values"/> into one CSV record, quoting any value that
    /// contains a comma, double quote or line break. Null values become empty fields.
    /// </summary>
    private static string MakeCsvRow(IEnumerable<string> values)
    {
        return String.Join(",", values.Select(EscapeCsvField));
    }

    /// <summary>Quotes a single CSV field when needed, doubling embedded double quotes.</summary>
    private static string EscapeCsvField(string value)
    {
        if (value == null)
        {
            return "";
        }
        if (value.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
        {
            return value;
        }
        return "\"" + value.Replace("\"", "\"\"") + "\"";
    }

    /// <summary>Metadata for one row of the upload spreadsheet.</summary>
    struct Entry
    {
        public string identifier;
        public string file;
        public string description;
        public string[] subjects;
        public string title;
        public string creator;
        public string date;

        /// <summary>
        /// Returns the values in column order: identifier, file, description,
        /// each subject, title, creator, date. A null subject list contributes no columns.
        /// </summary>
        public IEnumerable<string> AsFields()
        {
            return new[] { identifier, file, description }
                .Concat(subjects ?? Array.Empty<string>())
                .Concat(new[] { title, creator, date });
        }
    }

    /// <summary>
    /// Combines two entries: each scalar field comes from <paramref name="overlay"/>
    /// when it is non-null, otherwise from <paramref name="shared"/>; the subject
    /// lists are concatenated, shared subjects first.
    /// </summary>
    static Entry OverlayEntries(Entry shared, Entry overlay)
    {
        return new Entry
        {
            identifier = overlay.identifier ?? shared.identifier,
            creator = overlay.creator ?? shared.creator,
            date = overlay.date ?? shared.date,
            description = overlay.description ?? shared.description,
            file = overlay.file ?? shared.file,
            subjects = (shared.subjects ?? Array.Empty<string>())
                .Concat(overlay.subjects ?? Array.Empty<string>())
                .ToArray(),
            title = overlay.title ?? shared.title
        };
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment