Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Throwaway script that created all the necessary metadata for uploading ST Report to archive.org
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
namespace ConsoleApp8
{
    /// <summary>
    /// Throwaway tool that scans a directory of ST Report text issues (STRnnnn.TXT),
    /// extracts a publication date and an issue label from each file and writes a
    /// CSV metadata sheet (one row per file) suitable for a bulk upload.
    /// </summary>
    class Program
    {
        /// <summary>Directory scanned when no command-line argument is given.</summary>
        const string DefaultInputDirectory = @"C:\src\legacy-to-utf8\test";

        /// <summary>CSV file written when no second command-line argument is given.</summary>
        const string DefaultOutputPath = "upload.csv";

        /// <summary>Number of lines at the top of a file searched for the date and the issue label.</summary>
        const int HeaderLinesToScan = 15;

        /// <summary>Number of lines at the bottom of a file searched for the date when the header has none.</summary>
        const int FooterLinesToScan = 9;

        /// <summary>Text used for the publication date when none can be extracted from the file.</summary>
        const string FallbackPublished = "June 1987";

        // Month-name tables: full names, 3-letter abbreviations and 4-letter
        // abbreviations ("Sept", "June"), each with a lower-case twin for lookups.
        // Lower-casing is culture-invariant so the lookup does not depend on the
        // machine's regional settings.
        static readonly string[] months = new[] { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" };
        static readonly string[] lmonths = months.Select(m => m.ToLowerInvariant()).ToArray();
        static readonly string[] smonths = months.Select(m => m.Substring(0, 3)).ToArray();
        static readonly string[] lsmonths = smonths.Select(m => m.ToLowerInvariant()).ToArray();
        static readonly string[] xmonths = months.Select(m => m.Substring(0, Math.Min(m.Length, 4))).ToArray();
        static readonly string[] lxmonths = xmonths.Select(m => m.ToLowerInvariant()).ToArray();
        static readonly string anyMonth = String.Join('|', months.Concat(smonths).Concat(xmonths));

        // Group 1 = month name, group 2 = optional day, group 3 = four-digit year.
        static readonly Regex regexDate = new Regex("(" + anyMonth + @")\W*[.]?\W*(\d{1,2})?[,]?\W*(\d{4})", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        static readonly Regex issueRegex = new Regex(@"Issue\W*(\d+)", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        static readonly Regex noRegex = new Regex(@"No[.]?\W*(\d+[.]?\d*)", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        static readonly Regex volRegex = new Regex(@"Vol[.]?\W*(\d+)\W*No[.]?(\d+)", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        // Tried in this order on every header line. volRegex must come before
        // noRegex: every "Vol. x No.y" line also contains a "No.y" that noRegex
        // matches, so with noRegex first the volume pattern could never win.
        static readonly Regex[] issuePatterns = new[] { issueRegex, volRegex, noRegex };

        static readonly CultureInfo provider = CultureInfo.InvariantCulture;

        // Characters that force a CSV field to be quoted.
        static readonly char[] csvSpecialChars = new[] { ',', '"', '\r', '\n' };

        /// <summary>
        /// Entry point. Builds the CSV from every *.TXT file in the input directory.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> is the input directory and <c>args[1]</c> the output CSV path.
        /// Missing arguments fall back to the built-in defaults.
        /// </param>
        static void Main(string[] args)
        {
            var inputDirectory = args != null && args.Length > 0 ? args[0] : DefaultInputDirectory;
            var outputPath = args != null && args.Length > 1 ? args[1] : DefaultOutputPath;

            var files = Directory.GetFiles(inputDirectory, "*.TXT");
            var csv = new List<string>();
            var defaultEntry = new Entry
            {
                subjects = new[] { "Atari ST", "Atari", "e-zine" },
                creator = "STR Publishing Inc."
            };

            // Header row: one "subject[i]" column per shared subject.
            var fields = new string[] { "identifier", "file", "description" }
                .Concat(defaultEntry.subjects.Select((s, i) => "subject[" + i + "]"))
                .Concat(new[] { "title", "creator", "date" });
            csv.Add(MakeCsvRow(fields));

            foreach (var file in files)
            {
                var fileName = Path.GetFileName(file);
                if (!TryGetIssueId(fileName, out int id))
                {
                    // The identifier is derived from the numeric part of the name;
                    // without it no meaningful row can be produced.
                    Console.Error.WriteLine("Skipping " + file + ": cannot derive an issue number from the file name.");
                    continue;
                }

                var lines = File.ReadAllText(file).Split('\n');
                var entry = new Entry
                {
                    file = fileName,
                    date = FindDate(lines)
                };

                var issue = FindIssue(lines, id.ToString(provider));
                var published = DescribePublished(entry.date);

                entry.identifier = "st-report-" + id.ToString(provider);
                entry.title = "ST Report " + issue;
                entry.description = (id < 924 ? "ST Report" : "Silicon Times Report (formerly STReport)") + " " + issue + " published " + published + ".";
                csv.Add(MakeCsvRow(OverlayEntries(defaultEntry, entry).AsFields()));
                Console.WriteLine(file);
            }

            File.WriteAllText(outputPath, String.Join("\n", csv));
        }

        /// <summary>
        /// Extracts the numeric issue id from a file name of the form <c>STRnnnn.TXT</c>
        /// (prefix and extension are matched case-insensitively).
        /// </summary>
        /// <param name="fileName">File name without directory.</param>
        /// <param name="id">Receives the parsed number, or 0 when parsing fails.</param>
        /// <returns><c>true</c> when the name contained a parsable number.</returns>
        internal static bool TryGetIssueId(string fileName, out int id)
        {
            var stem = Path.GetFileNameWithoutExtension(fileName) ?? "";
            if (stem.StartsWith("STR", StringComparison.OrdinalIgnoreCase))
            {
                stem = stem.Substring(3);
            }
            return int.TryParse(stem, NumberStyles.Integer, provider, out id);
        }

        /// <summary>
        /// Maps a month name or abbreviation, in any letter case, to its number.
        /// </summary>
        /// <param name="name">Full name, 3-letter or 4-letter abbreviation.</param>
        /// <returns>1-12, or 0 when the name is not recognised.</returns>
        internal static int MonthNumber(string name)
        {
            var key = (name ?? "").ToLowerInvariant();
            foreach (var table in new[] { lmonths, lsmonths, lxmonths })
            {
                var index = Array.IndexOf(table, key);
                if (index >= 0)
                {
                    return index + 1;
                }
            }
            return 0;
        }

        /// <summary>
        /// Looks for a date in a single line.
        /// </summary>
        /// <returns>
        /// "yyyy-MM-dd" when the line names a day, "yyyy-MM" when it names only
        /// month and year, or <c>null</c> when the line contains no date.
        /// </returns>
        static string TryFormatDate(string line)
        {
            var match = regexDate.Match(line);
            if (!match.Success)
            {
                return null;
            }

            var date = match.Groups[3].Value + "-" + MonthNumber(match.Groups[1].Value).ToString("00", provider);
            if (match.Groups[2].Success)
            {
                date += "-" + int.Parse(match.Groups[2].Value, provider).ToString("00", provider);
            }
            return date;
        }

        /// <summary>
        /// Finds the publication date of an issue: first in the leading header lines,
        /// then, if none is found there, in the trailing lines scanned from the bottom up.
        /// Files shorter than the scan windows are handled without error.
        /// </summary>
        /// <param name="lines">The lines of the issue.</param>
        /// <returns>"yyyy-MM-dd", "yyyy-MM", or <c>null</c> when no date was found.</returns>
        internal static string FindDate(IReadOnlyList<string> lines)
        {
            var headerCount = Math.Min(HeaderLinesToScan, lines.Count);
            for (var i = 0; i < headerCount; i++)
            {
                var date = TryFormatDate(lines[i]);
                if (date != null)
                {
                    return date;
                }
            }

            var footerCount = Math.Min(FooterLinesToScan, lines.Count);
            for (var i = 1; i <= footerCount; i++)
            {
                var date = TryFormatDate(lines[lines.Count - i]);
                if (date != null)
                {
                    return date;
                }
            }

            return null;
        }

        /// <summary>
        /// Finds the issue label ("Issue 12", "Vol. 3 No.12", "No. 1.01", ...) in the
        /// leading header lines. The first line that matches any pattern wins.
        /// </summary>
        /// <param name="lines">The lines of the issue.</param>
        /// <param name="fallback">Label returned when no header line matches.</param>
        /// <returns>The matched text, or <paramref name="fallback"/>.</returns>
        internal static string FindIssue(IReadOnlyList<string> lines, string fallback)
        {
            var headerCount = Math.Min(HeaderLinesToScan, lines.Count);
            for (var i = 0; i < headerCount; i++)
            {
                foreach (var pattern in issuePatterns)
                {
                    var match = pattern.Match(lines[i]);
                    if (match.Success)
                    {
                        return match.Value;
                    }
                }
            }
            return fallback;
        }

        /// <summary>
        /// Turns a date produced by <see cref="FindDate"/> into the English text used
        /// in the description ("4 September 1992" or "December 1990").
        /// </summary>
        /// <param name="isoDate">"yyyy-MM-dd", "yyyy-MM", or <c>null</c>.</param>
        /// <returns>The formatted date, or "June 1987" when the input cannot be parsed.</returns>
        internal static string DescribePublished(string isoDate)
        {
            if (DateTime.TryParseExact(isoDate, "yyyy-MM-dd", provider, DateTimeStyles.AssumeLocal, out DateTime fullDate))
            {
                return fullDate.ToString("d MMMM yyyy", provider);
            }
            // Month-only dates are valid output of FindDate and must not fall
            // through to the generic fallback.
            if (DateTime.TryParseExact(isoDate, "yyyy-MM", provider, DateTimeStyles.AssumeLocal, out DateTime monthOnly))
            {
                return monthOnly.ToString("MMMM yyyy", provider);
            }
            return FallbackPublished;
        }

        /// <summary>
        /// Joins values into one CSV row. Fields containing a comma, a double quote
        /// or a line break are quoted, with embedded quotes doubled; <c>null</c>
        /// values become empty fields.
        /// </summary>
        internal static string MakeCsvRow(IEnumerable<string> values)
        {
            return String.Join(",", values.Select(EscapeCsvField));
        }

        /// <summary>Quotes a single CSV field when it contains a special character.</summary>
        static string EscapeCsvField(string value)
        {
            if (value == null)
            {
                return "";
            }
            if (value.IndexOfAny(csvSpecialChars) < 0)
            {
                return value;
            }
            return "\"" + value.Replace("\"", "\"\"") + "\"";
        }

        /// <summary>One row of upload metadata.</summary>
        struct Entry
        {
            public string identifier;
            public string file;
            public string description;
            public string[] subjects;
            public string title;
            public string creator;
            public string date;

            /// <summary>
            /// Returns the values in CSV column order: identifier, file, description,
            /// every subject, title, creator, date. A null subject list contributes no columns.
            /// </summary>
            public IEnumerable<string> AsFields()
            {
                return new[] { identifier, file, description }
                    .Concat(subjects ?? Array.Empty<string>())
                    .Concat(new[] { title, creator, date });
            }
        }

        /// <summary>
        /// Combines two entries: scalar fields come from <paramref name="overlay"/>
        /// when set and otherwise from <paramref name="shared"/>; subject lists are
        /// concatenated (shared first). Null subject lists are treated as empty.
        /// </summary>
        static Entry OverlayEntries(Entry shared, Entry overlay)
        {
            return new Entry
            {
                identifier = overlay.identifier ?? shared.identifier,
                creator = overlay.creator ?? shared.creator,
                date = overlay.date ?? shared.date,
                description = overlay.description ?? shared.description,
                file = overlay.file ?? shared.file,
                subjects = (shared.subjects ?? Array.Empty<string>())
                    .Concat(overlay.subjects ?? Array.Empty<string>())
                    .ToArray(),
                title = overlay.title ?? shared.title
            };
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment