Last active
July 14, 2020 14:47
-
-
Save BillWagner/3b06a69c98a8c4f614b4004025fcb04f to your computer and use it in GitHub Desktop.
Program to update section references in the C# standard (markdown version)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Linq; | |
using System.IO; | |
using System.Threading.Tasks; | |
using System.Collections.Generic; | |
using System.Text; | |
// Consider using the default anchors. | |
// These anchors are of the form "-1723-arrays-and-the-generic-collection-interfaces" | |
// | |
// The current Word doc has headers in this form:
// Main standard:
// 9.12.3.4 (no period at the end, except H1 headers, such as "9.")
// Annexes:
// D.1 (no period, except H1 headers, such as "Annex D.")
// Note the addition of the word "Annex" on annex H1 headers.
// Notes for the updates: | |
// Facts learned while processing headers: | |
// 1. Existing section number (or new section) | |
// 2. New section number. | |
// Facts learned while processing existing links: | |
// 1. old section number. | |
// 2. If it's to a new section number. | |
namespace StandardAnchorTags | |
{ | |
/// <summary>
/// Immutable triple describing one heading: the section number it used to
/// carry, the section number it carries now, and the link target for it.
/// </summary>
public readonly struct SectionMapping
{
    /// <summary>Section number found on the heading before renumbering (may be empty).</summary>
    public string OldLinkText { get; }

    /// <summary>Section number assigned to the heading by this run.</summary>
    public string NewLinkText { get; }

    /// <summary>Link destination used when a reference to this section is rewritten.</summary>
    public string AnchorText { get; }

    /// <summary>Creates a mapping from its three components.</summary>
    /// <param name="oldLink">Value stored in <see cref="OldLinkText"/>.</param>
    /// <param name="newLink">Value stored in <see cref="NewLinkText"/>.</param>
    /// <param name="anchor">Value stored in <see cref="AnchorText"/>.</param>
    public SectionMapping(string oldLink, string newLink, string anchor)
        => (OldLinkText, NewLinkText, AnchorText) = (oldLink, newLink, anchor);
}
public class Program | |
{ | |
// Character that introduces a section reference in the text (for example "§9.3").
const char sectionReference = '§';

// Mappings keyed by the section number a heading had BEFORE renumbering.
private static readonly Dictionary<string, SectionMapping> ExistingSectionLinkMap =
    new Dictionary<string, SectionMapping>();

// Mappings keyed by the section number a heading has AFTER renumbering.
private static readonly Dictionary<string, SectionMapping> UpdatedSectionLinkMap =
    new Dictionary<string, SectionMapping>();

// Files that only have their references fixed; no headers are generated for them.
private static readonly string[] frontMatter =
{
    "foreword.md",
    "introduction.md"
};

// Main clauses of the standard, in the order they are numbered.
private static readonly string[] fileList =
{
    "scope.md",
    "normative-references.md",
    "terms-and-definitions.md",
    "acronyms-and-abbreviations.md",
    "general-description.md",
    "conformance.md",
    "lexical-structure.md",
    "basic-concepts.md",
    "types.md",
    "variables.md",
    "conversions.md",
    "expressions.md",
    "statements.md",
    "namespaces.md",
    "classes.md",
    "structs.md",
    "arrays.md",
    "interfaces.md",
    "enums.md",
    "delegates.md",
    "exceptions.md",
    "attributes.md",
    "unsafe-code.md"
};

// Annexes, in the order they are lettered.
private static readonly string[] annexFiles =
{
    "grammar.md",
    "portability-issues.md",
    "standard-library.md",
    "documentation-comments.md",
    "bibliography.md"
};
/// <summary>
/// Entry point: writes the table of contents to "toc.md", renumbers the
/// headers of the main clauses and the annexes, then rewrites the section
/// references in every file (front matter included).
/// </summary>
static async Task Main()
{
    using (var toc = new StreamWriter("toc.md"))
    {
        Console.WriteLine("=========================== Front Matter ===================================");
        await toc.WriteLineAsync("- [Foreword](foreword.md)");
        await toc.WriteLineAsync("- [Introduction](introduction.md)");

        Console.WriteLine("=========================== GENERATE HEADERS ===================================");
        await GenerateHeaders(fileList, toc, isAnnex: false);
        await GenerateHeaders(annexFiles, toc, isAnnex: true);

        Console.WriteLine("=========================== FIND REFERENCES ===================================");
        IEnumerable<string> everyFile = frontMatter.Concat(fileList).Concat(annexFiles);
        await FixReferences(everyFile);
    }
}
/// <summary>
/// Rewrites the section references in each file: every line is passed
/// through <c>ProcessSectionLinks</c> into "tmp.md", which then replaces
/// the original file.
/// </summary>
/// <param name="allFiles">Paths of the files to rewrite in place.</param>
private static async Task FixReferences(IEnumerable<string> allFiles)
{
    foreach (var path in allFiles)
    {
        Console.WriteLine(path);

        // Both streams are disposed (and therefore closed) before the move below.
        using (var source = new StreamReader(path))
        using (var scratch = new StreamWriter("tmp.md"))
        {
            string? current = await source.ReadLineAsync();
            while (current != null)
            {
                await scratch.WriteLineAsync(ProcessSectionLinks(current));
                current = await source.ReadLineAsync();
            }
        }

        File.Move("tmp.md", path, true);
    }
}
/// <summary>
/// Replaces every section reference ("§" followed by a section number) in
/// <paramref name="line"/> with a markdown link of the form
/// <c>[§new-number](anchor)</c>.
/// </summary>
/// <remarks>
/// A reference that is already wrapped in a markdown link is replaced as a
/// whole. When that existing link contains "#updated", the number is looked
/// up in <c>UpdatedSectionLinkMap</c>; otherwise it is looked up in
/// <c>ExistingSectionLinkMap</c>.
/// </remarks>
/// <param name="line">One line of markdown text.</param>
/// <returns>The line with its section references rewritten; the same string when it has none.</returns>
/// <exception cref="KeyNotFoundException">A referenced section number is not in the selected map.</exception>
private static string ProcessSectionLinks(string line)
{
    if (!line.Contains(sectionReference))
    {
        return line;
    }

    var returnedLine = new StringBuilder();
    Index index = 0;
    var range = FindNextSectionReference(line, index);
    while (!range.Start.IsFromEnd) // found another section reference.
    {
        // Grab the section text ("§" plus the number) before the range is widened.
        string reference = line[range];

        // Optionally expand the range to cover an existing "[§x.y](target)" link.
        range = AdjustForExistingLink(line, range);
        bool useUpdated = line[range].Contains("#updated");

        // Copy text up to replacement:
        returnedLine.Append(line[new Range(index, range.Start)]);

        // Oddly, we have a few blank references in the standard:
        if (reference.Length > 1)
        {
            string sectionNumber = reference.Substring(1);
            var map = useUpdated ? UpdatedSectionLinkMap : ExistingSectionLinkMap;

            // Single lookup; on failure, say which reference on which line is
            // unknown instead of surfacing the bare dictionary error.
            if (!map.TryGetValue(sectionNumber, out SectionMapping mapping))
            {
                throw new KeyNotFoundException(
                    $"No {(useUpdated ? "updated" : "existing")} section '{sectionNumber}' is known; referenced in line: {line}");
            }

            returnedLine.Append($"[{sectionReference}{mapping.NewLinkText}]({mapping.AnchorText})");
        }
        else
        {
            returnedLine.Append(reference);
        }

        index = range.End.Value;
        range = FindNextSectionReference(line, index);
    }

    // Copy remaining text
    returnedLine.Append(line[index..^0]);
    return returnedLine.ToString();
}
/// <summary>
/// Widens <paramref name="range"/> to cover a whole markdown link when the
/// section reference is already the text of one, i.e. the line reads
/// <c>[§x.y](target)</c> around the range.
/// </summary>
/// <param name="line">The line containing the reference.</param>
/// <param name="range">Range of the bare reference; both ends are read as offsets from the start of the line.</param>
/// <returns>
/// The original range when the reference is not preceded by '[';
/// otherwise the range from that '[' through the closing ')'.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The reference is preceded by '[' but is not followed by "](" and a closing ')'.
/// </exception>
private static Range AdjustForExistingLink(string line, Range range)
{
    // If the character before the start of the range isn't the '[' character,
    // return => no existing link. A reference at the very start of the line
    // has no previous character at all.
    int previous = range.Start.Value - 1;
    if (previous < 0 || line[previous] != '[')
    {
        return range;
    }

    // After the reference, expect "](" ...
    int endIndex = range.End.Value;
    bool hasLinkSeparator = endIndex + 2 <= line.Length
        && line[endIndex] == ']'
        && line[endIndex + 1] == '(';
    if (!hasLinkSeparator)
    {
        throw new InvalidOperationException("Unexpected link text");
    }

    // ... then the link target, terminated by ')'.
    int closingParen = line.IndexOf(')', endIndex + 2);
    if (closingParen < 0)
    {
        throw new InvalidOperationException("Unexpected link text");
    }

    return new Range(previous, closingParen + 1);
}
/// <summary>
/// Locates the next section reference at or after <paramref name="index"/>.
/// A reference is the '§' character, an optional single upper-case letter
/// (annex), then any run of digits and periods; one trailing period is
/// treated as sentence punctuation and excluded.
/// </summary>
/// <param name="line">The line to scan.</param>
/// <param name="index">Position at which to start scanning.</param>
/// <returns>
/// The range of the reference, starting at the '§' character. When no
/// reference is found, the range <c>^0..^0</c> is returned; callers detect
/// that through <c>Start.IsFromEnd</c>.
/// </returns>
private static Range FindNextSectionReference(string line, Index index)
{
    // Find the start:
    int startIndex = line.IndexOf(sectionReference, index.GetOffset(line.Length));
    if (startIndex < 0)
    {
        return new Range(^0, ^0);
    }

    int endIndex = startIndex + 1;

    // Find the end:
    // Special case the first character, because it could be an Annex.
    // The bounds check matters when '§' is the last character of the line.
    if (endIndex < line.Length && line[endIndex] >= 'A' && line[endIndex] <= 'Z')
    {
        endIndex++;
    }

    // Remaining must be 0..9, or .:
    while (endIndex < line.Length
        && ((line[endIndex] >= '0' && line[endIndex] <= '9') || line[endIndex] == '.'))
    {
        endIndex++;
    }

    // One final special case: If the last character is '.', it's not
    // part of the section reference, it's the period at the end of a sentence:
    if (line[endIndex - 1] == '.')
    {
        endIndex--;
    }

    return new Range(startIndex, endIndex);
}
/// <summary>
/// Renumbers the headers of each file in turn: every line goes through
/// <c>GenerateHeadersForLine</c> into "tmp.md", which then replaces the
/// original file.
/// </summary>
/// <param name="files">Files to process, in numbering order.</param>
/// <param name="toc">Writer passed through to <c>GenerateHeadersForLine</c>.</param>
/// <param name="isAnnex">Passed through to <c>GenerateHeadersForLine</c> for every line.</param>
private static async Task GenerateHeaders(IEnumerable<string> files, StreamWriter toc, bool isAnnex)
{
    // One counter per heading level, shared by all files of this call so
    // that numbering continues from one file to the next.
    var headings = new int[6];

    foreach (var path in files)
    {
        Console.WriteLine(path);

        using (var reader = new StreamReader(path))
        using (var writer = new StreamWriter("tmp.md"))
        {
            string? current = await reader.ReadLineAsync();
            while (current != null)
            {
                string rewritten = await GenerateHeadersForLine(toc, isAnnex, current, headings, path);
                await writer.WriteLineAsync(rewritten);
                current = await reader.ReadLineAsync();
            }
        }

        File.Move("tmp.md", path, true);
    }
}
/// <summary>
/// Processes one line of a file. A line that starts with '#' characters
/// followed by a space is treated as a header: it is renumbered, recorded
/// in the section maps, and listed in the table of contents. Any other
/// line is returned unchanged.
/// </summary>
/// <param name="toc">Writer that receives one table-of-contents entry per header.</param>
/// <param name="isAnnex">True to letter the top level and prefix level-1 headers with "Annex ".</param>
/// <param name="line">The line to process.</param>
/// <param name="headings">Running per-level counters, updated by <c>ProcessHnTag</c>.</param>
/// <param name="file">File the line belongs to; passed on to <c>ProcessHnTag</c>.</param>
/// <returns>The rewritten header line, or <paramref name="line"/> itself.</returns>
private static async Task<string> GenerateHeadersForLine(StreamWriter toc, bool isAnnex, string line, int[] headings, string file)
{
    bool looksLikeHeader = line.StartsWith('#') && line.Trim('#').StartsWith(' ');
    if (!looksLikeHeader)
    {
        return line;
    }

    var (mapping, sectionName, level) = ProcessHnTag(file, line, isAnnex, headings);

    // Only headers that already carried a number can be found by old references.
    if (!string.IsNullOrWhiteSpace(mapping.OldLinkText))
    {
        ExistingSectionLinkMap.Add(mapping.OldLinkText, mapping);
    }
    UpdatedSectionLinkMap.Add(mapping.NewLinkText, mapping);

    // Table-of-contents entry, indented two spaces per nesting level.
    string indent = new string(' ', (level - 1) * 2);
    string annexWord = (isAnnex && (level == 1)) ? "Annex " : "";
    string hashes = new string('#', level);
    string header = $"{hashes} {annexWord}{mapping.NewLinkText} {sectionName}";

    await toc.WriteLineAsync($"{indent}- [{sectionReference}{mapping.NewLinkText}]({mapping.AnchorText}) {sectionName}");

    // The new header line
    return header;
}
/// <summary>
/// Computes the new section number, the title, and the link anchor for one
/// ATX header line, advancing the running counters in <paramref name="headings"/>.
/// </summary>
/// <param name="file">File containing the header; becomes the part of the link before '#'.</param>
/// <param name="line">Header line: '#' characters, one space, then the header text.</param>
/// <param name="IsAnnex">True to render the level-1 counter as a letter (1 = A, 2 = B, ...).</param>
/// <param name="headings">
/// Per-level counters. The counter for this header's level is incremented
/// and every deeper level is reset to zero.
/// </param>
/// <returns>
/// The mapping (old number, new number, "file#anchor"), the header title
/// without any old section number, and the header level.
/// </returns>
private static (SectionMapping mapping, string header, int level) ProcessHnTag(string file, string line, bool IsAnnex,int[] headings)
{
    // The header level is the number of characters before the first space.
    int level = line.IndexOf(' ');

    // Set all headings:
    headings[level - 1]++;
    for (int index = level; index < headings.Length; index++)
    {
        headings[index] = 0;
    }

    // Generate the correct clause name ("9.12.3", or "D.1" in an annex):
    var clauseParts = headings
        .Take(level)
        .Select((n, index) => (IsAnnex && index == 0)
            ? ((char)('A' + n - 1)).ToString()
            : n.ToString());
    string alphaNumericSection = string.Join('.', clauseParts);

    // Build the correct section name, which may involve removing an existing section number:
    string sectionName = line.Substring(level + 1).Trim();
    string oldSectionName = "";
    if (HasSectionNumber(sectionName, IsAnnex))
    {
        // Drop only a LEADING "Annex " so that the word is preserved when it
        // appears later in the title.
        const string annexPrefix = "Annex ";
        if (sectionName.StartsWith(annexPrefix, StringComparison.Ordinal))
        {
            sectionName = sectionName.Substring(annexPrefix.Length).TrimStart();
        }

        // A header that consists of nothing but a number has no separator;
        // treat the whole text as the old number and leave the title empty.
        int separator = sectionName.IndexOf(' ');
        oldSectionName = (separator < 0) ? sectionName : sectionName.Substring(0, separator);
        if (oldSectionName.EndsWith('.'))
        {
            oldSectionName = oldSectionName[0..^1];
        }
        sectionName = (separator < 0) ? "" : sectionName.Substring(separator + 1);
    }

    // Build the anchor: spaces become '-', listed punctuation is dropped, and
    // the result is lower-cased independently of the current culture.
    string anchor = $"{alphaNumericSection} {sectionName}"
        .Replace(' ', '-')
        .Replace(".", "")
        .Replace(",", "")
        .Replace("`", "")
        .Replace("/", "")
        .Replace(":", "")
        .Replace("?", "")
        .Replace("&", "")
        .Replace("|", "")
        .Replace("!", "")
        .Replace("\\<", "")
        .Replace("\\>", "")
        .ToLowerInvariant();

    var linkDestinationUrl = $"{file}#{anchor}";
    var mapping = new SectionMapping(oldSectionName, alphaNumericSection, linkDestinationUrl);
    return (mapping, sectionName, level);
}
/// <summary>
/// Decides whether the first word of a header's text is an existing section number.
/// </summary>
/// <param name="sectionName">Header text with the leading '#' characters removed.</param>
/// <param name="isAnnex">True when the header belongs to an annex file.</param>
/// <returns>
/// Outside an annex: true when the first word is non-empty and made up only
/// of digits and periods. Inside an annex: true when the first word is
/// "Annex", or is a letter 'A'..'E' followed only by digits and periods.
/// False for empty text.
/// </returns>
private static bool HasSectionNumber(string sectionName, bool isAnnex)
{
    string candidate = sectionName.Split(' ').First();

    // An empty first word (empty header text) cannot be a section number.
    if (candidate.Length == 0)
    {
        return false;
    }

    if (isAnnex)
    {
        if (candidate == "Annex")
        {
            return true;
        }

        // The annex letter must lie in A..E; anything outside that range is
        // ordinary title text.
        if (candidate[0] < 'A' || candidate[0] > 'E')
        {
            return false;
        }

        candidate = candidate.Substring(1);
    }

    // What remains must be digits and periods only.
    return candidate.All(c => c == '.' || (c >= '0' && c <= '9'));
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment