Skip to content

Instantly share code, notes, and snippets.

@BillWagner
Last active July 14, 2020 14:47
Show Gist options
  • Save BillWagner/3b06a69c98a8c4f614b4004025fcb04f to your computer and use it in GitHub Desktop.
Save BillWagner/3b06a69c98a8c4f614b4004025fcb04f to your computer and use it in GitHub Desktop.
Program to update section references in the C# standard (markdown version)
using System;
using System.Linq;
using System.IO;
using System.Threading.Tasks;
using System.Collections.Generic;
using System.Text;
// Consider using the default anchors.
// These anchors are of the form "-1723-arrays-and-the-generic-collection-interfaces"
//
// The current Word doc has headers in this form:
// Main standard:
// 9.12.3.4 (no period at the end, except for H1 headers, such as "9.")
// Annexes:
// D.1 (no period at the end, except for H1 headers, such as "Annex D.")
// Note the addition of the word "Annex".
// Notes for the updates:
// Facts learned while processing headers:
// 1. Existing section number (or new section)
// 2. New section number.
// Facts learned while processing existing links:
// 1. old section number.
// 2. If it's to a new section number.
namespace StandardAnchorTags
{
/// <summary>
/// Immutable triple describing one section heading: the section number it used to carry,
/// the section number it carries now, and the link destination that points at it.
/// </summary>
/// <remarks>
/// Within this file the instances are built by <c>ProcessHnTag</c>, which passes the old
/// section number (or an empty string), the freshly computed section number, and a
/// <c>file#anchor</c> destination.
/// </remarks>
public readonly struct SectionMapping
{
    /// <summary>Stores the three values exactly as given; no validation is performed.</summary>
    /// <param name="oldLink">Value exposed as <see cref="OldLinkText"/>.</param>
    /// <param name="newLink">Value exposed as <see cref="NewLinkText"/>.</param>
    /// <param name="anchor">Value exposed as <see cref="AnchorText"/>.</param>
    public SectionMapping(string oldLink, string newLink, string anchor)
        => (OldLinkText, NewLinkText, AnchorText) = (oldLink, newLink, anchor);

    /// <summary>The section number the heading had before renumbering.</summary>
    public string OldLinkText { get; }

    /// <summary>The section number assigned to the heading by this tool.</summary>
    public string NewLinkText { get; }

    /// <summary>The link destination used when a reference to this section is rewritten.</summary>
    public string AnchorText { get; }
}
public class Program
{
// The character that introduces a section reference in the markdown text.
private const char sectionReference = '§';

// Section mappings keyed by the OLD section number found in a heading.
// Filled while headings are processed; read while references are rewritten.
private static readonly Dictionary<string, SectionMapping> ExistingSectionLinkMap =
    new Dictionary<string, SectionMapping>();

// The same mappings keyed by the NEW section number assigned by this tool.
private static readonly Dictionary<string, SectionMapping> UpdatedSectionLinkMap =
    new Dictionary<string, SectionMapping>();

// Files whose references are fixed but whose headings are never renumbered.
private static readonly string[] frontMatter =
{
    "foreword.md",
    "introduction.md"
};

// Numbered clauses of the main standard. Order matters: heading counters advance
// in the order the files are listed.
private static readonly string[] fileList =
{
    "scope.md",
    "normative-references.md",
    "terms-and-definitions.md",
    "acronyms-and-abbreviations.md",
    "general-description.md",
    "conformance.md",
    "lexical-structure.md",
    "basic-concepts.md",
    "types.md",
    "variables.md",
    "conversions.md",
    "expressions.md",
    "statements.md",
    "namespaces.md",
    "classes.md",
    "structs.md",
    "arrays.md",
    "interfaces.md",
    "enums.md",
    "delegates.md",
    "exceptions.md",
    "attributes.md",
    "unsafe-code.md"
};

// Annexes, processed with letter-based top-level numbering. Order matters here too.
private static readonly string[] annexFiles =
{
    "grammar.md",
    "portability-issues.md",
    "standard-library.md",
    "documentation-comments.md",
    "bibliography.md"
};
/// <summary>
/// Entry point. Creates <c>toc.md</c>, renumbers the headings of every clause and annex
/// file (recording each heading in the table of contents), and finally rewrites the
/// section references in all files, front matter included.
/// </summary>
static async Task Main()
{
    using (var toc = new StreamWriter("toc.md"))
    {
        // The front matter has no numbered headings, so its entries are written directly.
        Console.WriteLine("=========================== Front Matter ===================================");
        await toc.WriteLineAsync("- [Foreword](foreword.md)");
        await toc.WriteLineAsync("- [Introduction](introduction.md)");

        // Pass 1: renumber headings and populate the section maps.
        Console.WriteLine("=========================== GENERATE HEADERS ===================================");
        await GenerateHeaders(fileList, toc, isAnnex: false);
        await GenerateHeaders(annexFiles, toc, isAnnex: true);

        // Pass 2: rewrite every section reference using the maps built above.
        Console.WriteLine("=========================== FIND REFERENCES ===================================");
        IEnumerable<string> everyFile = frontMatter.Concat(fileList).Concat(annexFiles);
        await FixReferences(everyFile);
    }
}
/// <summary>
/// Rewrites the section references in each of the given files. Every file is streamed
/// line by line through <c>ProcessSectionLinks</c> into <c>tmp.md</c>, which then replaces
/// the original file.
/// </summary>
/// <param name="allFiles">Paths of the markdown files to update, processed in order.</param>
private static async Task FixReferences(IEnumerable<string> allFiles)
{
    foreach (var file in allFiles)
    {
        Console.WriteLine(file);

        // Both streams are disposed (and therefore closed) before the move below.
        using (var source = new StreamReader(file))
        using (var rewritten = new StreamWriter("tmp.md"))
        {
            while (true)
            {
                string? text = await source.ReadLineAsync();
                if (text == null)
                {
                    break;
                }
                await rewritten.WriteLineAsync(ProcessSectionLinks(text));
            }
        }

        // Overwrite the original with the rewritten copy.
        File.Move("tmp.md", file, true);
    }
}
/// <summary>
/// Replaces every section reference on a line (for example <c>§9.2</c>, or an existing
/// markdown link wrapped around one) with a markdown link of the form
/// <c>[§new-number](file#anchor)</c>.
/// </summary>
/// <param name="line">One line of markdown text.</param>
/// <returns>
/// The rewritten line, or <paramref name="line"/> itself when it contains no '§'.
/// </returns>
/// <exception cref="KeyNotFoundException">
/// A reference names a section number that is not present in the relevant map.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Propagated from <c>AdjustForExistingLink</c> when a bracketed reference is not a well-formed link.
/// </exception>
private static string ProcessSectionLinks(string line)
{
    // Fast path: nothing to rewrite.
    if (!line.Contains(sectionReference))
    {
        return line;
    }

    var rebuilt = new StringBuilder(line.Length);
    int copiedUpTo = 0;
    var range = FindNextSectionReference(line, copiedUpTo);

    // A from-end start index is the "no further reference" sentinel.
    while (!range.Start.IsFromEnd)
    {
        // The bare reference text, '§' included.
        string reference = line[range];

        // Widen the range to cover an existing "[§x](...)" link, if there is one.
        range = AdjustForExistingLink(line, range);

        // A link that already contains "#updated" carries a NEW section number,
        // so it is looked up in the map keyed by new numbers.
        bool useUpdated = line[range].Contains("#updated");

        // Copy the untouched text that precedes the replacement.
        rebuilt.Append(line, copiedUpTo, range.Start.Value - copiedUpTo);

        // Oddly, there are a few blank references ("§" alone) in the standard.
        if (reference.Length > 1)
        {
            string sectionNumber = reference.Substring(1);
            var map = useUpdated ? UpdatedSectionLinkMap : ExistingSectionLinkMap;

            // Single lookup; on failure report which reference could not be resolved.
            if (!map.TryGetValue(sectionNumber, out SectionMapping mapping))
            {
                string whichMap = useUpdated ? "updated" : "existing";
                throw new KeyNotFoundException(
                    $"Section reference '{reference}' was not found in the {whichMap} section map. Line: {line}");
            }

            rebuilt.Append($"[{sectionReference}{mapping.NewLinkText}]({mapping.AnchorText})");
        }
        else
        {
            rebuilt.Append(reference);
        }

        copiedUpTo = range.End.Value;
        range = FindNextSectionReference(line, copiedUpTo);
    }

    // Copy whatever follows the last reference.
    rebuilt.Append(line, copiedUpTo, line.Length - copiedUpTo);
    return rebuilt.ToString();
}
/// <summary>
/// Widens the range of a section reference so that it covers an enclosing markdown link
/// of the form <c>[§x](destination)</c>, when the reference is immediately preceded by '['.
/// </summary>
/// <param name="line">The line containing the reference.</param>
/// <param name="range">Range of the bare reference; both indices are read as from-start offsets.</param>
/// <returns>
/// <paramref name="range"/> unchanged when the reference is not preceded by '[';
/// otherwise the range from that '[' through the closing ')'.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The reference is preceded by '[' but is not followed by "](" and a closing ')'.
/// </exception>
private static Range AdjustForExistingLink(string line, Range range)
{
    int start = range.Start.Value;

    // A reference at column 0 has no preceding character, so it cannot be inside a link.
    if (start == 0 || line[start - 1] != '[')
    {
        return range;
    }

    // Immediately after the reference we expect "](".
    int afterReference = range.End.Value;
    bool hasLinkSeparator = afterReference + 2 <= line.Length
        && line[afterReference] == ']'
        && line[afterReference + 1] == '(';
    if (!hasLinkSeparator)
    {
        throw new InvalidOperationException("Unexpected link text");
    }

    // The link ends at the first ')' after the separator; a missing one is malformed input.
    int closingParen = line.IndexOf(')', afterReference + 2);
    if (closingParen < 0)
    {
        throw new InvalidOperationException("Unexpected link text");
    }

    return new Range(start - 1, closingParen + 1);
}
/// <summary>
/// Locates the next section reference (a '§' followed by an optional upper-case annex
/// letter and then digits and periods) at or after the given position.
/// </summary>
/// <param name="line">The text to scan.</param>
/// <param name="index">Position at which scanning starts; may be from-start or from-end.</param>
/// <returns>
/// The range covering the '§' and the section number that follows it, excluding a single
/// trailing period. When no '§' is found, the sentinel range <c>^0..^0</c> is returned,
/// which callers detect through <c>Start.IsFromEnd</c>.
/// </returns>
private static Range FindNextSectionReference(string line, Index index)
{
    int searchFrom = index.GetOffset(line.Length);

    // Find the start:
    int startIndex = searchFrom < line.Length ? line.IndexOf(sectionReference, searchFrom) : -1;
    if (startIndex < 0)
    {
        return new Range(^0, ^0);
    }

    int endIndex = startIndex + 1;

    // Find the end.
    // Special case the first character, because it could be an annex letter.
    // The bounds check matters when '§' is the last character of the line.
    if (endIndex < line.Length && line[endIndex] >= 'A' && line[endIndex] <= 'Z')
    {
        endIndex++;
    }

    // Remaining characters must be 0..9 or '.'.
    while (endIndex < line.Length &&
           ((line[endIndex] >= '0' && line[endIndex] <= '9') || line[endIndex] == '.'))
    {
        endIndex++;
    }

    // A final '.' is the period ending the sentence, not part of the section number.
    // endIndex - 1 is never below startIndex, so this read is always in range.
    if (line[endIndex - 1] == '.')
    {
        endIndex--;
    }

    return new Range(startIndex, endIndex);
}
/// <summary>
/// Renumbers the headings of each given file. Every file is streamed line by line through
/// <c>GenerateHeadersForLine</c> into <c>tmp.md</c>, which then replaces the original file.
/// </summary>
/// <param name="files">Markdown files to process, in numbering order.</param>
/// <param name="toc">Writer for the table of contents; passed on for each heading found.</param>
/// <param name="isAnnex">Passed through to select annex-style numbering.</param>
private static async Task GenerateHeaders(IEnumerable<string> files, StreamWriter toc, bool isAnnex)
{
    // One counter per heading level. The array is shared by all files of this call,
    // so numbering continues from one file to the next.
    var counters = new int[6];

    foreach (var file in files)
    {
        Console.WriteLine(file);

        // Both streams are disposed before the move below.
        using (var input = new StreamReader(file))
        using (var output = new StreamWriter("tmp.md"))
        {
            while (true)
            {
                string? original = await input.ReadLineAsync();
                if (original == null)
                {
                    break;
                }
                string renumbered = await GenerateHeadersForLine(toc, isAnnex, original, counters, file);
                await output.WriteLineAsync(renumbered);
            }
        }

        // Overwrite the original with the renumbered copy.
        File.Move("tmp.md", file, true);
    }
}
/// <summary>
/// Handles one line of a markdown file. A line that starts with '#' characters followed
/// by a space is treated as a heading: it is renumbered, recorded in the section maps,
/// and listed in the table of contents. Any other line is returned untouched.
/// </summary>
/// <param name="toc">Writer that receives one table-of-contents entry per heading.</param>
/// <param name="isAnnex">Whether the file is an annex; level-1 headings then get an "Annex " prefix.</param>
/// <param name="line">The line to examine.</param>
/// <param name="headings">Running heading counters, updated by <c>ProcessHnTag</c>.</param>
/// <param name="file">Name of the file being processed; becomes part of the link destination.</param>
/// <returns>The rebuilt heading line, or <paramref name="line"/> when it is not a heading.</returns>
private static async Task<string> GenerateHeadersForLine(StreamWriter toc, bool isAnnex, string line, int[] headings, string file)
{
    bool looksLikeHeading = line.StartsWith('#') && line.Trim('#').StartsWith(' ');
    if (!looksLikeHeading)
    {
        return line;
    }

    var (entry, title, depth) = ProcessHnTag(file, line, isAnnex, headings);

    // Only headings that carried a number before can be found by their old number.
    if (!string.IsNullOrWhiteSpace(entry.OldLinkText))
    {
        ExistingSectionLinkMap.Add(entry.OldLinkText, entry);
    }
    // Every heading can be found by its new number.
    UpdatedSectionLinkMap.Add(entry.NewLinkText, entry);

    // Table-of-contents entry, indented two spaces per nesting level.
    string indent = new string(' ', (depth - 1) * 2);
    await toc.WriteLineAsync($"{indent}- [{sectionReference}{entry.NewLinkText}]({entry.AnchorText}) {title}");

    // Rebuild the heading line with the new number.
    string hashes = new string('#', depth);
    string annexPrefix = (isAnnex && depth == 1) ? "Annex " : "";
    return $"{hashes} {annexPrefix}{entry.NewLinkText} {title}";
}
/// <summary>
/// Parses one markdown heading line, advances the heading counters, and computes the
/// heading's new section number, its title without any old number, and its link destination.
/// </summary>
/// <param name="file">File containing the heading; used as the first part of the link destination.</param>
/// <param name="line">The heading line: leading '#' characters, a space, then the heading text.</param>
/// <param name="IsAnnex">When true, the top-level counter is rendered as a letter (1 = A, 2 = B, ...).</param>
/// <param name="headings">Per-level counters. The heading's own level is incremented and all deeper levels reset to zero.</param>
/// <returns>
/// The mapping (old number, new number, <c>file#anchor</c>), the heading title with any old
/// section number removed, and the heading level (the number of leading '#').
/// </returns>
private static (SectionMapping mapping, string header, int level) ProcessHnTag(string file, string line, bool IsAnnex,int[] headings)
{
    // The level is the number of characters before the first space, i.e. the count of '#'.
    int level = line.Substring(0, line.IndexOf(' ')).Length;

    // Advance this level and reset every deeper level.
    headings[level - 1]++;
    for (int index = level; index < headings.Length; index++)
    {
        headings[index] = 0;
    }

    // Generate the correct clause name, e.g. "9.12.3" or, for an annex, "D.1".
    var alphaNumericSection = string.Join('.', headings
        .Take(level)
        .Select((n, index) => (IsAnnex && index == 0) ? ((char)(n + 64)).ToString() : n.ToString()));

    // Build the correct section name, which may involve removing an existing section number.
    var sectionName = line.Substring(level + 1).Trim();
    var oldSectionName = "";
    if (HasSectionNumber(sectionName, IsAnnex))
    {
        // Strip only a leading "Annex " marker; the word may legitimately occur later in a title.
        const string annexPrefix = "Annex ";
        if (sectionName.StartsWith(annexPrefix, StringComparison.Ordinal))
        {
            sectionName = sectionName.Substring(annexPrefix.Length).TrimStart();
        }

        // The first word is the old number and the rest is the title.
        // A heading may consist of the number alone, in which case the title is empty.
        int split = sectionName.IndexOf(' ');
        if (split < 0)
        {
            oldSectionName = sectionName;
            sectionName = "";
        }
        else
        {
            oldSectionName = sectionName.Substring(0, split);
            sectionName = sectionName.Substring(split + 1);
        }

        // H1 headings carry a trailing period ("9." / "D."), which is not part of the number.
        if (oldSectionName.EndsWith('.'))
        {
            oldSectionName = oldSectionName[0..^1];
        }
    }

    // Derive the anchor: spaces become dashes and listed punctuation is dropped.
    // Lower-casing is culture-invariant so anchors do not vary with the machine's locale.
    string anchor = $"{alphaNumericSection} {sectionName}"
        .Replace(' ', '-')
        .Replace(".", "")
        .Replace(",", "")
        .Replace("`", "")
        .Replace("/", "")
        .Replace(":", "")
        .Replace("?", "")
        .Replace("&", "")
        .Replace("|", "")
        .Replace("!", "")
        .Replace("\\<", "")
        .Replace("\\>", "")
        .ToLowerInvariant();

    var linkDestinationUrl = $"{file}#{anchor}";
    var mapping = new SectionMapping(oldSectionName, alphaNumericSection, linkDestinationUrl);
    return (mapping, sectionName, level);
}
/// <summary>
/// Decides whether the first word of a heading is an existing section number.
/// </summary>
/// <param name="sectionName">Heading text with the leading '#' characters already removed.</param>
/// <param name="isAnnex">Whether the heading comes from an annex file.</param>
/// <returns>
/// For an annex: true when the first word is "Annex", or is a letter from A to E followed
/// only by digits and periods. Otherwise: true when the first word consists only of digits
/// and periods. An empty first word is never a section number.
/// </returns>
private static bool HasSectionNumber(string sectionName, bool isAnnex)
{
    string firstWord = sectionName.Split(' ')[0];

    // An empty heading has no number; this also keeps the [0] access below safe.
    if (firstWord.Length == 0)
    {
        return false;
    }

    if (!isAnnex)
    {
        return firstWord.All(IsDigitOrPeriod);
    }

    if (firstWord == "Annex")
    {
        return true;
    }

    // Annex numbers start with a letter in A..E. The two comparisons must be
    // combined with OR: no character is both below 'A' and above 'E'.
    char annexLetter = firstWord[0];
    if (annexLetter < 'A' || annexLetter > 'E')
    {
        return false;
    }

    return firstWord.Skip(1).All(IsDigitOrPeriod);

    static bool IsDigitOrPeriod(char c) => c == '.' || (c >= '0' && c <= '9');
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment