Skip to content

Instantly share code, notes, and snippets.

@guardrex
Last active May 25, 2017 22:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guardrex/29bc96a0f5cd0f3e7b2c9cc4036ab4f5 to your computer and use it in GitHub Desktop.
Save guardrex/29bc96a0f5cd0f3e7b2c9cc4036ab4f5 to your computer and use it in GitHub Desktop.
Remove metadata blocks with duplicate uids from metadata docs (Script 1)
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using static System.IO.SearchOption;
namespace RemoveDuplicateUids
{
/// <summary>
/// Console tool that scans every <c>*.md</c> file under a fixed folder, finds the
/// <c>---</c>-delimited blocks in each file and rewrites any file in which several
/// blocks are considered equal by <see cref="CaptureComparer"/>, keeping only the
/// longest block of each group.
/// </summary>
class Program
{
    // Matches one "--- ... ---" block; Singleline lets '.' span line breaks.
    private static readonly Regex s_blockPattern =
        new Regex(@"---(.*?)---", RegexOptions.Compiled | RegexOptions.Singleline);

    /// <summary>
    /// Entry point. Processes all markdown files below the hard-coded root folder and
    /// prints one line per rewritten file followed by a summary.
    /// </summary>
    static void Main()
    {
        // The root must end with a directory separator: relative names printed below
        // are derived with Substring(path.Length).
        string path = @"C:\Users\XXXXXXXXXX\Documents\GitHub\docs-1\add\metadata\";

        // Materialize the listing up front so that rewriting files cannot interfere
        // with a still-running lazy directory enumeration.
        var files = Directory.EnumerateFiles(path, "*.md", AllDirectories).ToList();

        var countFiles = 0;
        var countDrops = 0;

        foreach (var file in files)
        {
            var fileText = File.ReadAllText(file);
            var captures = s_blockPattern.Matches(fileText);
            var keptBlocks = SelectBlocksToKeep(captures);

            var capturesCount = captures.Count;
            var uniqueCapturesCount = keptBlocks.Count;
            if (capturesCount <= uniqueCapturesCount)
            {
                // Nothing to drop: leave the file untouched.
                continue;
            }

            countFiles++;
            countDrops += capturesCount - uniqueCapturesCount;
            Console.WriteLine($"{file.Substring(path.Length)} Captures: {capturesCount} Unique Captures: {uniqueCapturesCount}");

            // NOTE: only the matched blocks are written back; any text outside the
            // "--- ... ---" blocks is not preserved (same as the previous behavior).
            // Blocks are separated by one blank line and the file ends with a single
            // line break, so no second pass is needed to trim a trailing newline.
            var separator = Environment.NewLine + Environment.NewLine;
            File.WriteAllText(file, string.Join(separator, keptBlocks) + Environment.NewLine);
        }

        Console.WriteLine();
        Console.WriteLine($"Files updated: {countFiles} Total drops: {countDrops}");
    }

    /// <summary>
    /// Chooses which blocks survive. Within each group of blocks that
    /// <see cref="CaptureComparer"/> treats as equal, the longest block wins (the
    /// earliest one on a length tie, because the ordering is stable).
    /// </summary>
    /// <param name="captures">All blocks matched in one file, in file order.</param>
    /// <returns>The text of the surviving blocks, in their original file order.</returns>
    private static List<string> SelectBlocksToKeep(MatchCollection captures)
    {
        var seen = new HashSet<string>(new CaptureComparer());
        var keep = new bool[captures.Count];

        // Decide per position rather than per text value, so that two blocks with
        // identical text are not both written back.
        var longestFirst = Enumerable.Range(0, captures.Count)
            .OrderByDescending(index => captures[index].Length);
        foreach (var index in longestFirst)
        {
            // Add returns false when an equal (longer or earlier) block was already kept.
            keep[index] = seen.Add(captures[index].Value);
        }

        var kept = new List<string>();
        for (var index = 0; index < captures.Count; index++)
        {
            if (keep[index])
            {
                kept.Add(captures[index].Value);
            }
        }

        return kept;
    }
}
/// <summary>
/// Equality comparer that treats two text blocks as equal when the first
/// <c>uid:</c> entry found in each has the same value (surrounding whitespace and
/// the line ending are ignored). Blocks that contain no <c>uid:</c> entry are only
/// equal to blocks with exactly the same text, so they are never collapsed together
/// just because both lack a uid.
/// </summary>
class CaptureComparer : IEqualityComparer<string>
{
    // Captures the remainder of the line after the first "uid:"; shared by all
    // instances so the compiled pattern is built only once.
    private static readonly Regex s_uidPattern =
        new Regex(@"uid:([^\r\n]*)", RegexOptions.Compiled);

    /// <summary>Determines whether two blocks carry the same uid.</summary>
    /// <param name="x">First block; may be null.</param>
    /// <param name="y">Second block; may be null.</param>
    /// <returns>
    /// <c>true</c> when both are null, both are the same reference, or both yield the
    /// same comparison key; otherwise <c>false</c>.
    /// </returns>
    public bool Equals(string x, string y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (x == null || y == null)
        {
            return false;
        }

        return string.Equals(KeyOf(x), KeyOf(y), StringComparison.Ordinal);
    }

    /// <summary>Returns a hash code consistent with <see cref="Equals(string, string)"/>.</summary>
    /// <param name="capture">The block to hash.</param>
    /// <returns>The hash code of the block's comparison key.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="capture"/> is null.</exception>
    public int GetHashCode(string capture)
    {
        if (capture == null)
        {
            throw new ArgumentNullException(nameof(capture));
        }

        return KeyOf(capture).GetHashCode();
    }

    // Builds the comparison key: "uid:" plus the trimmed uid value when a uid entry
    // exists, otherwise the whole text. A text without a uid entry cannot contain
    // "uid:", so the two kinds of key can never collide.
    private static string KeyOf(string capture)
    {
        var match = s_uidPattern.Match(capture);
        return match.Success ? "uid:" + match.Groups[1].Value.Trim() : capture;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment