Skip to content

Instantly share code, notes, and snippets.

@ayende
Created October 11, 2019 18:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ayende/6bf8ccfbf4d991472bbb20d773cd0f22 to your computer and use it in GitHub Desktop.
Save ayende/6bf8ccfbf4d991472bbb20d773cd0f22 to your computer and use it in GitHub Desktop.
/// <summary>
/// Enumerates every file under <paramref name="dir"/> (recursively), parses each one as a
/// stream of MIME messages, and builds one field-to-terms dictionary per message
/// ("Id", "Date", "From", "To", "Body"). The dictionaries are collected in a concurrent queue.
/// </summary>
/// <param name="dir">Root directory whose files are fed to the parser workers.</param>
/// <remarks>
/// Work is started on background tasks and this method does not wait for them; the combined
/// task is left in <c>final</c> for the code that follows the placeholder comment at the end.
/// </remarks>
static void Analyze(string dir)
{
    var separators = new char[] { ' ', '\t', ',', '!', '\r', '(', ')', '?', '-', '"', '\n', '/' };
    var trim = new char[] { '.', };
    // Bounded so the file enumerator cannot run arbitrarily far ahead of the parsers.
    var blockingCollection = new BlockingCollection<string>(2048);
    var tasks = new List<Task>();
    var dics = new ConcurrentQueue<Dictionary<string, HashSet<string>>>();

    // Keep tokens longer than three characters, plus shorter non-empty tokens that start with a digit.
    static bool IsIndexableTerm(string term)
    {
        if (term.Length > 3)
        {
            return true;
        }
        if (term.Length == 0)
        {
            return false;
        }
        return char.IsDigit(term[0]);
    }

    for (int i = 0; i < 16; i++)
    {
        var task = Task.Run(() =>
        {
            // GetConsumingEnumerable blocks for the next item and ends cleanly once adding is
            // complete and the collection is drained. Checking IsCompleted and then calling
            // Take() races with the other consumers and can throw InvalidOperationException.
            foreach (var path in blockingCollection.GetConsumingEnumerable())
            {
                using var stream = File.OpenRead(path);
                var parser = new MimeParser(stream, MimeFormat.Entity);
                while (parser.IsEndOfStream == false)
                {
                    var entity = parser.ParseMessage();

                    // A message may have no Message-Id header or no plain-text body part;
                    // index those fields as empty sets instead of failing the whole worker.
                    var messageId = entity.MessageId;
                    var body = entity.GetTextBody(TextFormat.Plain) ?? string.Empty;

                    var dic = new Dictionary<string, HashSet<string>>
                    {
                        ["Id"] = messageId is null
                            ? new HashSet<string>()
                            : new HashSet<string> { messageId.ToLowerInvariant() },
                        ["Date"] = new HashSet<string> { entity.Date.ToString("r") },
                        ["From"] = entity.From.Select(x => x.ToString().ToLowerInvariant()).ToHashSet(),
                        ["To"] = entity.To.Select(x => x.ToString().ToLowerInvariant()).ToHashSet(),
                        ["Body"] = body
                            .Split(separators, StringSplitOptions.RemoveEmptyEntries)
                            .Select(x => x.Trim(trim).ToLowerInvariant())
                            .Where(IsIndexableTerm)
                            .ToHashSet()
                    };
                    dics.Enqueue(dic);
                }
            }
        });
        tasks.Add(task);
    }

    var so = Stopwatch.StartNew();
    tasks.Add(Task.Run(() =>
    {
        try
        {
            foreach (var file in Directory.EnumerateFiles(dir, "*", SearchOption.AllDirectories))
            {
                blockingCollection.Add(file);
            }
        }
        finally
        {
            // Always signal completion, even if enumeration throws; otherwise the
            // consumers would wait on the collection forever.
            blockingCollection.CompleteAdding();
        }
    }));

    // Completes when the producer and all consumers have finished; not awaited here.
    var final = Task.WhenAll(tasks);
    // do stuff with it.
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment