Created
November 17, 2016 23:23
-
-
Save anonymous/83cfd3363a47fbf16d18dd25f94a1c99 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Diagnostics; | |
using System.IO; | |
using System.IO.MemoryMappedFiles; | |
using System.Runtime.CompilerServices; | |
using System.Threading.Tasks; | |
namespace TryOuts.AyendeParsingChallenge3 | |
{ | |
public static class Alternative | |
{ | |
const long kRecordSize = 50; | |
/// <summary> | |
/// For parsing and output of records. | |
/// </summary> | |
private static unsafe class Record | |
{ | |
private static readonly uint[] CumulativeMonthDaysRegular = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; | |
private static readonly uint[] CumulativeMonthDaysLeap = { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 }; | |
public static uint Parse(byte* record, out uint duration) | |
{ | |
// Duration parsing | |
var dur = 0; | |
var entryDt = record; | |
var exitDt = record + 20; | |
// Date difference part only if not same day. | |
if (*(long*)entryDt != *(long*)exitDt) | |
dur = ParseDateDif(entryDt, exitDt); | |
else if (*(ushort*)(entryDt + 8) != *(ushort*)(exitDt + 8)) | |
dur = (24 * 60 * 60) * TwoDigitDif(exitDt + 8, entryDt + 8); | |
// Add time difference. | |
duration = (uint)(dur + | |
TwoDigitDif(exitDt + 11, entryDt + 11) * 60 * 60 + | |
TwoDigitDif(exitDt + 14, entryDt + 14) * 60 + | |
TwoDigitDif(exitDt + 17, entryDt + 17)); | |
// Id. | |
var idByteVals = *(long*)(record + 40) - 0x3030303030303030; // parallel '0' subtraction for all bytes. | |
var bp = (byte*)&idByteVals; | |
return (uint)(bp[0] * 10000000 + bp[1] * 1000000 + bp[2] * 100000 + bp[3] * 10000 + bp[4] * 1000 + bp[5] * 100 + bp[6] * 10 + bp[7]); | |
} | |
public static int WriteTo(uint id, uint duration, byte[] buffer) | |
{ | |
// Write the id first | |
var pos = 8; | |
buffer[pos] = (byte)' '; | |
while (id != 0) | |
{ | |
buffer[--pos] = (byte)((id % 10) + '0'); | |
id /= 10; | |
} | |
while (pos > 0) | |
buffer[--pos] = (byte)'0'; | |
// decode duration into parts before writing. | |
var secs = duration % 60; | |
duration /= 60; | |
var mins = duration % 60; | |
duration /= 60; | |
var hrs = duration % 24; | |
var days = (duration / 24); | |
// write duration as time stamp, skip writing days if 0. | |
pos = 9; | |
if (days >= 10) | |
{ | |
var ndx = buffer.Length - 1; | |
buffer[ndx] = (byte)'.'; | |
while (days > 0) | |
{ | |
buffer[--ndx] = (byte)((days % 10) + '0'); | |
days /= 10; | |
} | |
var len = buffer.Length - ndx; | |
Array.Copy(buffer, ndx, buffer, pos, len); | |
pos += len; | |
} | |
else if (days > 0) | |
{ | |
buffer[pos++] = (byte)('0' + days); | |
buffer[pos++] = (byte)'.'; | |
} | |
// write hours, mins, secs | |
WriteTwoDigitInt(hrs, pos, buffer); | |
buffer[pos + 2] = (byte)':'; | |
WriteTwoDigitInt(mins, pos + 3, buffer); | |
buffer[pos + 5] = (byte)':'; | |
WriteTwoDigitInt(secs, pos + 6, buffer); | |
pos += 8; | |
// write record terminator | |
buffer[pos++] = (byte)'\r'; | |
buffer[pos++] = (byte)'\n'; | |
return pos; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static void WriteTwoDigitInt(uint val, int offset, byte[] buf) | |
{ | |
buf[offset] = (byte)((val / 10) + '0'); | |
buf[offset + 1] = (byte)((val % 10) + '0'); | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static int TwoDigitDif(byte* later, byte* earlier) | |
{ | |
return (later[0] - earlier[0]) * 10 + (later[1] - earlier[1]); | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static bool IsLeapYear(int year) | |
{ | |
return (year % 4) == 0 && (((year % 100) != 0) || ((year % 400) == 0)); | |
} | |
private static int ParseDateDif(byte* entryDt, byte* exitDt) | |
{ | |
const long kSubtractionMask = 0x0030300030303030; // subtract '0' from all non-dash digits. | |
var entry = *(long*)entryDt - kSubtractionMask; | |
var exit = *(long*)exitDt - kSubtractionMask; | |
var daysDif = TwoDigitDif(exitDt + 8, entryDt + 8); | |
var entrybp = (byte*)&entry; | |
var exitbp = (byte*)&exit; | |
var entryYr = entrybp[0] * 1000 + entrybp[1] * 100 + entrybp[2] * 10 + entrybp[3]; | |
var entryPrevMo = entrybp[5] * 10 + entrybp[6] - 1; | |
var exitYr = exitbp[0] * 1000 + exitbp[1] * 100 + exitbp[2] * 10 + exitbp[3]; | |
var exitPrevMo = exitbp[5] * 10 + exitbp[6] - 1; | |
var entryPrevYr = entryYr - 1; | |
var entryYrMoDays = (long) | |
(entryPrevYr * 365 + entryPrevYr / 4 - entryPrevYr / 100 + entryPrevYr / 400) + | |
(IsLeapYear(entryYr) ? CumulativeMonthDaysLeap[entryPrevMo] : CumulativeMonthDaysRegular[entryPrevMo]); | |
var exitPrevYs = exitYr - 1; | |
var exitYrMoDays = (long) | |
(exitPrevYs * 365 + exitPrevYs / 4 - exitPrevYs / 100 + exitPrevYs / 400) + | |
(IsLeapYear(exitYr) ? CumulativeMonthDaysLeap[exitPrevMo] : CumulativeMonthDaysRegular[exitPrevMo]); | |
return (int)(((exitYrMoDays - entryYrMoDays) + daysDif) * (24 * 60 * 60)); | |
} | |
} | |
/// <summary> | |
/// Simple 2-level trie. | |
/// </summary> | |
private class EntryMap | |
{ | |
private const uint kMaxId = 99999999; | |
private const uint kDirSize = (kMaxId >> 15) + 1; // 1525 + 1 | |
private const uint kVectSize = (1 << 15); | |
private const uint kVectMask = kVectSize - 1; | |
private readonly uint[][] _entriesDir = new uint[kDirSize][]; | |
private readonly byte[] _outputBuf = new byte[32]; // we should never need more than 26. | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public void Add(uint id, uint secondsDuration) | |
{ | |
Debug.Assert(id <= kMaxId); | |
var idx1 = id >> 15; // we use the high 12 bits for the first trie level. | |
var idx2 = id & kVectMask; // remaining 15 bits for the second trie level. | |
var es = _entriesDir[idx1]; | |
if (es == null) | |
_entriesDir[idx1] = es = new uint[kVectSize]; | |
es[idx2] += secondsDuration; | |
} | |
public void MergeWith(EntryMap other) | |
{ | |
for (var i = 0; i < _entriesDir.Length; ++i) | |
{ | |
var thatEs = other._entriesDir[i]; | |
if (thatEs != null) | |
{ | |
var thisEs = _entriesDir[i]; | |
if (thisEs == null) | |
_entriesDir[i] = thatEs; | |
else | |
{ | |
for (var j = 0; j < thisEs.Length; ++j) | |
thisEs[j] += thatEs[j]; | |
} | |
} | |
} | |
} | |
public void WriteTo(Stream output) | |
{ | |
for (var i = 0; i < _entriesDir.Length; ++i) | |
{ | |
var es = _entriesDir[i]; | |
if (es != null) | |
{ | |
for (var j = 0; j < es.Length; ++j) | |
{ | |
if (es[j] > 0) | |
output.Write(_outputBuf, 0, Record.WriteTo((uint)(i << 15 | j), es[j], _outputBuf)); | |
} | |
} | |
} | |
} | |
} | |
/// <summary> | |
/// Simple 3-level trie. | |
/// </summary> | |
private class EntryMap2 | |
{ | |
private const uint kMaxId = 99999999; | |
private const uint kDirSize = (kMaxId >> 16) + 1; // 1525 + 1 | |
private const uint kVectMask = (1 << 16) - 1; | |
private const uint kLevel2Size = (kVectMask >> 8) + 1; | |
private const uint kLevel3Size = (1 << 8); | |
private const uint kLevel3Mask = kLevel3Size - 1; | |
private readonly uint[][][] _entriesDir = new uint[kDirSize][][]; | |
private readonly byte[] _outputBuf = new byte[32]; // we should never need more than 26. | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public void Add(uint id, uint secondsDuration) | |
{ | |
Debug.Assert(id <= kMaxId); | |
var idx1 = id >> 16; // we use the high 11 bits for the first trie level. 224, 255 | |
var idx_rem = id & kVectMask; | |
var idx2 = idx_rem >> 8; | |
var idx3 = idx_rem & kLevel3Mask; | |
var l2 = _entriesDir[idx1]; | |
var l3 = default(uint[]); | |
if (l2 == null) | |
{ | |
_entriesDir[idx1] = l2 = new uint[kLevel2Size][]; | |
l2[idx2] = l3 = new uint[kLevel3Size]; | |
} | |
else | |
{ | |
l3 = l2[idx2] ?? (l2[idx2] = new uint[kLevel3Size]); | |
} | |
l3[idx3] += secondsDuration; | |
} | |
public void MergeWith(EntryMap2 other) | |
{ | |
for (var i = 0; i < _entriesDir.Length; ++i) | |
{ | |
var thatL2 = other._entriesDir[i]; | |
if (thatL2 != null) | |
{ | |
var thisL2 = _entriesDir[i]; | |
if (thisL2 == null) | |
_entriesDir[i] = thatL2; | |
else | |
{ | |
for (var j = 0; j < thisL2.Length; ++j) | |
{ | |
var thatL3 = thatL2[j]; | |
if (thatL3 != null) | |
{ | |
var thisL3 = thisL2[j]; | |
if (thisL3 == null) | |
thisL2[j] = thatL2[j]; | |
else | |
{ | |
for (var k = 0; k < thisL3.Length; ++k) | |
thisL3[k] += thatL3[k]; | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
public void WriteTo(Stream output) | |
{ | |
for (var i = 0; i < _entriesDir.Length; ++i) | |
{ | |
var l2 = _entriesDir[i]; | |
if (l2 != null) | |
{ | |
for (var j = 0; j < l2.Length; ++j) | |
{ | |
var l3 = l2[j]; | |
if (l3 != null) | |
{ | |
for (var k = 0; k < l3.Length; ++k) | |
{ | |
if (l3[k] > 0) | |
output.Write(_outputBuf, 0, Record.WriteTo((uint)(i << 16 | j << 8 | k), l3[k], _outputBuf)); | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
public static void Run(string inputFileName, string outputFileName) | |
{ | |
//var entries = new EntryMap(); | |
var entries = new EntryMap2(); // uncomment/comment switch with above. | |
var size = new FileInfo(inputFileName).Length; | |
using (var mmf = MemoryMappedFile.CreateFromFile(inputFileName, FileMode.Open)) | |
using (var accessor = mmf.CreateViewAccessor(0, size, MemoryMappedFileAccess.Read)) | |
{ | |
unsafe | |
{ | |
var start = default(byte*); | |
accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref start); | |
var end = start + (size / kRecordSize) * kRecordSize; | |
ReadFile(start, end, entries); | |
accessor.SafeMemoryMappedViewHandle.ReleasePointer(); | |
} | |
} | |
using (var stream = File.Create(outputFileName, 32 * 1024)) | |
entries.WriteTo(stream); | |
} | |
public static void RunParallel(string inputFileName, string outputFileName) | |
{ | |
var nThreads = Math.Max(1, Environment.ProcessorCount - 1); | |
var size = new FileInfo(inputFileName).Length; | |
var tasks = new Task<EntryMap>[nThreads]; | |
var entries = default(EntryMap); | |
using (var mmf = MemoryMappedFile.CreateFromFile(inputFileName, FileMode.Open)) | |
using (var accessor = mmf.CreateViewAccessor(0, size, MemoryMappedFileAccess.Read)) | |
{ | |
unsafe | |
{ | |
var start = default(byte*); | |
accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref start); | |
var nRecords = (size / kRecordSize); | |
var end = start + nRecords * kRecordSize; | |
var nRecordsPerThread = nRecords / nThreads; | |
while (nRecordsPerThread < 1024 && nThreads > 1) | |
nRecordsPerThread = nRecords / --nThreads; | |
var blockPerThread = nRecordsPerThread * kRecordSize; | |
for (var i = 0; i < nThreads; ++i) | |
{ | |
var thrStart = start + i * blockPerThread; | |
var thrEnd = i == nThreads - 1 ? end : thrStart + blockPerThread; | |
tasks[i] = Task.Run(() => | |
{ | |
var map = new EntryMap(); | |
ReadFile(thrStart, thrEnd, map); | |
return map; | |
}); | |
} | |
Task.WaitAll(tasks); | |
accessor.SafeMemoryMappedViewHandle.ReleasePointer(); | |
} | |
// Merge maps into the first one. | |
entries = tasks[0].Result; | |
for (var i = 1; i < tasks.Length; ++i) | |
entries.MergeWith(tasks[i].Result); | |
} | |
// Write the combined results. | |
using (var stream = File.Create(outputFileName, 32 * 1024)) | |
entries.WriteTo(stream); | |
} | |
private static unsafe void ReadFile(byte* first, byte* end, EntryMap entries) | |
{ | |
uint duration = 0; | |
var recPtr = first; | |
for (; recPtr < end; recPtr += kRecordSize) | |
{ | |
var id = Record.Parse(recPtr, out duration); | |
entries.Add(id, duration); | |
} | |
} | |
private static unsafe void ReadFile(byte* first, byte* end, EntryMap2 entries) | |
{ | |
uint duration = 0; | |
var recPtr = first; | |
for (; recPtr < end; recPtr += kRecordSize) | |
{ | |
var id = Record.Parse(recPtr, out duration); | |
entries.Add(id, duration); | |
} | |
} | |
} | |
public static class BenchMain | |
{ | |
public static void Run(string inputFileName, string outputFileName, string runnerName, Action<string, string> runner) | |
{ | |
Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.RealTime; | |
AppDomain.MonitoringIsEnabled = true; | |
Console.Write($"Running {runnerName} ... "); | |
var sw = Stopwatch.StartNew(); | |
runner(inputFileName, outputFileName); | |
sw.Stop(); | |
var elapsedMs = sw.ElapsedMilliseconds; | |
var allocKb = AppDomain.CurrentDomain.MonitoringTotalAllocatedMemorySize / 1024.0; | |
var peakKb = Process.GetCurrentProcess().PeakWorkingSet64 / 1024.0; | |
Console.WriteLine($"Took: {elapsedMs:#,#} ms and allocated {allocKb:#,#} kb with peak working set of {peakKb:#,#} kb"); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment