Skip to content

Instantly share code, notes, and snippets.

Created November 17, 2016 23:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/83cfd3363a47fbf16d18dd25f94a1c99 to your computer and use it in GitHub Desktop.
Save anonymous/83cfd3363a47fbf16d18dd25f94a1c99 to your computer and use it in GitHub Desktop.
using System;
using System.Diagnostics;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Runtime.CompilerServices;
using System.Threading.Tasks;
namespace TryOuts.AyendeParsingChallenge3
{
public static class Alternative
{
const long kRecordSize = 50;
/// <summary>
/// For parsing and output of records.
/// A record is assumed to be 50 bytes (kRecordSize): entry timestamp at
/// offset 0, exit timestamp at offset 20 and an 8-digit ASCII id at offset
/// 40. The field offsets used below (+8 day, +11 hour, +14 minute,
/// +17 second) are consistent with a "yyyy-MM-dd HH:mm:ss" timestamp
/// layout — TODO confirm against the actual input format.
/// </summary>
private static unsafe class Record
{
    // Days elapsed before the first day of each month (index = month - 1),
    // for regular and leap years respectively.
    private static readonly uint[] CumulativeMonthDaysRegular = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
    private static readonly uint[] CumulativeMonthDaysLeap = { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 };

    /// <summary>
    /// Parses one record: returns its numeric id and writes the elapsed
    /// time between the entry and exit timestamps, in whole seconds, to
    /// <paramref name="duration"/>.
    /// </summary>
    /// <param name="record">Pointer to the first byte of the record.</param>
    /// <param name="duration">Receives the duration in seconds.</param>
    /// <returns>The record id parsed from its 8 ASCII digits.</returns>
    public static uint Parse(byte* record, out uint duration)
    {
        // Duration parsing
        var dur = 0;
        var entryDt = record;
        var exitDt = record + 20;
        // Date difference part only if not same day.
        // Comparing the first 8 bytes covers "yyyy-MM-" of both timestamps.
        if (*(long*)entryDt != *(long*)exitDt)
            dur = ParseDateDif(entryDt, exitDt);
        // Same year/month: only the two day digits (offset +8) can differ.
        else if (*(ushort*)(entryDt + 8) != *(ushort*)(exitDt + 8))
            dur = (24 * 60 * 60) * TwoDigitDif(exitDt + 8, entryDt + 8);
        // Add time difference. Individual component differences may be
        // negative; the sum still yields the correct total in seconds.
        duration = (uint)(dur +
            TwoDigitDif(exitDt + 11, entryDt + 11) * 60 * 60 +
            TwoDigitDif(exitDt + 14, entryDt + 14) * 60 +
            TwoDigitDif(exitDt + 17, entryDt + 17));
        // Id.
        var idByteVals = *(long*)(record + 40) - 0x3030303030303030; // parallel '0' subtraction for all bytes.
        var bp = (byte*)&idByteVals;
        // bp[0] is the first (most significant) digit on a little-endian machine.
        return (uint)(bp[0] * 10000000 + bp[1] * 1000000 + bp[2] * 100000 + bp[3] * 10000 + bp[4] * 1000 + bp[5] * 100 + bp[6] * 10 + bp[7]);
    }

    /// <summary>
    /// Formats "&lt;id&gt; &lt;duration&gt;\r\n" into <paramref name="buffer"/> — the id
    /// as 8 zero-padded digits, the duration as [days.]hh:mm:ss — and
    /// returns the number of bytes written.
    /// </summary>
    public static int WriteTo(uint id, uint duration, byte[] buffer)
    {
        // Write the id first: digits backwards from position 7, then
        // zero-pad whatever remains on the left.
        var pos = 8;
        buffer[pos] = (byte)' ';
        while (id != 0)
        {
            buffer[--pos] = (byte)((id % 10) + '0');
            id /= 10;
        }
        while (pos > 0)
            buffer[--pos] = (byte)'0';
        // decode duration into parts before writing.
        var secs = duration % 60;
        duration /= 60;
        var mins = duration % 60;
        duration /= 60;
        var hrs = duration % 24;
        var days = (duration / 24);
        // write duration as time stamp, skip writing days if 0.
        pos = 9;
        if (days >= 10)
        {
            // Multi-digit day count: render "<days>." backwards into the
            // tail of the buffer (used as scratch), then copy into place.
            var ndx = buffer.Length - 1;
            buffer[ndx] = (byte)'.';
            while (days > 0)
            {
                buffer[--ndx] = (byte)((days % 10) + '0');
                days /= 10;
            }
            var len = buffer.Length - ndx;
            Array.Copy(buffer, ndx, buffer, pos, len);
            pos += len;
        }
        else if (days > 0)
        {
            buffer[pos++] = (byte)('0' + days);
            buffer[pos++] = (byte)'.';
        }
        // write hours, mins, secs
        WriteTwoDigitInt(hrs, pos, buffer);
        buffer[pos + 2] = (byte)':';
        WriteTwoDigitInt(mins, pos + 3, buffer);
        buffer[pos + 5] = (byte)':';
        WriteTwoDigitInt(secs, pos + 6, buffer);
        pos += 8; // "hh:mm:ss" is 8 bytes
        // write record terminator
        buffer[pos++] = (byte)'\r';
        buffer[pos++] = (byte)'\n';
        return pos;
    }

    /// <summary>Writes <paramref name="val"/> (expected 0-99) as two ASCII digits.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void WriteTwoDigitInt(uint val, int offset, byte[] buf)
    {
        buf[offset] = (byte)((val / 10) + '0');
        buf[offset + 1] = (byte)((val % 10) + '0');
    }

    /// <summary>
    /// Numeric difference between two 2-digit ASCII numbers; the '0'
    /// biases cancel out, so no masking is needed. May be negative.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int TwoDigitDif(byte* later, byte* earlier)
    {
        return (later[0] - earlier[0]) * 10 + (later[1] - earlier[1]);
    }

    /// <summary>Standard Gregorian leap-year rule.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsLeapYear(int year)
    {
        return (year % 4) == 0 && (((year % 100) != 0) || ((year % 400) == 0));
    }

    /// <summary>
    /// Seconds contributed by the calendar-date difference of the two
    /// timestamps: converts each "yyyy-MM-dd" prefix into an absolute day
    /// count and returns (exitDays - entryDays) * 86400. May be negative
    /// if exit precedes entry.
    /// </summary>
    private static int ParseDateDif(byte* entryDt, byte* exitDt)
    {
        // On a little-endian long holding "yyyy-MM-", the mask subtracts
        // '0' from the four year bytes and two month bytes while leaving
        // the '-' separator bytes untouched.
        const long kSubtractionMask = 0x0030300030303030; // subtract '0' from all non-dash digits.
        var entry = *(long*)entryDt - kSubtractionMask;
        var exit = *(long*)exitDt - kSubtractionMask;
        // Day-of-month difference straight from the ASCII digits at +8.
        var daysDif = TwoDigitDif(exitDt + 8, entryDt + 8);
        var entrybp = (byte*)&entry;
        var exitbp = (byte*)&exit;
        var entryYr = entrybp[0] * 1000 + entrybp[1] * 100 + entrybp[2] * 10 + entrybp[3];
        var entryPrevMo = entrybp[5] * 10 + entrybp[6] - 1; // zero-based month index
        var exitYr = exitbp[0] * 1000 + exitbp[1] * 100 + exitbp[2] * 10 + exitbp[3];
        var exitPrevMo = exitbp[5] * 10 + exitbp[6] - 1; // zero-based month index
        // Absolute day counts: full days in all prior years (with leap-day
        // corrections) plus full days in prior months of the current year.
        var entryPrevYr = entryYr - 1;
        var entryYrMoDays = (long)
            (entryPrevYr * 365 + entryPrevYr / 4 - entryPrevYr / 100 + entryPrevYr / 400) +
            (IsLeapYear(entryYr) ? CumulativeMonthDaysLeap[entryPrevMo] : CumulativeMonthDaysRegular[entryPrevMo]);
        var exitPrevYs = exitYr - 1;
        var exitYrMoDays = (long)
            (exitPrevYs * 365 + exitPrevYs / 4 - exitPrevYs / 100 + exitPrevYs / 400) +
            (IsLeapYear(exitYr) ? CumulativeMonthDaysLeap[exitPrevMo] : CumulativeMonthDaysRegular[exitPrevMo]);
        return (int)(((exitYrMoDays - entryYrMoDays) + daysDif) * (24 * 60 * 60));
    }
}
/// <summary>
/// Simple 2-level trie mapping an id to its accumulated duration in
/// seconds. The high bits of the id select a directory slot; the low 15
/// bits index a lazily-allocated vector of totals.
/// </summary>
private class EntryMap
{
    private const uint kMaxId = 99999999;
    private const uint kDirSize = (kMaxId >> 15) + 1; // 3051 + 1 top-level slots.
    private const uint kVectSize = (1 << 15);
    private const uint kVectMask = kVectSize - 1;
    private readonly uint[][] _entriesDir = new uint[kDirSize][];
    private readonly byte[] _outputBuf = new byte[32]; // Record.WriteTo never needs more than 26 bytes.

    /// <summary>Accumulates <paramref name="secondsDuration"/> onto the running total for <paramref name="id"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Add(uint id, uint secondsDuration)
    {
        Debug.Assert(id <= kMaxId);
        var hi = id >> 15;       // top bits pick the directory slot
        var lo = id & kVectMask; // low 15 bits index within the vector
        var vect = _entriesDir[hi] ?? (_entriesDir[hi] = new uint[kVectSize]);
        vect[lo] += secondsDuration;
    }

    /// <summary>
    /// Folds <paramref name="other"/>'s totals into this map. Vectors that
    /// exist only in <paramref name="other"/> are adopted by reference, so
    /// <paramref name="other"/> must not be mutated afterwards.
    /// </summary>
    public void MergeWith(EntryMap other)
    {
        for (var slot = 0; slot < _entriesDir.Length; ++slot)
        {
            var theirs = other._entriesDir[slot];
            if (theirs == null)
                continue;
            var ours = _entriesDir[slot];
            if (ours == null)
            {
                _entriesDir[slot] = theirs;
                continue;
            }
            for (var k = 0; k < ours.Length; ++k)
                ours[k] += theirs[k];
        }
    }

    /// <summary>Writes every entry with a non-zero total to <paramref name="output"/> via Record.WriteTo.</summary>
    public void WriteTo(Stream output)
    {
        for (var slot = 0; slot < _entriesDir.Length; ++slot)
        {
            var vect = _entriesDir[slot];
            if (vect == null)
                continue;
            for (var k = 0; k < vect.Length; ++k)
            {
                if (vect[k] == 0)
                    continue;
                // Reassemble the id from the two trie indices.
                output.Write(_outputBuf, 0, Record.WriteTo((uint)(slot << 15 | k), vect[k], _outputBuf));
            }
        }
    }
}
/// <summary>
/// Simple 3-level trie mapping an id to its accumulated duration in
/// seconds. The id splits into a directory index (bits 16+), a level-2
/// index (bits 8-15) and a leaf index (bits 0-7); lower levels are
/// allocated lazily.
/// </summary>
private class EntryMap2
{
    private const uint kMaxId = 99999999;
    private const uint kDirSize = (kMaxId >> 16) + 1; // 1525 + 1 top-level slots.
    private const uint kVectMask = (1 << 16) - 1;
    private const uint kLevel2Size = (kVectMask >> 8) + 1;
    private const uint kLevel3Size = (1 << 8);
    private const uint kLevel3Mask = kLevel3Size - 1;
    private readonly uint[][][] _entriesDir = new uint[kDirSize][][];
    private readonly byte[] _outputBuf = new byte[32]; // Record.WriteTo never needs more than 26 bytes.

    /// <summary>Accumulates <paramref name="secondsDuration"/> onto the running total for <paramref name="id"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Add(uint id, uint secondsDuration)
    {
        Debug.Assert(id <= kMaxId);
        var top = id >> 16;              // bits 16+ pick the directory slot
        var mid = (id & kVectMask) >> 8; // bits 8-15 pick the level-2 slot
        var low = id & kLevel3Mask;      // bits 0-7 index the leaf vector
        var level2 = _entriesDir[top] ?? (_entriesDir[top] = new uint[kLevel2Size][]);
        var leaf = level2[mid] ?? (level2[mid] = new uint[kLevel3Size]);
        leaf[low] += secondsDuration;
    }

    /// <summary>
    /// Folds <paramref name="other"/>'s totals into this map. Sub-arrays
    /// that exist only in <paramref name="other"/> are adopted by
    /// reference, so <paramref name="other"/> must not be mutated afterwards.
    /// </summary>
    public void MergeWith(EntryMap2 other)
    {
        for (var i = 0; i < _entriesDir.Length; ++i)
        {
            var theirL2 = other._entriesDir[i];
            if (theirL2 == null)
                continue;
            var ourL2 = _entriesDir[i];
            if (ourL2 == null)
            {
                _entriesDir[i] = theirL2;
                continue;
            }
            for (var j = 0; j < ourL2.Length; ++j)
            {
                var theirLeaf = theirL2[j];
                if (theirLeaf == null)
                    continue;
                var ourLeaf = ourL2[j];
                if (ourLeaf == null)
                {
                    ourL2[j] = theirLeaf;
                    continue;
                }
                for (var k = 0; k < ourLeaf.Length; ++k)
                    ourLeaf[k] += theirLeaf[k];
            }
        }
    }

    /// <summary>Writes every entry with a non-zero total to <paramref name="output"/> via Record.WriteTo.</summary>
    public void WriteTo(Stream output)
    {
        for (var i = 0; i < _entriesDir.Length; ++i)
        {
            var level2 = _entriesDir[i];
            if (level2 == null)
                continue;
            for (var j = 0; j < level2.Length; ++j)
            {
                var leaf = level2[j];
                if (leaf == null)
                    continue;
                for (var k = 0; k < leaf.Length; ++k)
                {
                    if (leaf[k] == 0)
                        continue;
                    // Reassemble the id from the three trie indices.
                    output.Write(_outputBuf, 0, Record.WriteTo((uint)(i << 16 | j << 8 | k), leaf[k], _outputBuf));
                }
            }
        }
    }
}
/// <summary>
/// Sequentially parses all complete records in <paramref name="inputFileName"/>
/// via a read-only memory mapping and writes the aggregated per-id durations
/// to <paramref name="outputFileName"/>.
/// </summary>
public static void Run(string inputFileName, string outputFileName)
{
    var entries = new EntryMap2(); // alternative implementation: new EntryMap() (2-level trie).
    var size = new FileInfo(inputFileName).Length;
    using (var mmf = MemoryMappedFile.CreateFromFile(inputFileName, FileMode.Open))
    using (var accessor = mmf.CreateViewAccessor(0, size, MemoryMappedFileAccess.Read))
    {
        unsafe
        {
            var start = default(byte*);
            accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref start);
            try
            {
                // Only whole records are processed; a trailing partial record is ignored.
                var end = start + (size / kRecordSize) * kRecordSize;
                ReadFile(start, end, entries);
            }
            finally
            {
                // Must always pair with AcquirePointer, or the handle's
                // ref-count leaks when ReadFile throws.
                accessor.SafeMemoryMappedViewHandle.ReleasePointer();
            }
        }
    }
    using (var stream = File.Create(outputFileName, 32 * 1024))
        entries.WriteTo(stream);
}
/// <summary>
/// Parses the input file in parallel: the records are split into one
/// contiguous block per worker, each worker aggregates into its own
/// <see cref="EntryMap"/>, and the maps are merged before writing the output.
/// </summary>
public static void RunParallel(string inputFileName, string outputFileName)
{
    var size = new FileInfo(inputFileName).Length;
    var nRecords = size / kRecordSize; // trailing partial record is ignored
    // Shrink the worker count for small inputs so each worker gets a
    // reasonable batch. This must happen BEFORE the task array is sized:
    // the original sized it with the initial count, leaving null slots
    // that made Task.WaitAll throw whenever the count was reduced.
    var nThreads = Math.Max(1, Environment.ProcessorCount - 1);
    var nRecordsPerThread = nRecords / nThreads;
    while (nRecordsPerThread < 1024 && nThreads > 1)
        nRecordsPerThread = nRecords / --nThreads;
    var tasks = new Task<EntryMap>[nThreads];
    var entries = default(EntryMap);
    using (var mmf = MemoryMappedFile.CreateFromFile(inputFileName, FileMode.Open))
    using (var accessor = mmf.CreateViewAccessor(0, size, MemoryMappedFileAccess.Read))
    {
        unsafe
        {
            var start = default(byte*);
            accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref start);
            try
            {
                var end = start + nRecords * kRecordSize;
                var blockPerThread = nRecordsPerThread * kRecordSize;
                for (var i = 0; i < nThreads; ++i)
                {
                    var thrStart = start + i * blockPerThread;
                    // The last worker also picks up the remainder records.
                    var thrEnd = i == nThreads - 1 ? end : thrStart + blockPerThread;
                    tasks[i] = Task.Run(() =>
                    {
                        var map = new EntryMap();
                        ReadFile(thrStart, thrEnd, map);
                        return map;
                    });
                }
                // All workers must finish before the pointer is released.
                Task.WaitAll(tasks);
            }
            finally
            {
                // Must always pair with AcquirePointer, or the handle's
                // ref-count leaks when a worker faults.
                accessor.SafeMemoryMappedViewHandle.ReleasePointer();
            }
        }
        // Merge maps into the first one.
        entries = tasks[0].Result;
        for (var i = 1; i < tasks.Length; ++i)
            entries.MergeWith(tasks[i].Result);
    }
    // Write the combined results.
    using (var stream = File.Create(outputFileName, 32 * 1024))
        entries.WriteTo(stream);
}
/// <summary>
/// Parses every record in [<paramref name="first"/>, <paramref name="end"/>)
/// and accumulates its duration into <paramref name="entries"/>.
/// </summary>
private static unsafe void ReadFile(byte* first, byte* end, EntryMap entries)
{
    for (var cursor = first; cursor < end; cursor += kRecordSize)
    {
        uint duration;
        var id = Record.Parse(cursor, out duration);
        entries.Add(id, duration);
    }
}
/// <summary>
/// Parses every record in [<paramref name="first"/>, <paramref name="end"/>)
/// and accumulates its duration into <paramref name="entries"/>.
/// </summary>
private static unsafe void ReadFile(byte* first, byte* end, EntryMap2 entries)
{
    for (var cursor = first; cursor < end; cursor += kRecordSize)
    {
        uint duration;
        var id = Record.Parse(cursor, out duration);
        entries.Add(id, duration);
    }
}
}
public static class BenchMain
{
    /// <summary>
    /// Runs <paramref name="runner"/> once against the given input/output
    /// files and reports wall-clock time, total managed allocations and
    /// peak working set on the console.
    /// </summary>
    /// <param name="runnerName">Display name printed before the run.</param>
    /// <param name="runner">The workload; receives the input and output file names.</param>
    public static void Run(string inputFileName, string outputFileName, string runnerName, Action<string, string> runner)
    {
        // Process is IDisposable — hold a single disposed instance instead
        // of leaking two undisposed ones as before.
        using (var process = Process.GetCurrentProcess())
        {
            // Reduce scheduling noise while benchmarking. NOTE(review):
            // RealTime typically requires elevated privileges; without them
            // the OS may grant a lower class.
            process.PriorityClass = ProcessPriorityClass.RealTime;
            AppDomain.MonitoringIsEnabled = true; // enables per-domain allocation tracking
            Console.Write($"Running {runnerName} ... ");
            var sw = Stopwatch.StartNew();
            runner(inputFileName, outputFileName);
            sw.Stop();
            var elapsedMs = sw.ElapsedMilliseconds;
            var allocKb = AppDomain.CurrentDomain.MonitoringTotalAllocatedMemorySize / 1024.0;
            // Process caches property snapshots; refresh so the peak
            // working set reflects the run that just completed.
            process.Refresh();
            var peakKb = process.PeakWorkingSet64 / 1024.0;
            Console.WriteLine($"Took: {elapsedMs:#,#} ms and allocated {allocKb:#,#} kb with peak working set of {peakKb:#,#} kb");
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment