Skip to content

Instantly share code, notes, and snippets.

Created November 14, 2016 17:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/4e040afcc3ff0f55746d4bd970bc0c55 to your computer and use it in GitHub Desktop.
Save anonymous/4e040afcc3ff0f55746d4bd970bc0c55 to your computer and use it in GitHub Desktop.
using System;
using System.Diagnostics;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Runtime.CompilerServices;
namespace TryOuts.AyendeParsingChallenge
{
public static class Alternative
{
const uint kMaxId = 99999999;
const uint kDigitZero = '0';
const long kRecordSize = 50;
/// <summary>
/// Time stamp / duration parsing and output methods.
/// </summary>
private static class TimeStamp
{
private const byte kDash = (byte)'-';
private const byte kColon = (byte)':';
private static readonly uint[] CumulativeMonthDaysRegular = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
private static readonly uint[] CumulativeMonthDaysLeap = { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 };
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe bool TryParseYear(byte* bp, out int val)
{
val =
(int)(bp[0] - kDigitZero) * 1000 +
(int)(bp[1] - kDigitZero) * 100 +
(int)(bp[2] - kDigitZero) * 10 +
(int)(bp[3] - kDigitZero);
return val >= 1970 && val < 2100;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe bool TryParseTwoDigit(byte* bp, int maxVal, out int val)
{
val = (int)(bp[0] - kDigitZero) * 10 + (int)(bp[1] - kDigitZero);
return val <= maxVal;
}
private static unsafe bool TryParseTimeDif(byte* entryTm, byte* exitTm, out int seconds)
{
seconds = 0;
int entryHr = 0, exitHr = 0, entryMin = 0, exitMin = 0, entrySec = 0, exitSec = 0;
if (TryParseTwoDigit(entryTm, 23, out entryHr) &&
*(entryTm + 2) == kColon &&
TryParseTwoDigit(entryTm + 3, 59, out entryMin) &&
*(entryTm + 5) == kColon &&
TryParseTwoDigit(entryTm + 6, 59, out entrySec) &&
TryParseTwoDigit(exitTm, 23, out exitHr) &&
*(exitTm + 2) == kColon &&
TryParseTwoDigit(exitTm + 3, 59, out exitMin) &&
*(exitTm + 5) == kColon &&
TryParseTwoDigit(exitTm + 6, 59, out exitSec))
{
seconds = (((exitHr - entryHr) * 60 * 60) + ((exitMin - entryMin) * 60) + (exitSec - entrySec));
return true;
}
return false;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe bool TryParseDateDif(byte* entryDt, byte* exitDt, out uint secsDif)
{
secsDif = 0;
return (*(ulong*)entryDt == *(ulong*)exitDt) && (*(ushort*)(entryDt + 8) == *(ushort*)(exitDt + 8)) ||
TryParseDateDifLessLikely(entryDt, exitDt, ref secsDif);
}
private static unsafe bool TryParseDateDifLessLikely(byte* entryDt, byte* exitDt, ref uint secsDif)
{
secsDif = 0;
var entrySecs = default(ulong);
var exitSecs = default(ulong);
if (!TryParseDate(entryDt, out entrySecs) || !TryParseDate(exitDt, out exitSecs))
return false;
secsDif = (uint)(exitSecs - entrySecs);
return (int)(secDif) >= 0;
}
private static unsafe bool TryParseDate(byte* dt, out ulong seconds)
{
seconds = 0;
var year = default(int);
var month = default(int);
var day = default(int);
if (TryParseYear(dt, out year) &&
dt[4] == kDash &&
TryParseTwoDigit(dt + 5, 12, out month) &&
dt[7] == kDash &&
TryParseTwoDigit(dt + 8, 31, out day))
{
var isLeapYear = (year % 4) == 0 && (((year % 100) != 0) || ((year % 400) == 0));
var py = year - 1;
var nd =
(long)(py * 365 + py / 4 - py / 100 + py / 400) +
(isLeapYear ? CumulativeMonthDaysLeap[month - 1] : CumulativeMonthDaysRegular[month - 1]) +
day - 1;
seconds = (ulong)nd * 24 * 60 * 60;
return true;
}
return false;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe bool TryParseDuration(byte* entryTs, byte* exitTs, out uint duration)
{
uint dtDif;
int tmDif;
duration = 0;
if (TryParseDateDif(entryTs, exitTs, out dtDif) && TryParseTimeDif(entryTs + 11, exitTs + 11, out tmDif))
{
Debug.Assert((long)dtDif + tmDif > 0);
duration = (uint)(dtDif + tmDif);
return (int)duration >= 0;
}
return false;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WriteTwoDigitInt(uint val, int offset, byte[] buf)
{
buf[offset] = (byte)((val / 10) + '0');
buf[offset + 1] = (byte)((val % 10) + '0');
}
public static unsafe int WriteAsTimeSpan(uint duration, byte[] buffer, int offset)
{
var secs = duration % 60;
duration /= 60;
var mins = duration % 60;
duration /= 60;
var hrs = duration % 24;
var days = (duration / 24);
if (days >= 10)
{
var ndx = buffer.Length - 1;
buffer[ndx] = (byte)'.';
while (days > 0)
{
buffer[--ndx] = (byte)((days % 10) + '0');
days /= 10;
}
var len = buffer.Length - ndx;
Array.Copy(buffer, ndx, buffer, offset, len);
offset += len;
}
else if (days > 0)
{
buffer[offset++] = (byte)('0' + days);
buffer[offset++] = (byte)'.';
}
WriteTwoDigitInt(hrs, offset, buffer);
buffer[offset + 2] = kColon;
WriteTwoDigitInt(mins, offset + 3, buffer);
buffer[offset + 5] = kColon;
WriteTwoDigitInt(secs, offset + 6, buffer);
return offset + 8;
}
}
/// <summary>
/// A very simple one-level trie, an array of arrays.
/// Used because it verified the assumption that a (cache concious) trie
/// will be better in terms of both memory consumption and speed (due to
/// better cache locality effects) this is better than a dictionary or
/// a <c>uint[100000000]</c> array.
/// </summary>
private class EntryMap
{
private const int kDirBits = (27 - 15);
private const uint kDirSize = (1 << kDirBits);
private const uint kVectSize = (1 << 16);
private const uint kVectMask = kVectSize - 1;
private readonly uint[][] _entriesDir = new uint[kDirSize][];
private readonly byte[] _outputBuf = new byte[32]; // we should never need more than 26.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Add(uint id, uint secondsDuration)
{
Debug.Assert(id <= kMaxId);
var idx2 = id & kVectMask;
var idx1 = id >> 15; // we use the high 12 bits for the first trie level.
var es = _entriesDir[idx1];
if (es == null)
_entriesDir[idx1] = es = new uint[kVectSize];
es[idx2] += secondsDuration;
}
public void WriteTo(Stream output)
{
_outputBuf[8] = (byte)' ';
var off = 0;
for (var i = 0; i < _entriesDir.Length; ++i)
{
var es = _entriesDir[i];
if (es != null)
{
for (var j = 0; j < es.Length; ++j)
{
if (es[j] > 0)
{
WriteId((uint)(i << 15 | j), _outputBuf);
off = TimeStamp.WriteAsTimeSpan(es[j], _outputBuf, 9);
_outputBuf[off++] = (byte)'\r';
_outputBuf[off++] = (byte)'\n';
output.Write(_outputBuf, 0, off);
}
}
}
}
}
private static void WriteId(uint id, byte[] output)
{
var n = 7;
while (id != 0)
{
output[n--] = (byte)((id % 10) + kDigitZero);
id /= 10;
}
for (; n >= 0; --n)
output[n] = (byte)kDigitZero;
}
}
public static void Run(string inputFileName, string outputFileName)
{
var entries = new EntryMap();
var size = new FileInfo(inputFileName).Length;
using (var mmf = MemoryMappedFile.CreateFromFile(inputFileName, FileMode.Open))
using (var accessor = mmf.CreateViewAccessor(0, size, MemoryMappedFileAccess.Read))
ReadFile(accessor, size, entries);
using (var stream = File.Create(outputFileName, 32 * 1024))
entries.WriteTo(stream);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe bool TryParseId(byte* idp, out uint id)
{
id =
(uint)(idp[0] - kDigitZero) * 10000000 +
(uint)(idp[1] - kDigitZero) * 1000000 +
(uint)(idp[2] - kDigitZero) * 100000 +
(uint)(idp[3] - kDigitZero) * 10000 +
(uint)(idp[4] - kDigitZero) * 1000 +
(uint)(idp[5] - kDigitZero) * 100 +
(uint)(idp[6] - kDigitZero) * 10 +
(uint)(idp[7] - kDigitZero);
return id <= kMaxId;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe bool TryParseRecord(byte* record, ref uint id, ref uint duration)
{
return TimeStamp.TryParseDuration(record, record + 20, out duration) && TryParseId(record + 40, out id);
}
private static unsafe bool ReadFile(MemoryMappedViewAccessor accessor, long inputSize, EntryMap entries)
{
long recNo = 0, nRecords = inputSize / kRecordSize;
uint id = 0, duration = 0;
var recPtr = default(byte*);
accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref recPtr);
for (recNo = 0; recNo < nRecords; ++recNo, recPtr += kRecordSize)
{
if (!TryParseRecord(recPtr, ref id, ref duration))
break;
entries.Add(id, duration);
}
accessor.SafeMemoryMappedViewHandle.ReleasePointer();
return recNo == nRecords;
}
}
public static class BenchMain
{
public static void Run(string inputFileName, string outputFileName, string runnerName, Action<string, string> runner)
{
AppDomain.MonitoringIsEnabled = true;
Console.Write($"Running {runnerName} ... ");
var sw = Stopwatch.StartNew();
runner(inputFileName, outputFileName);
sw.Stop();
var elapsedMs = sw.ElapsedMilliseconds;
var allocKb = AppDomain.CurrentDomain.MonitoringTotalAllocatedMemorySize / 1024.0;
var peakKb = Process.GetCurrentProcess().PeakWorkingSet64 / 1024.0;
Console.WriteLine($"Took: {elapsedMs:#,#} ms and allocated {allocKb:#,#} kb with peak working set of {peakKb:#,#} kb");
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment