Created
November 14, 2016 17:09
-
-
Save anonymous/4e040afcc3ff0f55746d4bd970bc0c55 to your computer and use it in GitHub Desktop.
https://ayende.com/blog/176034/making-code-faster-the-interview-question - 260 ms vs 40,252 ms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Diagnostics; | |
using System.IO; | |
using System.IO.MemoryMappedFiles; | |
using System.Runtime.CompilerServices; | |
namespace TryOuts.AyendeParsingChallenge | |
{ | |
public static class Alternative | |
{ | |
const uint kMaxId = 99999999; | |
const uint kDigitZero = '0'; | |
const long kRecordSize = 50; | |
/// <summary> | |
/// Time stamp / duration parsing and output methods. | |
/// </summary> | |
private static class TimeStamp | |
{ | |
private const byte kDash = (byte)'-'; | |
private const byte kColon = (byte)':'; | |
private static readonly uint[] CumulativeMonthDaysRegular = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; | |
private static readonly uint[] CumulativeMonthDaysLeap = { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 }; | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe bool TryParseYear(byte* bp, out int val) | |
{ | |
val = | |
(int)(bp[0] - kDigitZero) * 1000 + | |
(int)(bp[1] - kDigitZero) * 100 + | |
(int)(bp[2] - kDigitZero) * 10 + | |
(int)(bp[3] - kDigitZero); | |
return val >= 1970 && val < 2100; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe bool TryParseTwoDigit(byte* bp, int maxVal, out int val) | |
{ | |
val = (int)(bp[0] - kDigitZero) * 10 + (int)(bp[1] - kDigitZero); | |
return val <= maxVal; | |
} | |
private static unsafe bool TryParseTimeDif(byte* entryTm, byte* exitTm, out int seconds) | |
{ | |
seconds = 0; | |
int entryHr = 0, exitHr = 0, entryMin = 0, exitMin = 0, entrySec = 0, exitSec = 0; | |
if (TryParseTwoDigit(entryTm, 23, out entryHr) && | |
*(entryTm + 2) == kColon && | |
TryParseTwoDigit(entryTm + 3, 59, out entryMin) && | |
*(entryTm + 5) == kColon && | |
TryParseTwoDigit(entryTm + 6, 59, out entrySec) && | |
TryParseTwoDigit(exitTm, 23, out exitHr) && | |
*(exitTm + 2) == kColon && | |
TryParseTwoDigit(exitTm + 3, 59, out exitMin) && | |
*(exitTm + 5) == kColon && | |
TryParseTwoDigit(exitTm + 6, 59, out exitSec)) | |
{ | |
seconds = (((exitHr - entryHr) * 60 * 60) + ((exitMin - entryMin) * 60) + (exitSec - entrySec)); | |
return true; | |
} | |
return false; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe bool TryParseDateDif(byte* entryDt, byte* exitDt, out uint secsDif) | |
{ | |
secsDif = 0; | |
return (*(ulong*)entryDt == *(ulong*)exitDt) && (*(ushort*)(entryDt + 8) == *(ushort*)(exitDt + 8)) || | |
TryParseDateDifLessLikely(entryDt, exitDt, ref secsDif); | |
} | |
private static unsafe bool TryParseDateDifLessLikely(byte* entryDt, byte* exitDt, ref uint secsDif) | |
{ | |
secsDif = 0; | |
var entrySecs = default(ulong); | |
var exitSecs = default(ulong); | |
if (!TryParseDate(entryDt, out entrySecs) || !TryParseDate(exitDt, out exitSecs)) | |
return false; | |
secsDif = (uint)(exitSecs - entrySecs); | |
return (int)(secDif) >= 0; | |
} | |
private static unsafe bool TryParseDate(byte* dt, out ulong seconds) | |
{ | |
seconds = 0; | |
var year = default(int); | |
var month = default(int); | |
var day = default(int); | |
if (TryParseYear(dt, out year) && | |
dt[4] == kDash && | |
TryParseTwoDigit(dt + 5, 12, out month) && | |
dt[7] == kDash && | |
TryParseTwoDigit(dt + 8, 31, out day)) | |
{ | |
var isLeapYear = (year % 4) == 0 && (((year % 100) != 0) || ((year % 400) == 0)); | |
var py = year - 1; | |
var nd = | |
(long)(py * 365 + py / 4 - py / 100 + py / 400) + | |
(isLeapYear ? CumulativeMonthDaysLeap[month - 1] : CumulativeMonthDaysRegular[month - 1]) + | |
day - 1; | |
seconds = (ulong)nd * 24 * 60 * 60; | |
return true; | |
} | |
return false; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public static unsafe bool TryParseDuration(byte* entryTs, byte* exitTs, out uint duration) | |
{ | |
uint dtDif; | |
int tmDif; | |
duration = 0; | |
if (TryParseDateDif(entryTs, exitTs, out dtDif) && TryParseTimeDif(entryTs + 11, exitTs + 11, out tmDif)) | |
{ | |
Debug.Assert((long)dtDif + tmDif > 0); | |
duration = (uint)(dtDif + tmDif); | |
return (int)duration >= 0; | |
} | |
return false; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static void WriteTwoDigitInt(uint val, int offset, byte[] buf) | |
{ | |
buf[offset] = (byte)((val / 10) + '0'); | |
buf[offset + 1] = (byte)((val % 10) + '0'); | |
} | |
public static unsafe int WriteAsTimeSpan(uint duration, byte[] buffer, int offset) | |
{ | |
var secs = duration % 60; | |
duration /= 60; | |
var mins = duration % 60; | |
duration /= 60; | |
var hrs = duration % 24; | |
var days = (duration / 24); | |
if (days >= 10) | |
{ | |
var ndx = buffer.Length - 1; | |
buffer[ndx] = (byte)'.'; | |
while (days > 0) | |
{ | |
buffer[--ndx] = (byte)((days % 10) + '0'); | |
days /= 10; | |
} | |
var len = buffer.Length - ndx; | |
Array.Copy(buffer, ndx, buffer, offset, len); | |
offset += len; | |
} | |
else if (days > 0) | |
{ | |
buffer[offset++] = (byte)('0' + days); | |
buffer[offset++] = (byte)'.'; | |
} | |
WriteTwoDigitInt(hrs, offset, buffer); | |
buffer[offset + 2] = kColon; | |
WriteTwoDigitInt(mins, offset + 3, buffer); | |
buffer[offset + 5] = kColon; | |
WriteTwoDigitInt(secs, offset + 6, buffer); | |
return offset + 8; | |
} | |
} | |
/// <summary> | |
/// A very simple one-level trie, an array of arrays. | |
/// Used because it verified the assumption that a (cache concious) trie | |
/// will be better in terms of both memory consumption and speed (due to | |
/// better cache locality effects) this is better than a dictionary or | |
/// a <c>uint[100000000]</c> array. | |
/// </summary> | |
private class EntryMap | |
{ | |
private const int kDirBits = (27 - 15); | |
private const uint kDirSize = (1 << kDirBits); | |
private const uint kVectSize = (1 << 16); | |
private const uint kVectMask = kVectSize - 1; | |
private readonly uint[][] _entriesDir = new uint[kDirSize][]; | |
private readonly byte[] _outputBuf = new byte[32]; // we should never need more than 26. | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public void Add(uint id, uint secondsDuration) | |
{ | |
Debug.Assert(id <= kMaxId); | |
var idx2 = id & kVectMask; | |
var idx1 = id >> 15; // we use the high 12 bits for the first trie level. | |
var es = _entriesDir[idx1]; | |
if (es == null) | |
_entriesDir[idx1] = es = new uint[kVectSize]; | |
es[idx2] += secondsDuration; | |
} | |
public void WriteTo(Stream output) | |
{ | |
_outputBuf[8] = (byte)' '; | |
var off = 0; | |
for (var i = 0; i < _entriesDir.Length; ++i) | |
{ | |
var es = _entriesDir[i]; | |
if (es != null) | |
{ | |
for (var j = 0; j < es.Length; ++j) | |
{ | |
if (es[j] > 0) | |
{ | |
WriteId((uint)(i << 15 | j), _outputBuf); | |
off = TimeStamp.WriteAsTimeSpan(es[j], _outputBuf, 9); | |
_outputBuf[off++] = (byte)'\r'; | |
_outputBuf[off++] = (byte)'\n'; | |
output.Write(_outputBuf, 0, off); | |
} | |
} | |
} | |
} | |
} | |
private static void WriteId(uint id, byte[] output) | |
{ | |
var n = 7; | |
while (id != 0) | |
{ | |
output[n--] = (byte)((id % 10) + kDigitZero); | |
id /= 10; | |
} | |
for (; n >= 0; --n) | |
output[n] = (byte)kDigitZero; | |
} | |
} | |
public static void Run(string inputFileName, string outputFileName) | |
{ | |
var entries = new EntryMap(); | |
var size = new FileInfo(inputFileName).Length; | |
using (var mmf = MemoryMappedFile.CreateFromFile(inputFileName, FileMode.Open)) | |
using (var accessor = mmf.CreateViewAccessor(0, size, MemoryMappedFileAccess.Read)) | |
ReadFile(accessor, size, entries); | |
using (var stream = File.Create(outputFileName, 32 * 1024)) | |
entries.WriteTo(stream); | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe bool TryParseId(byte* idp, out uint id) | |
{ | |
id = | |
(uint)(idp[0] - kDigitZero) * 10000000 + | |
(uint)(idp[1] - kDigitZero) * 1000000 + | |
(uint)(idp[2] - kDigitZero) * 100000 + | |
(uint)(idp[3] - kDigitZero) * 10000 + | |
(uint)(idp[4] - kDigitZero) * 1000 + | |
(uint)(idp[5] - kDigitZero) * 100 + | |
(uint)(idp[6] - kDigitZero) * 10 + | |
(uint)(idp[7] - kDigitZero); | |
return id <= kMaxId; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe bool TryParseRecord(byte* record, ref uint id, ref uint duration) | |
{ | |
return TimeStamp.TryParseDuration(record, record + 20, out duration) && TryParseId(record + 40, out id); | |
} | |
private static unsafe bool ReadFile(MemoryMappedViewAccessor accessor, long inputSize, EntryMap entries) | |
{ | |
long recNo = 0, nRecords = inputSize / kRecordSize; | |
uint id = 0, duration = 0; | |
var recPtr = default(byte*); | |
accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref recPtr); | |
for (recNo = 0; recNo < nRecords; ++recNo, recPtr += kRecordSize) | |
{ | |
if (!TryParseRecord(recPtr, ref id, ref duration)) | |
break; | |
entries.Add(id, duration); | |
} | |
accessor.SafeMemoryMappedViewHandle.ReleasePointer(); | |
return recNo == nRecords; | |
} | |
} | |
public static class BenchMain | |
{ | |
public static void Run(string inputFileName, string outputFileName, string runnerName, Action<string, string> runner) | |
{ | |
AppDomain.MonitoringIsEnabled = true; | |
Console.Write($"Running {runnerName} ... "); | |
var sw = Stopwatch.StartNew(); | |
runner(inputFileName, outputFileName); | |
sw.Stop(); | |
var elapsedMs = sw.ElapsedMilliseconds; | |
var allocKb = AppDomain.CurrentDomain.MonitoringTotalAllocatedMemorySize / 1024.0; | |
var peakKb = Process.GetCurrentProcess().PeakWorkingSet64 / 1024.0; | |
Console.WriteLine($"Took: {elapsedMs:#,#} ms and allocated {allocKb:#,#} kb with peak working set of {peakKb:#,#} kb"); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment