Created
July 17, 2020 09:48
-
-
Save sonnemaf/056db10779e79faf8ab4c84bde851fa3 to your computer and use it in GitHub Desktop.
Test for WindowsCommunityToolkit/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Microsoft.Toolkit.HighPerformance.Buffers; | |
using Microsoft.Toolkit.HighPerformance.Enumerables; | |
using System; | |
using System.Buffers.Text; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Text.Unicode; | |
namespace ImportTest { | |
/// <summary>
/// Console harness that parses a UTF-8 encoded CSV file directly from its raw bytes,
/// interning the textual columns through a <see cref="StringPool"/>.
/// </summary>
class Program {
    /// <summary>Program entry point; runs the UTF-8 parsing scenario once.</summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args) {
        OptimizedUtf8();
    }

    /// <summary>
    /// Loads "taxi-fare-train-utf8.csv" into a pooled buffer, skips the first line (header)
    /// and parses every following line into a <see cref="Data"/> value. The first seven
    /// comma-separated fields are consumed; any further fields are ignored. A numeric field
    /// that fails to parse leaves the corresponding member at its default value.
    /// </summary>
    private static void OptimizedUtf8() {
        // Source: https://github.com/dotnet/machinelearning/blob/master/test/data/taxi-fare-train.csv
        // Saved with UTF8 encoding
        using var mo = GetBytesFromFile("taxi-fare-train-utf8.csv");
        var stringPool = new StringPool();
        var header = true;

        foreach (var line in new ReadOnlySpanTokenizer<byte>(mo.Span, (byte)'\n')) {
            if (header) {
                // The first line holds the column names, not data.
                header = false;
                continue;
            }

            if (line.IsEmpty) {
                // Nothing to parse (e.g. the empty token after a trailing newline).
                continue;
            }

            var data = new Data();
            var index = 0;

            foreach (var item in new ReadOnlySpanTokenizer<byte>(line, (byte)',')) {
                switch (index++) {
                    case 0:
                        data.VendorId = MakeString(item, stringPool);
                        break;
                    case 1:
                        if (Utf8Parser.TryParse(item, out byte rateCode, out _)) {
                            data.RateCode = rateCode;
                        }
                        break;
                    case 2:
                        if (Utf8Parser.TryParse(item, out byte passengerCount, out _)) {
                            data.PassengerCount = passengerCount;
                        }
                        break;
                    case 3:
                        if (Utf8Parser.TryParse(item, out short tripTimeInSecs, out _)) {
                            data.TripTimeInSecs = tripTimeInSecs;
                        }
                        break;
                    case 4:
                        if (Utf8Parser.TryParse(item, out float tripDistance, out _)) {
                            data.TripDistance = tripDistance;
                        }
                        break;
                    case 5:
                        data.PaymentType = MakeString(item, stringPool);
                        break;
                    case 6:
                        if (Utf8Parser.TryParse(item, out float fareAmount, out _)) {
                            data.FareAmount = fareAmount;
                        }
                        break;
                    default:
                        break;
                }
            }

            // Debugging aid: uncomment to dump every parsed record.
            //Console.WriteLine(data);
        }

        stringPool.Reset();

        // Transcodes a UTF-8 field to UTF-16 in a scratch buffer and returns the pooled
        // string for it, so repeated values do not allocate a new string each time.
        static string MakeString(ReadOnlySpan<byte> bytes, StringPool pool) {
            // Small fields use a fixed-size stack buffer; larger ones fall back to the heap
            // so that an unexpectedly long field cannot overflow the stack.
            const int MaxStackChars = 256;

            // A UTF-8 sequence never produces more UTF-16 code units than it has bytes,
            // so a buffer of bytes.Length chars is always large enough.
            Span<char> chars = bytes.Length <= MaxStackChars
                ? stackalloc char[MaxStackChars]
                : new char[bytes.Length];

            Utf8.ToUtf16(bytes, chars, out _, out int charsWritten);

            // Only the written prefix is valid text; the rest of the buffer is unused and
            // must not become part of the pooled string.
            return pool.GetOrAdd(chars.Slice(0, charsWritten));
            // Non-pooled alternative for comparison:
            //return Encoding.UTF8.GetString(bytes);
        }
    }

    /// <summary>
    /// Reads the entire file at <paramref name="path"/> into a pooled buffer.
    /// </summary>
    /// <param name="path">Path of the file to read.</param>
    /// <returns>
    /// A <see cref="MemoryOwner{T}"/> holding the file content; the caller must dispose it.
    /// </returns>
    /// <exception cref="OverflowException">The file length does not fit in an <see cref="int"/>.</exception>
    /// <exception cref="EndOfStreamException">The stream ended before the reported length was read.</exception>
    public static MemoryOwner<byte> GetBytesFromFile(string path) {
        using Stream stream = File.OpenRead(path);

        // checked: fail loudly instead of silently wrapping the length of a huge file.
        MemoryOwner<byte> buffer = MemoryOwner<byte>.Allocate(checked((int)stream.Length));

        try {
            // Stream.Read may return fewer bytes than requested, so keep reading
            // until the whole buffer has been filled.
            Span<byte> remaining = buffer.Span;
            while (!remaining.IsEmpty) {
                int read = stream.Read(remaining);
                if (read == 0) {
                    throw new EndOfStreamException($"Unexpected end of file while reading '{path}'.");
                }
                remaining = remaining.Slice(read);
            }

            return buffer;
        } catch {
            // Ownership was never handed to the caller; release the buffer.
            buffer.Dispose();
            throw;
        }
    }
}
/// <summary>
/// One parsed CSV record. The members correspond, in declaration order,
/// to the first seven comma-separated fields of a line.
/// </summary>
[System.Runtime.InteropServices.StructLayout(System.Runtime.InteropServices.LayoutKind.Auto)]
struct Data {
    // Field 0 (text).
    public string VendorId;
    // Field 1.
    public byte RateCode;
    // Field 2.
    public byte PassengerCount;
    // Field 3.
    public short TripTimeInSecs;
    // Field 4.
    public float TripDistance;
    // Field 5 (text).
    public string PaymentType;
    // Field 6.
    public float FareAmount;

    /// <summary>
    /// Formats the record as a single comma-separated line.
    /// </summary>
    /// <returns>The seven members joined by commas, in declaration order.</returns>
    public override string ToString() {
        // Format with the invariant culture: a culture whose decimal separator is ','
        // would otherwise inject extra commas into the comma-separated output.
        return System.FormattableString.Invariant(
            $"{VendorId},{RateCode},{PassengerCount},{TripTimeInSecs},{TripDistance},{PaymentType},{FareAmount}");
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment