Skip to content

Instantly share code, notes, and snippets.

@sonnemaf
Created July 17, 2020 09:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sonnemaf/056db10779e79faf8ab4c84bde851fa3 to your computer and use it in GitHub Desktop.
Save sonnemaf/056db10779e79faf8ab4c84bde851fa3 to your computer and use it in GitHub Desktop.
Test for WindowsCommunityToolkit/Microsoft.Toolkit.HighPerformance/Buffers/StringPool.cs
using Microsoft.Toolkit.HighPerformance.Buffers;
using Microsoft.Toolkit.HighPerformance.Enumerables;
using System;
using System.Buffers.Text;
using System.Diagnostics;
using System.IO;
using System.Text.Unicode;
namespace ImportTest {
class Program {
    /// <summary>
    /// Entry point: runs the UTF-8 CSV import test.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args) {
        OptimizedUtf8();
    }

    /// <summary>
    /// Parses "taxi-fare-train-utf8.csv" directly from its UTF-8 bytes, without
    /// materializing a string per line: rows are split on '\n', columns on ',',
    /// numeric columns are parsed with <see cref="Utf8Parser"/> and the textual
    /// columns are interned through a <see cref="StringPool"/>.
    /// </summary>
    private static void OptimizedUtf8() {
        // Source: https://github.com/dotnet/machinelearning/blob/master/test/data/taxi-fare-train.csv
        // Saved with UTF8 encoding
        using var mo = GetBytesFromFile("taxi-fare-train-utf8.csv");
        var stringPool = new StringPool();
        var header = true;

        foreach (var line in new ReadOnlySpanTokenizer<byte>(mo.Span, (byte)'\n')) {
            if (header) {
                // The first row only carries the column names.
                header = false;
                continue;
            }

            var data = new Data();
            var index = 0;

            foreach (var item in new ReadOnlySpanTokenizer<byte>(line, (byte)',')) {
                // A column that fails to parse keeps the default value of its field.
                switch (index++) {
                    case 0:
                        data.VendorId = MakeString(item, stringPool);
                        break;
                    case 1:
                        if (Utf8Parser.TryParse(item, out byte rateCode, out _)) {
                            data.RateCode = rateCode;
                        }
                        break;
                    case 2:
                        if (Utf8Parser.TryParse(item, out byte passengerCount, out _)) {
                            data.PassengerCount = passengerCount;
                        }
                        break;
                    case 3:
                        if (Utf8Parser.TryParse(item, out short tripTimeInSecs, out _)) {
                            data.TripTimeInSecs = tripTimeInSecs;
                        }
                        break;
                    case 4:
                        if (Utf8Parser.TryParse(item, out float tripDistance, out _)) {
                            data.TripDistance = tripDistance;
                        }
                        break;
                    case 5:
                        data.PaymentType = MakeString(item, stringPool);
                        break;
                    case 6:
                        if (Utf8Parser.TryParse(item, out float fareAmount, out _)) {
                            data.FareAmount = fareAmount;
                        }
                        break;
                    default:
                        // Extra columns are ignored.
                        break;
                }
            }

            // Uncomment to inspect the parsed rows:
            //Console.WriteLine(data);
        }

        stringPool.Reset();

        // Transcodes a UTF-8 field to UTF-16 and returns the pooled string for it.
        static string MakeString(ReadOnlySpan<byte> bytes, StringPool pool) {
            // Upper bound for the scratch buffer that is allowed to live on the stack;
            // longer fields fall back to a heap array so that a malformed or huge
            // field cannot overflow the stack.
            const int MaxStackChars = 256;

            // A UTF-16 encoding never needs more chars than the UTF-8 source has bytes,
            // so bytes.Length is always a sufficient capacity.
            Span<char> chars = bytes.Length <= MaxStackChars
                ? stackalloc char[bytes.Length]
                : new char[bytes.Length];

            Utf8.ToUtf16(bytes, chars, out _, out int charsWritten);

            // Only the chars actually produced belong to the value: multi-byte UTF-8
            // sequences yield fewer chars than bytes, and the unused tail of the
            // buffer must not end up inside the pooled string.
            ReadOnlySpan<char> text = chars.Slice(0, charsWritten);
            return pool.GetOrAdd(text);
        }
    }

    /// <summary>
    /// Reads the whole file at <paramref name="path"/> into a pooled buffer.
    /// </summary>
    /// <param name="path">Path of the file to load.</param>
    /// <returns>
    /// A <see cref="MemoryOwner{T}"/> holding the file content; the caller must dispose it.
    /// </returns>
    /// <exception cref="OverflowException">The file is larger than <see cref="int.MaxValue"/> bytes.</exception>
    /// <exception cref="EndOfStreamException">The stream ended before the reported length was read.</exception>
    public static MemoryOwner<byte> GetBytesFromFile(string path) {
        using Stream stream = File.OpenRead(path);

        // checked: fail loudly instead of silently wrapping the length of a > 2 GiB file.
        MemoryOwner<byte> buffer = MemoryOwner<byte>.Allocate(checked((int)stream.Length));
        try {
            // Stream.Read may return fewer bytes than requested, so keep reading
            // until the buffer is full.
            Span<byte> remaining = buffer.Span;
            while (!remaining.IsEmpty) {
                int read = stream.Read(remaining);
                if (read == 0) {
                    throw new EndOfStreamException($"Unexpected end of file while reading '{path}'.");
                }
                remaining = remaining.Slice(read);
            }
            return buffer;
        } catch {
            // Hand the buffer back on failure, since the caller never receives it.
            buffer.Dispose();
            throw;
        }
    }
}
[System.Runtime.InteropServices.StructLayout(System.Runtime.InteropServices.LayoutKind.Auto)]
struct Data {
    // Text columns.

    /// <summary>Value of the first CSV column.</summary>
    public string VendorId;

    // Numeric columns.

    /// <summary>Value of the second CSV column.</summary>
    public byte RateCode;

    /// <summary>Value of the third CSV column.</summary>
    public byte PassengerCount;

    /// <summary>Value of the fourth CSV column.</summary>
    public short TripTimeInSecs;

    /// <summary>Value of the fifth CSV column.</summary>
    public float TripDistance;

    /// <summary>Value of the sixth CSV column.</summary>
    public string PaymentType;

    /// <summary>Value of the seventh CSV column.</summary>
    public float FareAmount;

    /// <summary>
    /// Formats the record as one comma-separated row, fields in declaration order.
    /// </summary>
    /// <returns>The seven field values joined by commas.</returns>
    public override string ToString() =>
        string.Format(
            "{0},{1},{2},{3},{4},{5},{6}",
            VendorId,
            RateCode,
            PassengerCount,
            TripTimeInSecs,
            TripDistance,
            PaymentType,
            FareAmount);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment