Last active
June 23, 2018 22:22
-
-
Save ddpruitt/599ea3acef2abec60bf05e3c567bc0db to your computer and use it in GitHub Desktop.
CSV File Scanner POC. The goal is to minimize time and memory by using an ArrayPool<byte> and File.ReadLines.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Buffers; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Text; | |
namespace ConsoleApp1 | |
{ | |
/// <summary>
/// Console entry point for the CSV scanner proof of concept.
/// </summary>
class Program
{
    /// <summary>
    /// Creates a <see cref="CsvFileScanner"/> for a fixed, relative CSV path and runs it.
    /// </summary>
    /// <param name="args">Command-line arguments; not read by this method.</param>
    static void Main(string[] args)
    {
        // Value handed to the scanner's numberOfCommas constructor parameter.
        const int commaCount = 9;
        const string samplePath = @".\Data\noniouzipcodes2015.csv";

        new CsvFileScanner(samplePath, commaCount).Execute();
    }
}
/// <summary>
/// Streams a CSV file line by line and prints the distinct values of its third
/// column (the text between the second and third comma of each line).
/// </summary>
/// <remarks>
/// Comma offsets are written into a scratch array rented from
/// <see cref="ArrayPool{T}.Shared"/>. The split is a plain comma scan: quoted
/// fields containing commas are not recognised.
/// </remarks>
public class CsvFileScanner
{
    // The third column ends at the third comma, so a line needs at least this many.
    private const int RequiredCommas = 3;

    private readonly FileInfo _csvFileInfo;
    private readonly int _numberOfCommas;

    // Offsets are stored as int: a byte cannot represent positions past column 255.
    private readonly ArrayPool<int> _arrayPool;

    /// <summary>
    /// Creates a scanner for the given CSV file.
    /// </summary>
    /// <param name="csvFilename">Path of the CSV file to scan.</param>
    /// <param name="numberOfCommas">
    /// Number of comma offsets to make room for per line. Values below three are
    /// raised to three, because the third column cannot be located with fewer.
    /// </param>
    public CsvFileScanner(string csvFilename, int numberOfCommas)
    {
        _numberOfCommas = numberOfCommas;
        _arrayPool = ArrayPool<int>.Shared;
        _csvFileInfo = new FileInfo(csvFilename);
    }

    /// <summary>
    /// Records the offsets of the commas in <paramref name="line"/>, left to right.
    /// </summary>
    /// <param name="line">The text to scan.</param>
    /// <param name="nums">Destination for the comma offsets.</param>
    /// <param name="limit">Maximum number of offsets to record.</param>
    /// <returns>
    /// The number of offsets written. Slots of <paramref name="nums"/> at or
    /// beyond this count are left untouched and must not be read.
    /// </returns>
    private static int FindCommasInLine(string line, int[] nums, int limit)
    {
        // Never write past the array, whatever limit the caller asked for.
        var capacity = Math.Min(limit, nums.Length);
        var found = 0;

        for (var index = 0; index < line.Length && found < capacity; index++)
        {
            if (line[index] == ',')
            {
                nums[found++] = index;
            }
        }

        return found;
    }

    /// <summary>
    /// Reads the file, collects the distinct third-column values and writes each
    /// one to the console on its own line.
    /// </summary>
    /// <remarks>
    /// Returns without output when the file does not exist. Lines with fewer than
    /// three commas (blank lines included) are skipped. Any exception raised while
    /// processing is caught and reported on the console instead of propagating.
    /// </remarks>
    public void Execute()
    {
        var hashSet = new HashSet<string>();
        try
        {
            if (!_csvFileInfo.Exists) return;

            var capacity = Math.Max(_numberOfCommas, RequiredCommas);

            // One rental serves every line. The comma count returned by the
            // helper bounds what is read, so the array never needs clearing.
            var commaIndexes = _arrayPool.Rent(capacity);
            try
            {
                foreach (var line in File.ReadLines(_csvFileInfo.FullName, Encoding.UTF8))
                {
                    var found = FindCommasInLine(line, commaIndexes, capacity);
                    if (found < RequiredCommas)
                    {
                        // No third column on this line.
                        continue;
                    }

                    var start = commaIndexes[1] + 1;

                    // Substring takes a length, not an end offset.
                    hashSet.Add(line.Substring(start, commaIndexes[2] - start));
                }
            }
            finally
            {
                _arrayPool.Return(commaIndexes);
            }

            foreach (var name in hashSet)
            {
                Console.WriteLine(name);
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("The process failed: {0}", e.ToString());
        }
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The idea for this came from "Strings Are Evil", where Indy Singh introduced me to ArrayPool<T>.