Skip to content

Instantly share code, notes, and snippets.

@thelastpointer
Last active August 31, 2023 16:55
Show Gist options
  • Save thelastpointer/d9a379679f1a872a1e07517d09d8a493 to your computer and use it in GitHub Desktop.
Save thelastpointer/d9a379679f1a872a1e07517d09d8a493 to your computer and use it in GitHub Desktop.
Simple CSV reader class that is able to convert lines into objects.
using System.Collections.Generic;
using System.IO;
using System.Reflection;
namespace TLP.Utils
{
/// <summary>
/// Reads CSV files.
/// </summary>
public static class CSVReader
{
#region Read into string arrays
/// <summary>
/// Parses CSV text held in a string into rows of raw string fields.
/// </summary>
/// <param name="data">The complete CSV text.</param>
/// <param name="skipHeader">When true, the first line is discarded (useful when it is a header row).</param>
/// <returns>One string array per remaining line; each array holds that line's fields.</returns>
public static string[][] Read(string data, bool skipHeader = true)
{
    // Wrap the text in a reader and hand it to the TextReader overload, releasing the reader afterwards.
    StringReader reader = new StringReader(data);
    try
    {
        return Read(reader, skipHeader);
    }
    finally
    {
        reader.Dispose();
    }
}
/// <summary>
/// Parses CSV text supplied by a reader into rows of raw string fields, one row per line.
/// </summary>
/// <param name="str">Reader that supplies the CSV text line by line.</param>
/// <param name="skipHeader">When true, the first line is discarded (useful when it is a header row).</param>
/// <returns>One string array per remaining line; each array holds that line's fields.</returns>
public static string[][] Read(TextReader str, bool skipHeader = true)
{
    // One line is always consumed up front; skipping the header simply consumes a second one.
    string current = str.ReadLine();
    if (skipHeader)
    {
        current = str.ReadLine();
    }

    List<string[]> rows = new List<string[]>();
    for (; current != null; current = str.ReadLine())
    {
        rows.Add(ParseCSVLine(current));
    }

    return rows.ToArray();
}
#endregion
#region Read into classes & structs
/// <summary>
/// Converts CSV text held in a string into objects of type T, one object per data line.
/// The text is wrapped in a reader and all options are passed unchanged to the TextReader overload.
/// </summary>
/// <typeparam name="T">Target type; one instance is created for every data line.</typeparam>
/// <param name="data">The complete CSV text.</param>
/// <param name="readFirstLineAsHeader">
/// If true, the first line names the members to fill. If false, headerOverride must be supplied and the first line is treated as data.
/// </param>
/// <param name="headerOverride">
/// Member names by column position. When supplied it takes precedence over the names in the first line, even if readFirstLineAsHeader is true. May be null.
/// </param>
/// <param name="fillPrivateMembers">If true, non-public members are assigned as well.</param>
/// <param name="ignoreCase">If true, member names are matched case-insensitively.</param>
/// <param name="conversionErrors">What to do when a value cannot be converted.</param>
/// <param name="conversionFunction">Optional callback invoked for every value before the default conversion is attempted.</param>
/// <returns>An array of objects, each filled from one line of the CSV.</returns>
public static T[] Read<T>(string data,
    bool readFirstLineAsHeader = true,
    string[] headerOverride = null,
    bool fillPrivateMembers = false,
    bool ignoreCase = true,
    ConversionErrorHandling conversionErrors = ConversionErrorHandling.SkipItem,
    ConversionFunction conversionFunction = null
    )
    where T: new()
{
    StringReader reader = new StringReader(data);
    try
    {
        return Read<T>(
            reader,
            readFirstLineAsHeader,
            headerOverride,
            fillPrivateMembers,
            ignoreCase,
            conversionErrors,
            conversionFunction);
    }
    finally
    {
        reader.Dispose();
    }
}
/// <summary>
/// Converts CSV text supplied by a reader into objects of type T, one object per data line.
/// Each column is matched by name to an instance field and/or property of T and assigned via reflection.
/// Works for both classes and structs. Empty input yields an empty array.
/// </summary>
/// <typeparam name="T">Target type; one instance is created with <c>new T()</c> for every data line.</typeparam>
/// <param name="str">Reader that supplies the CSV text line by line.</param>
/// <param name="readFirstLineAsHeader">
/// If true, the first line names the members to fill. If false, headerOverride must be supplied and the first line is treated as data.
/// </param>
/// <param name="headerOverride">
/// Member names by column position. When supplied it takes precedence over the names in the first line
/// (which is still consumed if readFirstLineAsHeader is true). May be null.
/// </param>
/// <param name="fillPrivateMembers">If true, non-public members are assigned as well.</param>
/// <param name="ignoreCase">If true, member names are matched case-insensitively.</param>
/// <param name="conversionErrors">What to do when a value cannot be converted or assigned.</param>
/// <param name="conversionFunction">Optional callback invoked for every value before the default conversion is attempted.</param>
/// <returns>An array of objects, each filled from one line of the CSV.</returns>
/// <exception cref="System.InvalidOperationException">readFirstLineAsHeader is false and headerOverride is null.</exception>
/// <exception cref="System.ArgumentNullException">str is null.</exception>
public static T[] Read<T>(TextReader str,
    bool readFirstLineAsHeader = true,
    string[] headerOverride = null,
    bool fillPrivateMembers = false,
    bool ignoreCase = true,
    ConversionErrorHandling conversionErrors = ConversionErrorHandling.SkipItem,
    ConversionFunction conversionFunction = null
    )
    where T : new()
{
    if (!readFirstLineAsHeader && (headerOverride == null))
        throw new System.InvalidOperationException("Either readFirstLineAsHeader or headerOverride must be set!");
    if (str == null)
        throw new System.ArgumentNullException("str");

    List<T> result = new List<T>();

    // Read first line; use it as a header if requested
    string line = str.ReadLine();
    if (readFirstLineAsHeader)
    {
        if (headerOverride == null)
        {
            // Completely empty input: there is no header to parse and no data to convert.
            if (line == null)
                return result.ToArray();
            headerOverride = ParseCSVLine(line);
        }
        line = str.ReadLine();
    }

    // Resolve reflection info once, now that the member names are known. Only fields and properties are considered.
    FieldInfo[] fields = new FieldInfo[headerOverride.Length];
    PropertyInfo[] properties = new PropertyInfo[headerOverride.Length];

    BindingFlags bindingFlags = BindingFlags.Instance | BindingFlags.Public;
    if (fillPrivateMembers)
        bindingFlags |= BindingFlags.NonPublic;
    if (ignoreCase)
        bindingFlags |= BindingFlags.IgnoreCase;

    for (int i = 0; i < headerOverride.Length; i++)
    {
        if (!string.IsNullOrEmpty(headerOverride[i]))
        {
            // A column whose name matches neither a field nor a property is left null and ignored later.
            fields[i] = typeof(T).GetField(headerOverride[i], bindingFlags);
            properties[i] = typeof(T).GetProperty(headerOverride[i], bindingFlags);
        }
    }

    // Convert line by line; the item is added to the result unless skipItem gets set.
    while (line != null)
    {
        string[] elems = ParseCSVLine(line);

        // Box the new instance exactly once and pass that same box to every SetValue call.
        // For a struct, passing the unboxed value would make reflection modify a temporary copy
        // and the assignment would be lost; for a class this is just the object reference.
        object boxed = new T();
        bool skipItem = false;

        // Columns beyond the known member names have nothing to bind to.
        for (int i = 0; (i < elems.Length) && (i < headerOverride.Length); i++)
        {
            if (fields[i] != null)
            {
                try
                {
                    fields[i].SetValue(boxed, ConvertValue(headerOverride[i], elems[i], fields[i].FieldType, conversionFunction));
                }
                catch
                {
                    if (conversionErrors == ConversionErrorHandling.ThrowException)
                        throw;
                    if (conversionErrors == ConversionErrorHandling.SkipItem)
                    {
                        skipItem = true;
                        break;
                    }
                    // SkipMember: leave this field untouched and carry on.
                }
            }

            if (properties[i] != null)
            {
                try
                {
                    properties[i].SetValue(boxed, ConvertValue(headerOverride[i], elems[i], properties[i].PropertyType, conversionFunction));
                }
                catch
                {
                    if (conversionErrors == ConversionErrorHandling.ThrowException)
                        throw;
                    if (conversionErrors == ConversionErrorHandling.SkipItem)
                    {
                        skipItem = true;
                        break;
                    }
                    // SkipMember: leave this property untouched and carry on.
                }
            }
        }

        if (!skipItem)
            result.Add((T)boxed);

        line = str.ReadLine();
    }

    return result.ToArray();
}

/// <summary>
/// Converts one raw CSV value to the type of the member it is about to be assigned to.
/// The custom conversion function, if any, is tried first; if it is absent or returns false,
/// System.Convert.ChangeType is used instead.
/// </summary>
private static object ConvertValue(string memberName, string rawValue, System.Type targetType, ConversionFunction conversionFunction)
{
    object converted = null;
    if ((conversionFunction != null) && conversionFunction(memberName, rawValue, ref converted))
        return converted;

    // NOTE(review): per the .NET docs this overload converts using the current thread culture,
    // so numeric/date parsing may vary by machine locale - confirm whether invariant culture is wanted.
    return System.Convert.ChangeType(rawValue, targetType);
}
#endregion
/// <summary>
/// Splits a single comma-separated line into its fields.
/// Commas inside double-quoted sections do not split. A field that both starts and ends with a
/// double quote has those two quotes removed and every doubled quote ("") inside it replaced by a single one.
/// Malformed quoting never throws: a field with an unclosed quote (including a field that is just
/// a lone quote character) is returned as-is, and an unclosed quote makes the rest of the line one field.
/// </summary>
/// <param name="line">A single line of comma-separated data. An empty line yields one empty field.</param>
/// <returns>An array of values as strings; always contains at least one element.</returns>
/// <exception cref="System.ArgumentNullException">line is null.</exception>
public static string[] ParseCSVLine(string line)
{
    if (line == null)
        throw new System.ArgumentNullException("line");

    List<string> elements = new List<string>();
    int start = 0;
    bool inQuotes = false;

    for (int i = 0; i < line.Length; i++)
    {
        char c = line[i];
        if (c == '"')
        {
            // Every quote toggles the state; a doubled quote toggles twice and so leaves it unchanged.
            inQuotes = !inQuotes;
        }
        else if ((c == ',') && !inQuotes)
        {
            elements.Add(UnquoteField(line, start, i));
            start = i + 1;
        }
    }

    // Whatever follows the last separator (possibly nothing) is the final field.
    elements.Add(UnquoteField(line, start, line.Length));
    return elements.ToArray();
}

/// <summary>
/// Returns the field occupying [start, end) of line, stripping enclosing quotes and un-doubling inner quotes when present.
/// </summary>
private static string UnquoteField(string line, int start, int end)
{
    int length = end - start;

    // Require two distinct characters: a lone '"' is both "first" and "last" but is not an enclosed field,
    // and stripping it would ask Substring for a negative length.
    if ((length >= 2) && (line[start] == '"') && (line[end - 1] == '"'))
        return line.Substring(start + 1, length - 2).Replace("\"\"", "\"");

    return line.Substring(start, length);
}
/// <summary>
/// Selects how the typed Read overloads react when a CSV value cannot be converted
/// or assigned to its target field or property.
/// </summary>
public enum ConversionErrorHandling
{
/// <summary>Rethrow the exception to the caller. This stops the whole parsing and no result is returned.</summary>
ThrowException,
/// <summary>Discard the entire item (line) in which the error occurred and continue with the next line.</summary>
SkipItem,
/// <summary>Ignore the error and keep processing the line; the faulty member keeps the value it already had.</summary>
SkipMember
}
/// <summary>
/// Custom conversion callback used by the typed Read overloads.
/// It is invoked for each value before it is assigned to a field or property, ahead of the default System.Convert.ChangeType conversion.
/// An exception thrown by the callback is handled according to the ConversionErrorHandling setting of the Read call.
/// </summary>
/// <param name="fieldName">Name of the member being filled, as given by the header (or headerOverride) for this column.</param>
/// <param name="value">The raw string value read from the CSV.</param>
/// <param name="result">Passed in as null; set this to the converted value when returning true.</param>
/// <returns>Return true to use the result, false to ignore the function and use the default conversion.</returns>
public delegate bool ConversionFunction(string fieldName, string value, ref object result);
}
}
@thelastpointer
Copy link
Author

Added a better conversion function.
A line's last element will also be stripped of apostrophes.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment