Skip to content

Instantly share code, notes, and snippets.

@MarkPflug
Last active April 20, 2023 20:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MarkPflug/6df6c1dee306685edec9a3293a83a0c0 to your computer and use it in GitHub Desktop.
Save MarkPflug/6df6c1dee306685edec9a3293a83a0c0 to your computer and use it in GitHub Desktop.
CSV Validation Example
// C# 11 and .NET 6+
using Sylvan.Data; // 0.2.12-B0001
using Sylvan.Data.Csv; // 1.2.7
// Schema describing the columns of the CSV sample below.
var schema =
    new Schema.Builder()
        // Id is declared as a non-nullable int, so an empty Id is a validation error.
        .Add<int>("Id")
        .Add<string>("Name", allowNull: false)
        // Date of birth is nullable.
        .Add<DateTime?>("DOB")
        // Nullable enum value.
        .Add<ConsoleColor?>("FavoriteColor")
        .Build();

// Record 1 is OK.
// Record 2 has a missing Id and an unknown color. The different date format is OK.
// Record 3 has a bad date (Unknown). The missing/null color is OK, as it is nullable.
var data =
    """
    Id,Name,DOB,FavoriteColor
    1,Dan,2020-01-01,Red
    ,Alex,"Jun 29, 1995",Blornge
    3,Maria,Unknown
    """;

// Create a CSV reader bound to the schema.
var opts = new CsvDataReaderOptions { Schema = new CsvSchema(schema) };

// The reader is disposable; a using declaration releases it (and the text reader
// it wraps) when the program's top-level scope ends, instead of leaking it.
using var reader = CsvDataReader.Create(new StringReader(data), opts);

// Attach the validation handler; see the HandleRecordError function below.
var validatingReader = reader.ValidateSchema(HandleRecordError);

// Expected output: records for row 1, which has no errors,
// and row 3, where the invalid date can be corrected by the handler.
foreach (var record in validatingReader.GetRecords<Record>())
{
    Console.WriteLine(record.ToString());
}
// Validation callback passed to ValidateSchema.
// Logs every failing column of the current row, replaces an unparseable DOB value
// with null, and treats an error in any other column as irreparable.
// Returns true only when all errors on the row were in the DOB column; records
// reported as repaired are the ones intended to be handed back to the reader.
static bool HandleRecordError(SchemaValidationContext context)
{
    var csvReader = (CsvDataReader)context.DataReader;

    // Header: which row failed, followed by its raw text.
    Console.WriteLine("--- ERROR START ---");
    Console.WriteLine($"Error(s) on row {csvReader.RowNumber}. Raw Record:");
    Console.Out.Write(csvReader.GetRawRecordSpan());
    Console.WriteLine("");

    bool allFixed = true;
    foreach (var ordinal in context.GetErrors())
    {
        var error = context.GetException(ordinal);
        var rawValue = csvReader.GetString(ordinal);

        // Log the error detail for this column.
        Console.WriteLine($" col: {ordinal} value: \"{rawValue}\" exception: {error.Message} ({error.GetType()})");

        if (csvReader.GetName(ordinal) == "DOB")
        {
            // Unknown date: substitute null for the date column.
            context.SetValue(ordinal, null);
        }
        else
        {
            // A failure in any other column cannot be repaired.
            allFixed = false;
        }
    }

    if (allFixed)
    {
        Console.WriteLine("Record was repaired");
    }
    else
    {
        Console.WriteLine("Record was NOT repaired");
    }
    Console.WriteLine("--- ERROR END ---");
    return allFixed;
}
// Plain data holder that GetRecords<Record>() is asked to produce for each CSV row.
// Property names mirror the column names declared in the schema.
class Record
{
    public int Id { get; set; }
    public string Name { get; set; }
    public DateTime? DOB { get; set; }
    public ConsoleColor? FavoriteColor { get; set; }

    // One-line rendering of the record; an absent date of birth is printed as "NULL".
    public override string ToString()
    {
        var dobText = DOB is DateTime born ? born.ToString() : "NULL";
        return $"Record: {Id} {Name} {dobText} {FavoriteColor}";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment