Skip to content

Instantly share code, notes, and snippets.

@SeanConnelly
Last active November 5, 2018 17:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SeanConnelly/cb4ff970c2df5266d24c8802d56f48da to your computer and use it in GitHub Desktop.
Save SeanConnelly/cb4ff970c2df5266d24c8802d56f48da to your computer and use it in GitHub Desktop.
CSV file reader for Cache and IRIS

CSV File Reader for Cache and IRIS

Loads RFC 4180 compliant CSV into a data reader object.

How to use:

Create an instance of the Cogs.Lib.Csv.Reader class and load the raw CSV using LoadFile(), LoadStream() or LoadString(). See the Test methods for examples.

Use the .Next() method to load each record.

Access the record field data either by field name using Get("address") or by ordinal position using GetAt(1).

Tweak the CSV import behaviour by setting properties on the reader instance before loading the CSV. See the property descriptions for details.

/// Reads RFC 4180 style CSV from a file, a stream or a string, one record at a time.
/// Set any behaviour properties (Delimiter, LineTerminator, HasHeaderMetaData, ...)
/// before calling one of the Load methods, because the header row is parsed on load.
/// <example>
/// set file="C:\Temp\test.csv"
/// set reader=##class(Cogs.Lib.Csv.Reader).%New()
/// set sc=reader.LoadFile(file)
/// while reader.Next() {
///   do reader.Display() //or
///   write !,reader.Get("address") //or
///   write !,reader.GetAt(1)
/// }
/// </example>
Class Cogs.Lib.Csv.Reader Extends %RegisteredObject
{

/// If the CSV does not contain a header row then set this to 0 before calling
/// LoadFile(), LoadStream() or LoadString().
/// If the CSV does contain a header row then it is loaded into MetaData and the
/// data can be accessed using the Get("field-name") method of the current record.
Property HasHeaderMetaData As %Boolean [ InitialExpression = 1 ];

/// The delimiter used to separate fields, defaults to a comma.
/// May be more than one character long, must not be empty.
Property Delimiter As %String [ InitialExpression = "," ];

/// The line terminator used to separate records, defaults to CRLF
Property LineTerminator As %String [ InitialExpression = {$c(13,10)} ];

/// Convert line terminators embedded in fields from the LineTerminator
/// value to the NormaliseLineTerminator value.
/// Leave empty (the default) to keep embedded line terminators unchanged.
Property NormaliseLineTerminator As %String;

/// Unescape quotes found in field values (a doubled quote becomes a single quote)
Property UnescapeQuotes As %Boolean [ InitialExpression = 1 ];

/// Automatically remove outer quotes from fields that have them
Property RemoveOuterQuotes As %Boolean [ InitialExpression = 1 ];

/// Each field value is passed through an input $zconvert using this encoding,
/// which defaults to UTF8.
/// Set this to an empty string to prevent automatic conversion
Property Encoding As %String [ InitialExpression = "UTF8" ];

/// Contains the data of the latest record loaded from calling the Next method
Property Data As list Of %String;

/// If HasHeaderMetaData is set to 1 then the first row will be loaded into MetaData.
/// MetaData is keyed by ordinal position and holds the field name at that position.
/// If header data is provided then data can be accessed using Get() with
/// the field name.
Property MetaData As array Of %String;

/// Set to the number of fields found in the very first row, this will be used
/// to validate that all records contain this same number of columns.
/// If a record contains fewer or more fields then the class will throw an error.
Property ColumnCount As %Integer [ InitialExpression = 0 ];

/// The current record number loaded into Data.
/// If there is header data then record 1 will start at the second line
Property RecordNo As %Integer;

/// A reference to the CSV data, either a stream object or the raw string
Property Csv;

/// If loaded from a string then this will be set to 1, else Csv is a stream
Property IsString As %Boolean [ InitialExpression = 0 ];

/// Private property for tracking the string read pointer when using strings.
/// Holds the position of the first character of the next line, or 0 once
/// $find could not locate another line terminator (the final line was read).
Property StringPointer As %Integer [ InitialExpression = 1, Private ];

/// An internal reference to the record string currently being parsed
Property Record As %String;

/// Links the reader to the file pFileName and loads the header row (if any).
/// Returns an error status if the file cannot be linked or is reported as null.
/// Header parsing is done by Next(), which throws on malformed data.
Method LoadFile(pFileName) As %Status
{
	set ..Csv=##class(%Stream.FileBinary).%New()
	set sc=..Csv.LinkToFile(pFileName)
	if $$$ISERR(sc) {
		quit sc
	}
	set ..Csv.LineTerminator=..LineTerminator
	if ..Csv.%IsNull() {
		quit $$$ERROR($$$GeneralError,"File not found")
	}
	do ..LoadHeaderMetaData()
	quit $$$OK
}

/// Uses pStream as the CSV source and loads the header row (if any).
/// The reader's LineTerminator is applied to the stream.
Method LoadStream(pStream) As %Status
{
	set ..Csv=pStream
	set ..Csv.LineTerminator=..LineTerminator
	do ..LoadHeaderMetaData()
	quit $$$OK
}

/// Uses the string pString as the CSV source and loads the header row (if any).
Method LoadString(pString) As %Status
{
	set ..Csv=pString
	set ..IsString=1
	// Start reading from the first character of the new string
	set ..StringPointer=1
	do ..LoadHeaderMetaData()
	quit $$$OK
}

/// When HasHeaderMetaData is on, reads the first record and stores each field
/// name in MetaData under its ordinal position, then resets RecordNo to 0 so
/// that the first data record is numbered 1.
Method LoadHeaderMetaData()
{
	if ..HasHeaderMetaData=0 {
		quit
	}
	if ..Next() {
		for i=1:1:..Data.Count() {
			do ..MetaData.SetAt(..Data.GetAt(i),i)
		}
	}
	set ..RecordNo=0
}

/// Call Next() to load the next CSV record into memory, this record can then be accessed using
/// the Get, GetAt and GetAsList methods.
/// Returns 1 if a record was loaded, or 0 if there are no more records.
/// Throws a status exception if the delimiter is empty, if the data ends inside
/// a quoted field, or if the record's field count differs from ColumnCount.
Method Next() As %Boolean
{
	if ..AtEnd() {
		quit 0
	}
	// Without a delimiter the field scan below could never advance
	if ..Delimiter="" $$$ThrowStatus($$$ERROR($$$GeneralError,"Delimiter must not be empty"))
	do ..Data.Clear()
	set ..Record=..ReadLine()
	// An odd number of quotes means a quoted field spans a line terminator,
	// so keep appending lines until the quotes balance
	while ($length(..Record,"""")-1)#2 {
		if ..AtEnd() $$$ThrowStatus($$$ERROR($$$GeneralError,"End of CSV file is corrupt"))
		set ..Record=..Record_..LineTerminator_..ReadLine()
	}
	set delimiterLength=$length(..Delimiter)
	set (from,to)=1
	while to'=0 {
		// $find returns the position just after the delimiter, or 0 when there is none left
		set to=$find(..Record,..Delimiter,to)
		set val=$extract(..Record,from,$select(to=0:$length(..Record),1:to-delimiterLength-1))
		// Unbalanced quotes: this delimiter sits inside a quoted field, so keep
		// the same start position and extend the field to the next delimiter
		if ($length(val,"""")-1)#2 continue
		set val=$zstrip(val,"<>W")
		if ..RemoveOuterQuotes,$extract(val,1)="""",$extract(val,*)="""" {
			set val=$extract(val,2,*-1)
		}
		if ..UnescapeQuotes {
			set val=$replace(val,"""""","""")
		}
		if ..NormaliseLineTerminator'="",..NormaliseLineTerminator'=..LineTerminator {
			set val=$replace(val,..LineTerminator,..NormaliseLineTerminator)
		}
		if ..Encoding'="" {
			set val=$zconvert(val,"I",..Encoding)
		}
		do ..Data.Insert(val)
		set from=to
	}
	// The first row read (header or data) fixes the expected number of columns
	if ..ColumnCount=0 {
		set ..ColumnCount=..Data.Count()
	}
	// RecordNo has not been incremented yet, so the failing record is RecordNo+1
	if ..ColumnCount'=..Data.Count() $$$ThrowStatus($$$ERROR($$$GeneralError,"Invalid column count at record no : "_(..RecordNo+1)))
	set ..RecordNo=..RecordNo+1
	set ..Record=""
	quit 1
}

/// Returns 1 when there is no more CSV data to read, else 0.
/// For strings this is when the final line has been read (StringPointer=0) or
/// when the pointer has moved past the end of the string, which happens when
/// the string ends with a line terminator.
Method AtEnd() As %Boolean
{
	if ..IsString {
		quit (..StringPointer=0)||(..StringPointer>$length(..Csv))
	}
	quit ..Csv.AtEnd
}

/// Returns the next line of raw CSV, without its line terminator.
Method ReadLine() As %String
{
	if '..IsString {
		quit ..Csv.ReadLine()
	}
	set from=..StringPointer
	set ..StringPointer=$find(..Csv,..LineTerminator,from)
	if ..StringPointer=0 {
		// No further terminator: the rest of the string is the final line
		quit $extract(..Csv,from,*)
	}
	// StringPointer is just past the terminator, so step back over it
	quit $extract(..Csv,from,..StringPointer-$length(..LineTerminator)-1)
}

/// Returns the value of the field named pName from the current record.
/// The name must match a header field name exactly.
/// Returns an empty string if there is no such field name in MetaData.
Method Get(pName As %String = "") As %Library.String
{
	// MetaData maps ordinal position -> field name, so search by value for the position
	set pos=..MetaData.Find(pName,"")
	if pos="" {
		quit ""
	}
	quit ..Data.GetAt(pos)
}

/// Returns the value of the field at ordinal position pPos (starting at 1)
/// from the current record.
Method GetAt(pPos As %Integer = 0) As %Library.String
{
	quit ..Data.GetAt(pPos)
}

/// Returns the current record as a $list with one element per column.
Method GetAsList() As %List
{
	set list=$listbuild(..Data.GetAt(1))
	for i=2:1:..ColumnCount {
		set $list(list,i)=..Data.GetAt(i)
	}
	quit list
}

/// Writes the current record to the current device, one field per line,
/// labelled with the field name when header data exists, else its position.
Method Display()
{
	write !!,"Record No : ",..RecordNo
	for i=1:1:..ColumnCount {
		set value=..Data.GetAt(i)
		// Multi-line values are set apart with blank lines
		set isMultiLine=(value[..LineTerminator)
		write !," ",$select(..MetaData.IsDefined(i):..MetaData.GetAt(i),1:i)," = "
		if isMultiLine write !!
		write value
		if isMultiLine write !
	}
}

/// Interactive demo: reads C:\Temp\test.csv via LoadFile and displays each
/// record, waiting for input after each one.
ClassMethod Test1()
{
	set file="C:\Temp\test.csv"
	set reader=##class(Cogs.Lib.Csv.Reader).%New()
	set sc=reader.LoadFile(file)
	if $$$ISERR(sc) {
		do $system.Status.DisplayError(sc)
		quit
	}
	while reader.Next() {
		do reader.Display()
		read x
	}
}

/// Interactive demo: reads C:\Temp\test.csv via LoadStream and displays each
/// record, waiting for input after each one.
ClassMethod Test2()
{
	set file="C:\Temp\test.csv"
	set CsvStream=##class(%Stream.FileBinary).%New()
	set sc=CsvStream.LinkToFile(file)
	if $$$ISERR(sc) {
		do $system.Status.DisplayError(sc)
		quit
	}
	set reader=##class(Cogs.Lib.Csv.Reader).%New()
	set sc=reader.LoadStream(CsvStream)
	if $$$ISERR(sc) {
		do $system.Status.DisplayError(sc)
		quit
	}
	while reader.Next() {
		do reader.Display()
		read x
	}
}

/// Interactive demo: reads CSV from a string via LoadString. The last record
/// has a quoted field containing delimiters, an escaped quote and a line break.
ClassMethod Test3()
{
	set string="""LatD"", ""LatM"", ""LatS"", ""NS"", ""LonD"", ""LonM"", ""LonS"", ""EW"", ""City"", ""State"""_$c(13,10)_"   41,    5,   59, ""N"",     80,   39,    0, ""W"", ""Youngstown"", OH"_$c(13,10)_"   42,   52,   48, ""N"",     97,   23,   23, ""W"", ""Yankton"", SD"_$c(13,10)_"   46,   35,   59, ""N"",    120,   30,   36, ""W"", ""Yakima"", WA"_$c(13,10)_"   42,   16,   12, ""N"",     71,   48,    0, ""W"", ""Worcester"", MA"_$c(13,10)_"   43,   37,   48, ""N"",     89,   46,   11, ""W"", ""Wisconsin Dells"", WI"_$c(13,10)_"   36,    5,   59, ""N"",     80,   15,    0, ""W"", ""Winston-Salem"", NC"_$c(13,10)_"   49,   52,   48, ""N"",     97,    9,    0, ""W"", ""Winnipeg"", MB"_$c(13,10)_"   39,   11,   23, ""N"",     78,    9,   36, ""W"", ""Winchester"", VA"_$c(13,10)_"   34,   14,   24, ""N"",     77,   55,   11, ""W"", ""Wilm,,"""",,,,"_$c(13,10)_"ington"", NC"
	set reader=##class(Cogs.Lib.Csv.Reader).%New()
	set sc=reader.LoadString(string)
	while reader.Next() {
		do reader.Display()
		read x
	}
}

}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment