Last active
August 26, 2020 14:37
-
-
Save elutz/73f042830cbdf858eae9e40c704b291f to your computer and use it in GitHub Desktop.
4D: Converts CSV data to collection based on https://gist.github.com/bennadel/9753411
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ---------------------------------------------------- | |
// Method: csv2coll | |
// Description | |
// Converts CSV data to collection | |
// Source: https://gist.githubusercontent.com/bennadel/9753411/raw/a8e6f25f15fc78d1ef2d187e4f4864c4b528f885/code-1.htm | |
// This will parse a delimited string into a collection of | |
// collections. The default delimiter is the semicolon, but this | |
// can be overriden in the second argument. | |
// Parameters | |
// ---------------------------------------------------- | |
C_TEXT($1;$csv) // Input CSV | |
C_TEXT($2;$strDelimiter) // optional delimiter, default=";" | |
C_COLLECTION($0) // Return | |
$csv:=$1 | |
// Check to see if the delimiter is defined. If not, | |
// then default to semicolon. | |
If (Count parameters>1) | |
$strDelimiter:=$2 | |
Else | |
$strDelimiter:=";" | |
End if | |
// Create a regular expression to parse the CSV values. | |
C_TEXT($pattern) | |
// Line resp. group 1: Delimiters. | |
// Line resp. group 2: Quoted fields. | |
// Line resp. group 3: Standard fields. | |
$pattern:="("+$strDelimiter+"|\\r?\\n|\\r|^)"+\ | |
"(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|"+\ | |
"([^\""+$strDelimiter+"\\r\\n]*))" | |
// Create an array to hold our data. Give the array | |
// a default empty first row. | |
C_COLLECTION($coll) | |
$coll:=New collection(New collection) | |
ARRAY LONGINT($positions;0) | |
ARRAY LONGINT($lengths;0) | |
C_LONGINT($start) | |
$start:=1 | |
// Keep looping over the regular expression matches | |
// until we can no longer find a match. | |
While (Match regex($pattern;$csv;$start;$positions;$lengths)) | |
// Get the delimiter that was found. | |
C_TEXT($strMatchedDelimiter) | |
$strMatchedDelimiter:=Substring($csv;$positions{1};$lengths{1}) | |
// Check to see if the given delimiter has a length | |
// (is not the start of string) and if it matches | |
// field delimiter. If id does not, then we know | |
// that this delimiter is a row delimiter. | |
If ((Length($strMatchedDelimiter)>0) & \ | |
($strMatchedDelimiter#$strDelimiter)) | |
// Since we have reached a new row of data, | |
// add an empty row to our data array. | |
$coll.push(New collection); | |
End if | |
// Now that we have our delimiter out of the way, | |
// let's check to see which kind of value we | |
// captured (quoted or unquoted). | |
C_TEXT($strMatchedValue) | |
If ($lengths{2}>0) | |
// We found a quoted value. When we capture | |
// this value, unescape any double quotes. | |
$strMatchedValue:=Substring($csv;$positions{2};$lengths{2}) | |
$strMatchedValue:=Replace string($strMatchedValue;"\"\"";"\"") | |
$start:=$positions{2}+$lengths{2} | |
Else | |
// We found a non-quoted value. | |
$strMatchedValue:=Substring($csv;$positions{3};$lengths{3}) | |
$start:=$positions{3}+$lengths{3} | |
End if | |
// Now that we have our value string, let's add | |
// it to the data array. | |
$coll[$coll.length-1].push($strMatchedValue) | |
End while | |
// Return the parsed data. | |
$0:=$coll |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment