Created
December 21, 2020 21:12
-
-
Save kaizhu256/52c43c18e6541c8b5adbccb27a5ef6cb to your computer and use it in GitHub Desktop.
convert json list-of-list or list-of-dict to csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function jsonRowListFromCsv({
    csv
}) {
/*
 * this function will convert <csv>-text to json list-of-list
 *
 * @param {object} option
 * @param {string} option.csv - csv-text to parse
 * @returns {string[][]} list of records, each a list of field-strings
 *
 * parsing follows https://tools.ietf.org/html/rfc4180#section-2
 * 1. each record is on a separate line, delimited by a line break
 *    (CRLF, and also bare CR or LF, which are normalized to LF here)
 * 2. the last record may or may not have an ending line break
 * 3. an optional header line is returned like any other record
 * 4. fields are separated by commas; spaces are part of the field
 * 5. each field may or may not be enclosed in double quotes
 * 6. quoted fields may contain line breaks, double quotes, and commas
 * 7. a double quote inside a quoted field is escaped by doubling it
 *
 * leniencies (input that rfc4180 does not define):
 * - a double quote in the middle of an unquoted field opens a quoted section
 * - a quoted section left open at end-of-input is closed implicitly
 * - line breaks inside quoted fields are normalized to LF as well
 */
    // normalize CRLF and bare CR to LF so only one line break must be handled
    const text = csv.replace((
        /\r\n?/gu
    ), "\n");
    const csvRowList = [];
    // true while scanning inside a double-quoted section
    let isQuoted = false;
    // true once current record consumed any input since last line break,
    // so that eof after "aaa", "aaa,", or "\"\"" still emits the record,
    // while eof right after a line break does not emit an extra empty record
    let isRecordOpen = false;
    let row = [];
    let val = "";
    let ii = 0;
    while (ii < text.length) {
        const chr = text[ii];
        ii += 1;
        if (isQuoted) {
            if (chr !== "\"") {
                // rule 6. commas and line breaks are literal inside quotes
                val += chr;
            } else if (text[ii] === "\"") {
                // rule 7. doubled double quote is one literal double quote
                val += "\"";
                ii += 1;
            } else {
                // rule 5. lone double quote closes the quoted section
                isQuoted = false;
            }
        } else if (chr === "\"") {
            // rule 5. double quote opens a quoted section
            isQuoted = true;
            isRecordOpen = true;
        } else if (chr === ",") {
            // rule 4. comma delimits val
            row.push(val);
            val = "";
            isRecordOpen = true;
        } else if (chr === "\n") {
            // rule 1. line break delimits val and appends row
            row.push(val);
            val = "";
            csvRowList.push(row);
            // reset row
            row = [];
            isRecordOpen = false;
        } else {
            val += chr;
            isRecordOpen = true;
        }
    }
    // rule 2. if eof is missing its line break, then flush the open record
    if (isRecordOpen) {
        row.push(val);
        csvRowList.push(row);
    }
    return csvRowList;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment