Skip to content

Instantly share code, notes, and snippets.

@tenthree
Last active December 13, 2019 09:49
Show Gist options
  • Save tenthree/b44a55c65b96a8eadeed18ab1ed6e34a to your computer and use it in GitHub Desktop.
Save tenthree/b44a55c65b96a8eadeed18ab1ed6e34a to your computer and use it in GitHub Desktop.
convert simple csv formatted data to json
const fs = require('fs')
const NUM_EXP = /^(\+|-)?(\d+)(\.\d+)?$/
const PERCENT_EXP = /^(\+|-)?(\d+)(\.\d+)?%$/
const CSV_COLUMN_EXP = /^"(.*)"$/g
const NULL_VALUES = [ '' ]
const CSV_OPTIONS = {
flip: false,
newline: '\r\n',
beginRow: 1,
fields: [/* String */],
insert: {/* field: value */},
alias: {/* value: newValue | field: {value: newValue } */},
required: [/* fields */],
filter: (row) => true, // function (row) { return Boolean }
groupBy: '', // field
sortOn: '', // field
sortByDesc: false
}
function stripQuotationMark (value = '') {
if (CSV_COLUMN_EXP.test(value)) {
value = value.replace(CSV_COLUMN_EXP, '$1')
}
return value
}
function parseValue (field, value, alias = {}) {
value = stripQuotationMark(value.toString().trim())
if (NUM_EXP.test(value)) {
return Number(value)
} else if (PERCENT_EXP.test(value)) {
return Number((Number(value.replace(/%$/, '')) / 100).toFixed(4))
} else if (NULL_VALUES.indexOf(value) !== -1) {
return null
}
return value
}
function aliasValue (field, value, alias = {}) {
let subAlias = alias[field]
// no sub-alias or invalid setting
if (!subAlias || subAlias !== Object(subAlias)) {
return (typeof alias[value] !== 'undefined') ? alias[value] : value
}
// sub-alias on the specified field
return (typeof subAlias[value] !== 'undefined') ? subAlias[value] : value
}
function log (path, options = {}) {
let { beginRow, fields, insert, alias, required, groupBy, sortOn, sortByDesc } = options
let log = `\n[csv]`
let indent = ' '
let insertList = Object.keys(insert).map(prop => `${prop}:${insert[prop]}`)
let aliasList = Object.keys(alias).map(prop => `${prop}:${alias[prop]}`)
log += `\n${indent}[source] ${path}`
log += `\n${indent}[beginRow] ${beginRow}`
if (fields.length) {
log += `\n${indent}[fields] ${fields.filter(field => !!field)}`
}
if (insertList.length) {
log += `\n${indent}[insert] ${insertList}`
}
if (aliasList.length) {
log += `\n${indent}[alias] ${aliasList}`
}
if (required.length) {
log += `\n${indent}[required] ${required}`
}
if (groupBy) {
log += `\n${indent}[groupBy] ${groupBy}`
}
if (sortOn) {
log += `\n${indent}[sortOn] ${sortOn}`
}
if (sortByDesc) {
log += `\n${indent}[sortByDesc] ${sortByDesc}`
}
console.log(log)
}
function flipArray2d (arr) {
return arr[0].map((col, index) => arr.map(row => row[index])).slice()
}
function parseCsvColumns (str = '') {
return str.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/)
}
function parseCSV (path, options = {}) {
options = Object.assign({}, CSV_OPTIONS, options)
let { beginRow, flip, fields, insert, alias, required, filter, groupBy, sortOn, sortByDesc } = options
// log
log(path, options)
// read raw data
let data = fs
.readFileSync(path, 'utf-8')
.split(options.newline)
.map(row => parseCsvColumns(row))
// flip raw data
if (flip) {
data = flipArray2d(data)
}
// if there is no custom fields in options,
// use first-row values as name of fields
if (beginRow > 0 && (!fields || !fields.length)) {
fields = data[0]
}
// slice from beginRow and process data
let json = data.slice(beginRow)
.map(row => {
return row
.reduce((obj, value, index) => {
let field = fields[index]
if (!field) {
return obj
}
obj[field] = aliasValue(field, parseValue(field, value, alias), alias)
return obj
}, { ...insert })
})
.filter(row => {
// filter by required fields
return !required.some(f => (row[f] === null || typeof row[f] === 'undefined'))
})
// custom filter function
if (filter) {
json = json.filter(filter)
}
// groupBy
if (groupBy && Array.isArray(json)) {
json = json.reduce((obj, row) => {
let groupName = row[groupBy]
if (typeof obj[groupName] === 'undefined') {
obj[groupName] = []
}
obj[groupName].push(row)
return obj
}, {})
}
// sortOn
if (sortOn && typeof json === 'object') {
let sortHandler = !sortByDesc
? (a, b) => a[sortOn] - b[sortOn]
: (a, b) => b[sortOn] - a[sortOn]
if (Array.isArray(json)) {
// sort array
json.sort(sortHandler)
} else {
// sort array by group in object after groupby processing
for (let groupName in json) {
Array.isArray(json[groupName]) && json[groupName].sort(sortHandler)
}
}
}
return JSON.parse(JSON.stringify(json))
}
// --------------------------------------------------
// Example
// --------------------------------------------------
const CSV_SRC = './src/data'
const JSON_DEST = './src/data'
let data = parseCSV(`${CSV_SRC}/source.csv`, {
fields: [ 'year', 'order', 'party', 'proportion', 'seats', 'grants' ],
alias: {
// value: newValue
'民主進步黨': '民進黨',
'中國國民黨': '國民黨',
// field: { value: newValue }
'enable': { 'Y': true, 'N': false }
},
groupBy: 'year',
sortOn: 'order',
sortByDesc: false
})
// output
fs.writeFile(`${JSON_DEST}/result.json`, JSON.stringify(data), (err) => {
err && console.log(err)
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment