Skip to content

Instantly share code, notes, and snippets.

@nexpr
Last active June 2, 2021 00:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nexpr/5878bb6e99746e5af75201caec838124 to your computer and use it in GitHub Desktop.
Save nexpr/5878bb6e99746e5af75201caec838124 to your computer and use it in GitHub Desktop.
typed csv

tcsv

JSON は同じキーの繰り返しでムダが多い
オブジェクトの配列でキーの長さも要素数倍になる 数値データの場合テキストのほとんどがキー文字列
ファイルサイズも不要に増える

csv のような形式にして 1 行目が型定義のヘッダ
ボディ部分に基本 , 区切りでデータを入れる

ex

b は boolean
値は t/f で表現する

b
t
f
f

==>> [true, false, false]

n は number

n
10
3.1
-30

==>> [10, 3.1, -30]

s は string

s
a
xyz
foo
bar

==>> ["a", "xyz", "foo", "bar"]

オブジェクトの場合はヘッダを {} で囲む
中身は key:type, 区切り
json と違ってどっちも "" 不要
json を使わない理由は順番に意味があるから
json としてパースすると順が保証されない
ボディ部分は単純に , 区切り

{a:n,b:n,c:n}
1,2,3
3,4,1

==>> [{a:1, b:2, c:3}, {a:3, b:4, c:1}]

配列の場合は [] で囲む
中身は型定義
1 つだけならその型で配列サイズは任意
, 区切りで 2 つ以上なら型の数がそのまま配列の要素数でそれぞれ指定の型
ボディ部分にも [] が必要

{a:n,b:n,c:[n,s],d:{x:{y:{z:b}}},e:[n],f:s}
1,2,[1,djea],f,[1,2,3,4],xxac

==>> [{a:1, b:2, c:[1,"djea"], d:{x:{y:{z:false}}}, e:[1,2,3,4], f: "xxac"}]

esc

パースを楽にするため # を使ってエスケープ

#   -> ##
,   -> #c
\n  -> #r
:   -> #C
" " -> #s
import { serialize, deserialize } from "./tcsv.js"
// Demo: round-trip two records through tcsv using an explicit type definition.
const def = "{a:n,b:n,c:[n,s],d:{x:{y:{z:b}}},e:s,f:[[n]],g:[[s,s]]}"

// First record: every value already has the type named in the definition.
const typedRecord = {
  a: 10,
  b: 2,
  c: [1, "J"],
  d: { x: { y: { z: false } } },
  e: "fff",
  f: [
    [1, 2, 3],
    [3, 4, 5],
  ],
  g: [
    ["a", "b"],
    ["c", "d"],
    ["e", "g"],
    ["g", "h"],
  ],
}

// Second record: values deliberately do not match the definition
// (numeric strings, a number where a boolean is expected, a null string)
// so the output shows how each one is coerced.
const looseRecord = {
  a: 123,
  b: "7",
  c: [1, 2],
  d: { x: { y: { z: 1 } } },
  e: null,
  f: [
    [11, "22", 33],
    [33, "44", 55],
  ],
  g: [[4, 5]],
}

const data = [typedRecord, looseRecord]

const tcsv_str = serialize(data, def)
console.log(tcsv_str)
/*
{a:n,b:n,c:[n,s],d:{x:{y:{z:b}}},e:s,f:[[n]],g:[[s,s]]}
10,2,[1,J],f,fff,[[1,2,3],[3,4,5]],[[a,b],[c,d],[e,g],[g,h]]
123,7,[1,2],t,#N,[[11,22,33],[33,44,55]],[[4,5]]
*/

// Parse the text back; `body` holds the decoded rows.
const org = deserialize(tcsv_str)
console.log(JSON.stringify(org.body, null, " "))
/*
[
{
"a": 10,
"b": 2,
"c": [1, "J"],
"d": {
"x": {
"y": {
"z": false
}
}
},
"e": "fff",
"f": [
[1, 2, 3],
[3, 4, 5]
],
"g": [
["a", "b"],
["c", "d"],
["e", "g"],
["g", "h"]
]
},
{
"a": 123,
"b": 7,
"c": [1, "2"],
"d": {
"x": {
"y": {
"z": true
}
}
},
"e": null,
"f": [
[11, 22, 33],
[33, 44, 55]
],
"g": [
["4", "5"]
]
}
]
*/
/**
 * Turns any iterable (string, array, ...) into a generator object so that
 * callers can pull items one at a time with `.next()` and share the cursor
 * between several consumers.
 *
 * @param {Iterable<*>} iterable - source of items
 * @returns {Generator<*>} generator yielding the items of `iterable` in order
 */
const gen = function* (iterable) {
  for (const item of iterable) {
    yield item
  }
}
/**
 * Parses a tcsv header (type definition) into a tree of type nodes.
 *
 * Node shapes:
 *   - scalar: `{ type: "n" | "s" | "b" }`
 *   - array:  `{ type: "a", items: [node, ...] }`
 *   - object: `{ type: "o", items: [{ ...node, name }, ...] }`
 *
 * Failures are reported by returning `{ error: message }` instead of throwing.
 * Anything left in the input after the top-level type is not looked at.
 *
 * @param {string} def_str - definition text, e.g. "{a:n,b:[n,s]}"
 * @returns {object} the root type node, or `{ error }`
 */
const parseDefinition = def_str => {
  // Escape codes allowed inside object key names.
  const ESCAPE_CODES = new Map([
    ["C", ":"],
    ["c", ","],
    ["r", "\n"],
    ["s", " "],
  ])

  // Resolves "#x" pairs; an unknown code stands for the character itself.
  const decodeName = raw =>
    raw.replace(/#./g, pair => {
      const code = pair[1]
      return ESCAPE_CODES.has(code) ? ESCAPE_CODES.get(code) : code
    })

  // One cursor is shared by all the readers below.
  const cursor = gen(def_str)

  // Skips forward to the next "," or to `closer`. Every other character is
  // ignored. Reports which of the three stop conditions was hit.
  const skipToSeparator = closer => {
    for (;;) {
      const step = cursor.next()
      if (step.done) return "eos"
      if (step.value === ",") return "next"
      if (step.value === closer) return "close"
    }
  }

  // Reads one type: a scalar letter, or a bracketed array / object.
  const readType = () => {
    for (;;) {
      const step = cursor.next()
      if (step.done) return { error: "invalid EOS" }
      const ch = step.value
      if (ch.trim() === "") continue // whitespace before a type is allowed
      switch (ch) {
        case "n":
        case "s":
        case "b":
          return { type: ch }
        case "[":
          return readArray()
        case "{":
          return readObject()
        default:
          return { error: `invalid char: "${ch}" in type ctx` }
      }
    }
  }

  // Reads the element types of an array; the opening "[" is already consumed.
  const readArray = () => {
    const items = []
    for (;;) {
      const element = readType()
      if (element.error) return element
      if (element.type) items.push(element)
      const stop = skipToSeparator("]")
      if (stop === "eos") return { error: "invalid EOS" }
      if (stop === "close") return { type: "a", items }
    }
  }

  // Reads "name:type" pairs of an object; the opening "{" is already consumed.
  const readObject = () => {
    const items = []
    for (;;) {
      const key = readName()
      if (key.error) return key
      const valueType = readType()
      if (valueType.error) return valueType
      items.push({ ...valueType, name: key.name })
      const stop = skipToSeparator("}")
      if (stop === "eos") return { error: "invalid EOS" }
      if (stop === "close") return { type: "o", items }
    }
  }

  // Reads a key name up to the ":" delimiter, trimmed and unescaped.
  const readName = () => {
    let raw = ""
    for (;;) {
      const step = cursor.next()
      if (step.done) return { error: "invalid EOS" }
      if (step.value === ":") return { name: decodeName(raw.trim()) }
      raw += step.value
    }
  }

  return readType()
}
/**
 * Serializes rows into tcsv text: the definition on the first line followed
 * by one line per row.
 *
 * Encoding rules:
 *   - null / undefined -> "#N" (a single token, whatever the declared type)
 *   - n -> Number(value), s -> escaped String(value), b -> "t" / "f"
 *   - arrays are wrapped in "[...]"; a one-type definition maps every element
 *     with that type, a multi-type definition is a fixed-length tuple
 *   - objects are flattened into the surrounding comma-separated list
 *
 * String escapes: "#" -> "##", "," -> "#c", ":" -> "#C", "\n" -> "#r",
 * " " -> "#s", "[" -> "#[", "]" -> "#]". These match what the decoder
 * understands ("#c" is a comma, "#C" is a colon, any unknown "#x" is "x").
 *
 * @param {*} data - array of rows; a non-array value is treated as one row
 * @param {string} def_str - type definition, written verbatim as the header
 * @returns {string} tcsv text
 * @throws {Error} "implement error" when a row meets a definition node with
 *   no known type (for example when `def_str` failed to parse)
 */
const serialize = (data, def_str) => {
  // "#" must be escaped too, otherwise a literal "#c" or "#N" in the data
  // would be read back as "," or null. Brackets are escaped so that string
  // content can never be mistaken for array delimiters.
  const ESCAPES = {
    "#": "##",
    ",": "#c",
    ":": "#C",
    "\n": "#r",
    " ": "#s",
    "[": "#[",
    "]": "#]",
  }
  // Single pass, so "#" characters produced by one replacement are never
  // escaped a second time.
  const escape = s => s.replace(/[#,:\n \[\]]/g, ch => ESCAPES[ch])

  const definition = parseDefinition(def_str)
  const rows = Array.isArray(data) ? data : [data]

  const stringify = (value, def) => {
    if (value == null) return "#N"
    // prettier-ignore
    switch (def.type) {
      case "n": return Number(value)
      case "s": return escape(String(value))
      case "b": return String(!!value)[0]
      case "a": {
        if (def.items.length === 1) {
          // homogeneous array of any length
          return "[" + value.map(x => stringify(x, def.items[0])).join(",") + "]"
        } else {
          // tuple: exactly one value per declared type
          return "[" + def.items.map((t, i) => stringify(value[i], t)).join(",") + "]"
        }
      }
      case "o": {
        // object fields are emitted in definition order, without brackets
        return def.items.map(t => stringify(value[t.name], t)).join(",")
      }
      default: throw new Error("implement error")
    }
  }

  const body = rows.map(row => stringify(row, definition)).join("\n")
  return def_str + "\n" + body
}
/**
 * Parses tcsv text (definition header + one line per row) back into values.
 *
 * Each row is split on "," into raw tokens. A token may start with one or
 * more "[" (each opens a nested array) and may end with one or more "]"
 * (each closes the innermost open array). Closing brackets are detected on
 * the raw token, before unescaping, so an escaped "#]" is kept as data and
 * a null written right before a closing bracket ("[1,#N]") is still seen as
 * the null marker.
 *
 * "[]" decodes to an empty array. An array holding exactly one empty string
 * has the same text, so it cannot be told apart and also decodes to [].
 *
 * @param {string} tcsv_str - tcsv text as produced by `serialize`
 * @returns {{definition: object, body: Array}} the parsed definition and the
 *   decoded rows
 * @throws {Error} "implement error" when a row is formatted against a
 *   definition node with no known type (for example an unparsable header)
 */
const deserialize = tcsv_str => {
  // Decodes one raw token: the exact token "#N" is null, otherwise "#x"
  // pairs are resolved (unknown codes stand for the character itself).
  const unescape = s => {
    if (s === "#N") return null
    return s.replace(/#./g, x => {
      // prettier-ignore
      switch (x[1]) {
        case "C": return ":"
        case "c": return ","
        case "r": return "\n"
        case "s": return " "
        default: return x[1]
      }
    })
  }

  // Counts the "]" characters at the end of a raw token that are real
  // delimiters. A "]" preceded by an odd number of "#" is the second half
  // of an escape pair and therefore data.
  const countClosers = raw => {
    let count = 0
    let end = raw.length
    while (end > 0 && raw[end - 1] === "]") {
      let hashes = 0
      for (let i = end - 2; i >= 0 && raw[i] === "#"; i--) hashes++
      if (hashes % 2 === 1) break
      count++
      end--
    }
    return count
  }

  // Turns one line into a nested array of decoded tokens.
  const parseRow = row => {
    const root = []
    // Arrays currently open; the last entry receives the next value.
    const stack = [root]
    for (const token of row.split(",")) {
      let text = token
      let opened = 0
      // Leading "[" (whitespace before it is tolerated) opens a nested array.
      let opener
      while ((opener = /^\s*\[/.exec(text)) !== null) {
        const nested = []
        stack[stack.length - 1].push(nested)
        stack.push(nested)
        text = text.slice(opener[0].length)
        opened++
      }
      // Never close more arrays than are open; surplus "]" stay in the text.
      const closers = Math.min(countClosers(text), stack.length - 1)
      if (closers > 0) text = text.slice(0, -closers)
      // "[" directly followed by "]" is an empty array, not [""].
      const isEmptyArray = opened > 0 && closers > 0 && text === ""
      if (!isEmptyArray) stack[stack.length - 1].push(unescape(text))
      for (let i = 0; i < closers; i++) stack.pop()
    }
    return root
  }

  // Applies the definition to the parsed tokens of one row.
  const format = def => items => {
    const rec = (def, it) => {
      switch (def.type) {
        case "n": {
          const value = it.next().value
          if (value == null) return null
          return Number(value)
        }
        case "s": {
          const value = it.next().value
          if (value == null) return null
          return String(value)
        }
        case "b": {
          const value = it.next().value
          if (value == null) return null
          return value === "f" || value === "false" || value === "0" ? false : !!value
        }
        case "a": {
          const value = it.next().value
          if (value == null) return null
          if (def.items.length === 1) {
            // homogeneous array: every element uses the single declared type
            return value.map(x => rec(def.items[0], gen([x])))
          } else {
            // tuple: the declared types consume the elements in order
            const sub_it = gen(value)
            return def.items.map(t => rec(t, sub_it))
          }
        }
        case "o": {
          // objects are flattened: fields read from the same token stream
          return Object.fromEntries(def.items.map(t => [t.name, rec(t, it)]))
        }
        default:
          throw new Error("implement error")
      }
    }
    return rec(def, gen(items))
  }

  const [header, ...body] = tcsv_str.split("\n")
  const definition = parseDefinition(header)
  return {
    definition,
    body: body.map(row => parseRow(row)).map(format(definition)),
  }
}
export { serialize, deserialize }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment