npm i
./python_to_arrow.py
./use-arrow.js
{
  "presets": [
    "@babel/preset-env"
  ]
}
/node_modules/
package-lock.json
idx | color | name | large_number
---|---|---|---
1 | green | alice | 17592186044416
2 | blue | bob | 17592186044417
3 | red | carl | 17592186044418
{
  "dependencies": {
    "@babel/core": "^7.0.0-beta.55",
    "@babel/node": "^7.0.0-beta.55",
    "@babel/preset-env": "^7.0.0-beta.55",
    "apache-arrow": "^0.3.1"
  }
}
#!/usr/bin/env python3 | |
import pandas as pd | |
import pyarrow as pa | |
import numpy as np | |
if __name__ == "__main__":
    # Convert ./data.csv into an Arrow file at ./arrow-out.ipc.
    # read_csv splits on ',' by default, matching the original sep=','.
    df = pd.read_csv("./data.csv")
    # Cast only the idx column to 32-bit integers; the other columns keep
    # whatever dtype pandas inferred.
    # NOTE(review): presumably done so a consumer can compare idx against a
    # plain number while large_number stays 64-bit -- confirm.
    df['idx'] = df['idx'].astype(np.int32)
    print(df.columns)
    print(df.shape)
    # from_pandas on RecordBatch yields a single record batch, not a Table.
    batch = pa.RecordBatch.from_pandas(df)
    with open("./arrow-out.ipc", 'bw') as f:
        writer = pa.ipc.RecordBatchFileWriter(f, batch.schema)
        try:
            writer.write_batch(batch)
        finally:
            # Always close the writer, even if write_batch raised, so it is
            # never left open; the exception still propagates.
            writer.close()
numpy==1.15.0
pandas==0.23.3
pyarrow==0.9.0.post1
#!/usr/bin/env ./node_modules/@babel/node/bin/babel-node.js | |
import { readFileSync } from 'fs'; | |
import { Table, predicate, util } from 'apache-arrow'; | |
/**
 * Read the Arrow file at ./arrow-out.ipc, print the whole table, then log
 * how many rows match each of four equality filters.
 */
const main = () => {
  const bytes = readFileSync('./arrow-out.ipc');
  const table = Table.from([bytes]);
  console.log(table.toString());

  // Number of rows in `table` whose `column` equals `value`.
  const countWhere = (column, value) =>
    table.filter(predicate.col(column).eq(value)).count();

  const name = 'alice';
  console.log(`rows with name[${name}] in them ${countWhere('name', name)}`);

  const idx = 2;
  console.log(`rows with idx[${idx}] ${countWhere('idx', idx)}`);

  // The same large_number value is matched twice: first as a plain JS
  // number, then wrapped in an arrow Int64 built from its decimal string.
  const largeNumber = 17592186044417;
  console.log(
    `rows with large_number[${largeNumber}] ${countWhere('large_number', largeNumber)}`
  );

  const asInt64 = util.Int64.fromString(String(largeNumber));
  console.log(
    `rows with large_number[${largeNumber}] Int64 ${countWhere('large_number', asInt64)}`
  );
};
main() |