Skip to content

Instantly share code, notes, and snippets.

@camerondavison
Last active August 20, 2023 19:42
Show Gist options
  • Save camerondavison/cbe43c326f37ab0fe34680baa960634e to your computer and use it in GitHub Desktop.
Save camerondavison/cbe43c326f37ab0fe34680baa960634e to your computer and use it in GitHub Desktop.
Apache Arrow from Python to JavaScript
{
"presets": [
"@babel/preset-env",
]
}
/node_modules/
package-lock.json

INSTALL

npm i
pip3 install numpy==1.15.0 pandas==0.23.3 pyarrow==0.9.0.post1

RUN

./python_to_arrow.py
./use-arrow.js
idx color name large_number
1 green alice 17592186044416
2 blue bob 17592186044417
3 red carl 17592186044418
{
"dependencies": {
"@babel/core": "^7.0.0-beta.55",
"@babel/node": "^7.0.0-beta.55",
"@babel/preset-env": "^7.0.0-beta.55",
"apache-arrow": "^0.3.1"
}
}
#!/usr/bin/env python3
import pandas as pd
import pyarrow as pa
import numpy as np
if __name__ == "__main__":
    # Convert ./data.csv into an Arrow IPC file (./arrow-out.ipc) holding a
    # single record batch.

    # The input is comma-separated, so read_csv's default separator applies.
    df = pd.read_csv("./data.csv")

    # Narrow the idx column to 32-bit integers before it is handed to Arrow.
    # NOTE(review): presumably so the JavaScript side can compare it against
    # plain numbers -- confirm against the consumer.
    df['idx'] = df['idx'].astype(np.int32)

    print(df.columns)
    print(df.shape)

    batch = pa.RecordBatch.from_pandas(df)
    with open("./arrow-out.ipc", 'wb') as f:
        writer = pa.ipc.RecordBatchFileWriter(f, batch.schema)
        try:
            writer.write_batch(batch)
        finally:
            # Close the writer even if the write fails, and always before the
            # underlying file handle is released by the `with` block.
            writer.close()
numpy==1.15.0
pandas==0.23.3
pyarrow==0.9.0.post1
#!/usr/bin/env ./node_modules/@babel/node/bin/babel-node.js
import { readFileSync } from 'fs';
import { Table, predicate, util } from 'apache-arrow';
/**
 * Reads ./arrow-out.ipc, prints the table, then logs how many rows match a
 * series of equality predicates on the `name`, `idx` and `large_number`
 * columns.
 */
const main = () => {
  const ipcBytes = readFileSync('./arrow-out.ipc');
  const table = Table.from([ipcBytes]);
  console.log(table.toString());

  // Number of rows whose `column` value equals `value`.
  const countWhere = (column, value) =>
    table.filter(predicate.col(column).eq(value)).count();

  const name = 'alice';
  console.log(`rows with name[${name}] in them ${countWhere('name', name)}`);

  const idx = 2;
  console.log(`rows with idx[${idx}] ${countWhere('idx', idx)}`);

  // 2^44 + 1. The same value is matched twice: first as a plain JS number,
  // then as a util.Int64 built from its decimal string.
  // NOTE(review): presumably this contrasts how the 64-bit column compares
  // against each representation -- confirm with the apache-arrow version in use.
  const largeNumber = 17592186044417;
  console.log(
    `rows with large_number[${largeNumber}] ${countWhere('large_number', largeNumber)}`
  );

  const largeNumberInt64 = util.Int64.fromString(`${largeNumber}`);
  console.log(
    `rows with large_number[${largeNumber}] Int64 ${countWhere('large_number', largeNumberInt64)}`
  );
};

main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment