Skip to content

Instantly share code, notes, and snippets.

@butlern
Created May 13, 2019 17:47
Show Gist options
  • Save butlern/556e1a74d0636986b56251e8d96dda8d to your computer and use it in GitHub Desktop.
Save butlern/556e1a74d0636986b56251e8d96dda8d to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import parquet as pq
import sys
from io import BytesIO
# Purpose: Read parquet files from stdin to pipeline in MemSQL
def format_row(row):
    """Return the values of *row* joined into one tab-separated line.

    Every value is converted with ``str()`` (so ``None`` becomes the text
    ``"None"``). No trailing newline is added.

    NOTE(review): tabs or newlines embedded in a value are not escaped;
    confirm the consuming pipeline's field/line delimiters tolerate that.
    """
    return "\t".join(str(value) for value in row)


def main():
    """Read one parquet file from stdin and print its rows to stdout.

    Each row is written as a single tab-separated line.
    """
    # Parquet is binary, so read the raw bytes from the underlying buffer
    # rather than the text-mode sys.stdin (Python 3).
    raw = sys.stdin.buffer.read()
    # Wrap the bytes in an in-memory file object to hand to the reader.
    data = BytesIO(raw)
    # Emit every row. The previous version hit a stray `break` after the
    # first row, so only one line was ever produced.
    for row in pq.reader(data):
        print(format_row(row))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment