Created
January 26, 2020 01:16
-
-
Save JohnEmhoff/274f6e05cba3f17a16683eb394bfe6b5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import contextlib
import random

import pyorc
# Use rows of random numbers and strings as test data.
# The schema is 25 (string, float) column pairs named a0/b0 .. a24/b24,
# rendered as an ORC struct type description.
fields = ",".join(f"a{y}:string,b{y}:float" for y in range(25))
spec = f"struct<{fields}>"
def gen_row(pairs: int = 25) -> tuple:
    """Build one random test row of alternating string and float values.

    Args:
        pairs: Number of (string, float) value pairs to generate. The
            default of 25 matches the 25 string/float column pairs the
            schema in this file declares.

    Returns:
        A tuple of ``2 * pairs`` values laid out as
        ``(str, float, str, float, ...)``.
    """
    row = []
    for _ in range(pairs):
        # Repeat the textual form of a random float ten times so that the
        # string value is noticeably longer than a single number.
        row.append(str(random.random()) * 10)
        row.append(random.random())
    return tuple(row)
# Pre-generate a pool of rows once; the write loop below samples from it
# instead of building a fresh row for every write.
data = [gen_row() for _ in range(10000)]

for iteration in range(30):
    print(iteration)
    # ExitStack unwinds in reverse order of registration, so each writer is
    # closed before the file it writes to, and everything is released even
    # if a write raises part-way through. Previously only the writers were
    # closed, so the 100 file objects opened per iteration were never closed.
    with contextlib.ExitStack() as stack:
        writers = []
        # create 100 writers at a time
        for n in range(100):
            fp = stack.enter_context(open(f"out-{n}.orc", "wb"))
            # note the small stripe size
            writer = pyorc.Writer(fp, spec, stripe_size=1 * 1024 * 1024)
            writers.append(stack.enter_context(writer))
        # write 1 million rows, each to a random writer
        for _ in range(1000000):
            w = random.choice(writers)
            w.write(random.choice(data))
    # leaving the ``with`` block closes all of our writers and their files
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment