Skip to content

Instantly share code, notes, and snippets.

@Miopas

Miopas/read_zst.py

Created May 20, 2020
Embed
What would you like to do?
Read Zstandard (.zst) files using a streaming decompressor
# python 3.6
import zstandard
import pathlib
import shutil
import os
import math
import pandas as pd
import sys
def decompress_zstandard_to_folder(input_file, destination_dir):
    """Decompress a Zstandard file into ``destination_dir``.

    The output file is named after ``input_file`` with its last suffix
    removed (``pathlib.Path.stem``), so ``dump.json.zst`` is written to
    ``destination_dir/dump.json``. The output is opened in ``'wb'`` mode,
    so an existing file of that name is overwritten.

    Args:
        input_file: Path (``str`` or ``pathlib.Path``) of the compressed file.
        destination_dir: Directory in which the decompressed file is created.
    """
    source = pathlib.Path(input_file)
    with source.open('rb') as zst_stream:
        decompressor = zstandard.ZstdDecompressor()
        target = pathlib.Path(destination_dir).joinpath(source.stem)
        with target.open('wb') as out_stream:
            # copy_stream moves data from the compressed handle to the
            # output handle through the decompressor.
            decompressor.copy_stream(zst_stream, out_stream)
def read_zst(infile):
    """Stream-decompress a Zstandard file and print it line by line.

    The file is read in fixed-size chunks of decompressed bytes, decoded as
    UTF-8, split on ``"\\n"``, and each complete line is printed to stdout
    without its newline.

    Compared with decoding each chunk independently, this version:

    * decodes incrementally, so a multi-byte UTF-8 character that straddles
      a chunk boundary no longer raises ``UnicodeDecodeError``;
    * accumulates the unfinished tail of a chunk instead of overwriting it,
      so lines longer than one chunk are not truncated;
    * prints a final line that is not terminated by a newline.

    Args:
        infile: Path of the ``.zst`` file to read.

    Raises:
        UnicodeDecodeError: If the decompressed data is not valid UTF-8
            (including a stream that ends in the middle of a character).
    """
    import codecs  # local import keeps this block self-contained

    size = 1 << 24  # 16 MiB of decompressed data per read
    decoder = codecs.getincrementaldecoder('utf-8')()
    with open(infile, 'rb') as fh:
        dctx = zstandard.ZstdDecompressor()
        with dctx.stream_reader(fh) as reader:
            pending = ""  # text after the last newline seen so far
            while True:
                chunk = reader.read(size)
                if not chunk:
                    break
                # The incremental decoder buffers a trailing partial
                # character until the bytes completing it arrive.
                lines = (pending + decoder.decode(chunk)).split("\n")
                # The last element is an unfinished line (or "" when the
                # chunk ended exactly on a newline); carry it forward.
                pending = lines.pop()
                for line in lines:
                    print(line)
            # Flush the decoder; raises if the stream ended mid-character.
            pending += decoder.decode(b"", final=True)
            if pending:
                print(pending)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.