Skip to content

Instantly share code, notes, and snippets.

@Elypha
Forked from ashmalvayani/read_zst_xz_to_df.py
Created June 3, 2024 03:24
Show Gist options
  • Save Elypha/ab2c0c90d0130dbe4b80e48a3c6f13bb to your computer and use it in GitHub Desktop.
Save Elypha/ab2c0c90d0130dbe4b80e48a3c6f13bb to your computer and use it in GitHub Desktop.
Reading jsonl.zst, jsonl.xz in DataFrame
## Read jsonl.zst file as dataframe in python
!pip install zstandard pandas
#####################
import io
import json

import pandas as pd
import zstandard as zst
def read_jsonl_zst(file_path):
    """Yield one parsed JSON value per line of a Zstandard-compressed JSONL file.

    The file is opened in binary mode, decompressed as a stream and decoded
    as UTF-8, so records are produced lazily, one line at a time.

    Args:
        file_path: Path to the ``.jsonl.zst`` file.

    Yields:
        The value returned by ``json.loads`` for each non-blank line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'rb') as file:
        # The file imports the package as ``zst``; the previous ``zstd`` name
        # was never bound and raised NameError on first use.
        decompressor = zst.ZstdDecompressor()
        stream_reader = decompressor.stream_reader(file)
        stream = io.TextIOWrapper(stream_reader, encoding="utf-8")
        for line in stream:
            # Blank / whitespace-only lines carry no record and would make
            # json.loads raise, so skip them.
            if not line.strip():
                continue
            yield json.loads(line)
# Example usage: drain the generator into a list of records, then build the
# DataFrame from it. `file_path` is not defined in this snippet; it is
# expected to already hold the path to the .jsonl.zst file.
data = [*read_jsonl_zst(file_path)]
df = pd.DataFrame(data=data)
##--------------##
## Read jsonl.xz file as dataframe in python
import lzma
import json
import pandas as pd
def read_jsonl_xz(file_path):
    """Yield one parsed JSON value per line of an XZ-compressed JSONL file.

    The file is opened with ``lzma.open`` in text mode (UTF-8), so records
    are produced lazily, one line at a time.

    Args:
        file_path: Path to the ``.jsonl.xz`` file.

    Yields:
        The value returned by ``json.loads`` for each non-blank line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with lzma.open(file_path, 'rt', encoding='utf-8') as file:
        for line in file:
            # Blank / whitespace-only lines carry no record and would make
            # json.loads raise, so skip them.
            if not line.strip():
                continue
            yield json.loads(line)
# Example usage: drain the generator into a list of records, then build the
# DataFrame from it. `file_path` is not defined in this snippet; it is
# expected to already hold the path to the .jsonl.xz file.
data = [*read_jsonl_xz(file_path)]
df = pd.DataFrame(data=data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment