Skip to content

Instantly share code, notes, and snippets.

@ashmalvayani
Last active June 3, 2024 03:24
Show Gist options
  • Save ashmalvayani/ce56cb807bd5e88d08a82d8eefc6f7ae to your computer and use it in GitHub Desktop.
Save ashmalvayani/ce56cb807bd5e88d08a82d8eefc6f7ae to your computer and use it in GitHub Desktop.
Reading jsonl.zst, jsonl.xz in DataFrame
## Read jsonl.zst file as dataframe in python
!pip install zstandard
#####################
import io
import json

import pandas as pd
import zstandard as zst
def read_jsonl_zst(file_path):
    """Yield one parsed JSON value per line of a Zstandard-compressed JSONL file.

    Args:
        file_path: Path of the ``.jsonl.zst`` file; passed directly to ``open``.

    Yields:
        The result of ``json.loads`` for each non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'rb') as file:
        # The zstandard module is imported under the alias ``zst``; the
        # previous code referenced an unbound ``zstd`` name here.
        decompressor = zst.ZstdDecompressor()
        stream_reader = decompressor.stream_reader(file)
        # Wrap the binary decompression stream so it can be iterated as
        # UTF-8 text lines.
        stream = io.TextIOWrapper(stream_reader, encoding="utf-8")
        for line in stream:
            # Skip blank / whitespace-only lines, which json.loads rejects.
            if line.strip():
                yield json.loads(line)
# Drain the generator into a list of records, then build the DataFrame.
# NOTE(review): ``file_path`` is not assigned in the visible snippet; it is
# presumably defined by the user before this point - confirm.
data = [*read_jsonl_zst(file_path)]
df = pd.DataFrame(data)
##--------------##
## Read jsonl.xz file as dataframe in python
import lzma
import json
import pandas as pd
def read_jsonl_xz(file_path):
    """Yield one parsed JSON value per line of an XZ-compressed JSONL file.

    Args:
        file_path: Path of the ``.jsonl.xz`` file; passed directly to
            ``lzma.open``.

    Yields:
        The result of ``json.loads`` for each non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Text mode ('rt') makes lzma decode the decompressed bytes as UTF-8,
    # so the file object can be iterated line by line.
    with lzma.open(file_path, 'rt', encoding='utf-8') as file:
        for line in file:
            # Skip blank / whitespace-only lines, which json.loads rejects.
            if line.strip():
                yield json.loads(line)
# Drain the generator into a list of records, then build the DataFrame.
# NOTE(review): ``file_path`` is not assigned in the visible snippet; it is
# presumably defined by the user before this point - confirm.
data = [*read_jsonl_xz(file_path)]
df = pd.DataFrame(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment