Last active
June 3, 2024 03:24
-
-
Save ashmalvayani/ce56cb807bd5e88d08a82d8eefc6f7ae to your computer and use it in GitHub Desktop.
Reading jsonl.zst, jsonl.xz in DataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Read a .jsonl.zst file as a DataFrame in Python
# Requires the third-party package: pip install zstandard  (in a notebook: !pip install zstandard)
##################### | |
import io
import json

import pandas as pd
import zstandard as zst
def read_jsonl_zst(file_path):
    """Lazily yield one parsed JSON value per line of a Zstandard-compressed JSONL file.

    Args:
        file_path: Path of the ``.jsonl.zst`` file to read.

    Yields:
        The result of ``json.loads`` for each non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'rb') as file:
        # The module is imported as `zst`; the previous `zstd` spelling
        # raised NameError on the first call.
        decompressor = zst.ZstdDecompressor()
        stream_reader = decompressor.stream_reader(file)
        # Wrap the binary decompression stream so it can be iterated as
        # UTF-8 text lines without loading the whole file.
        stream = io.TextIOWrapper(stream_reader, encoding="utf-8")
        for line in stream:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record and would make json.loads fail.
            if not line.strip():
                continue
            yield json.loads(line)
# NOTE: `file_path` is not assigned anywhere in this snippet; set it to the
# path of your .jsonl.zst file before running the two lines below.
data = list(read_jsonl_zst(file_path))  # drains the generator into a list
df = pd.DataFrame(data)
##--------------## | |
## Read a .jsonl.xz file as a DataFrame in Python
import lzma | |
import json | |
import pandas as pd | |
def read_jsonl_xz(file_path):
    """Lazily yield one parsed JSON value per line of an XZ-compressed JSONL file.

    Args:
        file_path: Path of the ``.jsonl.xz`` file to read.

    Yields:
        The result of ``json.loads`` for each non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # 'rt' makes lzma.open decompress and decode to text in one step.
    with lzma.open(file_path, 'rt', encoding='utf-8') as file:
        for line in file:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record and would make json.loads fail.
            if not line.strip():
                continue
            yield json.loads(line)
# NOTE: `file_path` is not assigned anywhere in this snippet; set it to the
# path of your .jsonl.xz file before running the two lines below.
data = list(read_jsonl_xz(file_path))  # drains the generator into a list
df = pd.DataFrame(data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment