Skip to content

Instantly share code, notes, and snippets.

@OhadRubin
Created June 2, 2024 08:56
Show Gist options
  • Save OhadRubin/64f3769bb126b5545e4a37b631aed071 to your computer and use it in GitHub Desktop.
Save OhadRubin/64f3769bb126b5545e4a37b631aed071 to your computer and use it in GitHub Desktop.
incrementally read a pickle file and show progress
import pickle
import numpy as np
from tqdm import tqdm
import os
# Build a big random matrix and pickle it to disk so there is a large file
# to read back. At 40000 x 10000 float64 values this is roughly 3.2 GB;
# shrink the shape (e.g. to (1000, 1000)) for a quick experiment.
a = np.random.random((40000, 10000))
with open('large_array.pkl', 'wb') as f:
    pickle.dump(a, f)
# Function to incrementally read a pickle file with a progress bar
def read_pickle_in_chunks(file_path, chunk_size=1024):
    """Load a pickle file while showing a byte-level progress bar.

    The file is read ``chunk_size`` bytes at a time into an in-memory
    buffer, advancing a tqdm progress bar after every chunk. Once the whole
    file has been read, the buffer is unpickled in a single call.

    Note that the complete file content is held in memory alongside the
    unpickled object, so peak memory use is roughly the file size plus the
    size of the resulting object.

    SECURITY: unpickling can execute arbitrary code. Only use this on files
    that come from a trusted source.

    Args:
        file_path: Path of the pickle file to read.
        chunk_size: Number of bytes requested per read. Larger values mean
            fewer progress updates and less per-chunk overhead.

    Returns:
        The object stored in the pickle file.

    Raises:
        OSError: If the file cannot be stat'ed or opened.
        EOFError, pickle.UnpicklingError: If the content is not a complete,
            valid pickle.
    """
    file_size = os.path.getsize(file_path)
    buffer = bytearray()
    # Manage the progress bar and the file together so the bar is always
    # closed, even when opening or reading the file raises.
    with tqdm(total=file_size, unit='B', unit_scale=True,
              desc="Reading pickle file") as progress_bar, \
            open(file_path, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            buffer.extend(chunk)
            progress_bar.update(len(chunk))
    # pickle.loads accepts any bytes-like object, so no copy to bytes is needed.
    return pickle.loads(buffer)
# Load the array back through the chunked reader, which shows a progress bar
# while the file is being read.
file_path = 'large_array.pkl'
loaded_array = read_pickle_in_chunks(file_path)

# Sanity-check the round trip: the loaded array should have the same shape
# and the same contents as the one that was written.
print("Original array shape:", a.shape)
print("Loaded array shape:", loaded_array.shape)
print("Arrays equal:", np.array_equal(a, loaded_array))

print("Finished reading pickle file")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment