Created November 7, 2024 23:50
-
-
Save colin-ho/0eacf9a515218e6af9e538448e27a83d to your computer and use it in GitHub Desktop.
Daft Streaming Engine Demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time

import daft
import memray
def daft_in_memory_operation_one_partition(nums: int):
    """Run the per-vendor aggregation query ``nums`` times on the native runner.

    Every iteration re-reads the parquet files matching ``/nyc_taxi_2022/*``,
    sums ``total_amount`` grouped by ``VendorID`` through a SQL query, and
    calls ``show(100)`` on the result.

    Args:
        nums: How many times to repeat the read / aggregate / show cycle.
    """
    query = "select VendorID, sum(total_amount) as total_amount from df group by VendorID"
    daft.context.set_runner_native()
    for _ in range(nums):
        # The query text refers to this frame as `df` ("from df"), so the
        # local must keep that exact name even though it looks unused.
        df = daft.read_parquet("/nyc_taxi_2022/*")
        daft.sql(query).show(100)
# Profile the benchmark under memray, then report peak memory and mean run time.
output_file_path = "memray_output.bin"
num_runs = 10

with memray.Tracker(output_file_path, native_traces=True):
    # perf_counter is monotonic, so the measured interval is not affected by
    # system clock adjustments the way time.time() differences can be.
    start = time.perf_counter()
    daft_in_memory_operation_one_partition(nums=num_runs)
    end = time.perf_counter()

# Open the capture file only after the tracker block has finished.
reader = memray.FileReader(output_file_path)
print(f"Peak memory usage: {reader.metadata.peak_memory / 1024 / 1024:.2f} MB")
print(f"Average time taken: {(end - start) / num_runs:.2f} seconds")
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment
Memory graphs:
New (streaming):
Old: