Skip to content

Instantly share code, notes, and snippets.

@chenyaofo
Created May 31, 2023 09:37
Show Gist options
  • Save chenyaofo/2ceb94a5b5b87ebe8aacd8f7fdd25996 to your computer and use it in GitHub Desktop.
Save chenyaofo/2ceb94a5b5b87ebe8aacd8f7fdd25996 to your computer and use it in GitHub Desktop.
Async reading multiple files.
import asyncio
import aiofiles
# Paths of the 256 shard files (000000.tar through 000255.tar) that the benchmark reads.
tar_filenames = [f"/home/chenyaofo/datasets/imagenet-wds/train/{i:06d}.tar" for i in range(256)]
# Alternative location for the same shard names (presumably a GPFS mount -- confirm);
# uncomment to benchmark that path instead.
# tar_filenames = [f"/gpfs01/home/chenyaofo/imagenet-wds/train/{i:06d}.tar" for i in range(256)]
# Running total of bytes read so far; the reader functions update it via `global count`.
count = 0
def async_reading():
    """Read every file in ``tar_filenames`` concurrently via asyncio + aiofiles.

    One coroutine is scheduled per file; each reads its whole file in binary
    mode. Once all reads have finished, the module-level ``count`` is
    increased by the combined number of bytes read.

    Raises:
        OSError: propagated from ``aiofiles.open`` / ``read`` if a file
            cannot be opened or read.
    """
    global count
    print("asyncio reading based on naive asyncio")

    async def read_tar_file(filename):
        """Read one file completely and return its size in bytes."""
        async with aiofiles.open(filename, mode='rb') as file:
            return len(await file.read())

    async def read_multiple_tar_files(filenames):
        """Read all files concurrently and return the total bytes read."""
        sizes = await asyncio.gather(*(read_tar_file(name) for name in filenames))
        return sum(sizes)

    # asyncio.run() owns the event loop's whole lifecycle (create, run,
    # close). asyncio.get_event_loop() is deprecated when no loop is running
    # and raises RuntimeError on Python 3.14+, so it is not used here.
    total_bytes = asyncio.run(read_multiple_tar_files(tar_filenames))
    count += total_bytes
def sync_reading():
    """Read every file in ``tar_filenames`` one after another on the calling thread.

    Each file is opened in binary mode and read in full; its length is added
    to the module-level ``count``.
    """
    global count
    print("sync reading, 1 thread")
    for tar_path in tar_filenames:
        with open(tar_path, 'rb') as stream:
            payload = stream.read()
        # Only the size matters for the throughput figure; the bytes are discarded.
        count += len(payload)
if __name__ == '__main__':
    import time

    # Benchmark entry point: time one full pass over the shard list and
    # print the resulting read throughput.
    began = time.perf_counter()
    print(tar_filenames[0])
    async_reading()
    # sync_reading()  # swap with the call above to measure the blocking baseline
    # count is in bytes; dividing by 1024**3 yields GiB even though the label says "GB/s".
    gib_read = count / (1024**3)
    print(gib_read / (time.perf_counter() - began), "GB/s")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment