Skip to content

Instantly share code, notes, and snippets.

@AranKomat
Created August 14, 2021 00:37
Show Gist options
  • Save AranKomat/cdd0a1e5fd8f3318a35e05081de12605 to your computer and use it in GitHub Desktop.
import os
import json
from time import time
from time import time
# Build a JSON index of every image file that exists under TXT_PATH.
#
# Candidate files are probed at TXT_PATH/<idx>/<idx2>.jpg for idx in
# [0, max_idx) and idx2 in [0, 1000). Every path that exists is collected
# in scan order and the resulting list is written to ./index.json.
#
# NOTE(review): the indentation of this script was lost; the structure
# below assumes the progress line is printed once per sub-directory
# (outer loop iteration) -- confirm against the original.
t = time()  # start timestamp for the progress report
TXT_PATH = "./80m-dataset/img"
paths = []
max_num = 90000000  # total number of candidate image slots to probe
max_idx = max_num // 1000  # number of numbered sub-directories (1000 slots each)
for idx in range(max_idx):
    # A missing sub-directory cannot contain any image, so a single check
    # here avoids 1000 pointless per-file existence checks.
    if os.path.isdir(os.path.join(TXT_PATH, str(idx))):
        for idx2 in range(1000):
            path = os.path.join(TXT_PATH, str(idx), str(idx2) + ".jpg")
            if os.path.exists(path):
                paths.append(path)
    tm = time() - t
    # Progress: sub-directory index, elapsed seconds, sub-directories/second.
    # The elapsed time can be 0.0 when an iteration finishes within the
    # clock's resolution; report an infinite rate instead of dividing by zero.
    rate = (idx + 1) / tm if tm > 0 else float("inf")
    print(idx, tm, rate)
with open("./index.json", 'w') as f:
    json.dump(paths, f, indent=4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment