import os
import sys
from threading import Thread
from datetime import datetime
import subprocess
# Shared index filled by get_locations() and walker(): full file path -> file name.
dict1 = {}


def get_locations(root='/Volumes'):
    """Return the directories found one level below each entry of *root*.

    Every entry directly under *root* (by default ``/Volumes``) is scanned
    once.  Sub-directories found there are collected and returned so that
    they can be walked separately; regular files found at that level are
    recorded straight into ``dict1``.  Symbolic links inside a scanned entry
    are neither followed nor indexed.

    Args:
        root: Directory whose entries are scanned. Defaults to ``/Volumes``.

    Returns:
        A list of directory paths (strings).

    Raises:
        OSError: If *root* itself cannot be scanned.
    """
    rtn = []
    # Snapshot the first level so the scandir handle is closed before
    # descending.
    with os.scandir(root) as volumes:
        volume_paths = [volume.path for volume in volumes]

    for volume_path in volume_paths:
        try:
            with os.scandir(volume_path) as entries:
                for entry in entries:
                    if entry.is_dir(follow_symlinks=False):
                        rtn.append(entry.path)
                    elif entry.is_file(follow_symlinks=False):
                        dict1[entry.path] = entry.name
        except OSError:
            # Plain files, unreadable directories or entries that vanish
            # mid-scan must not abort indexing of the remaining entries.
            continue
    return rtn
def walker(location):
    """Record every file found beneath *location* in the shared ``dict1``.

    The tree is traversed with ``os.walk``; each file is stored as
    ``full path -> file name``.

    Args:
        location: Path of the directory tree to index.
    """
    for root, _dirs, files in os.walk(location):
        for name in files:
            # os.path.join avoids a doubled separator when ``root`` already
            # ends with one (e.g. a location passed with a trailing slash).
            dict1[os.path.join(root, name)] = name
def create():
    """Index every location returned by ``get_locations()`` concurrently.

    One thread running ``walker`` is started per location; the function
    returns only after all of them have finished.
    """
    workers = [
        Thread(target=walker, args=(location,))
        for location in get_locations()
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        # join() blocks until the thread completes; it does not stop it.
        worker.join()
# Run one full indexing pass and report how long it took (wall-clock time).
t1 = datetime.now()
create()
t2 = datetime.now()

total = t2 - t1  # timedelta between the two timestamps
print("Time taken to index ", total)
# If we want to use Pandas to do some filtering:
import pandas as pd

# One row per indexed file: the full path and the bare file name.
df = pd.DataFrame(list(dict1.items()), columns=['Path', 'File'])

# Raw string: the regex engine must receive the backslash-dot untouched;
# in a normal string literal "\." is an invalid escape sequence.
# NOTE: despite its name this is a boolean Series (True where the file name
# ends in ".jpg", case-insensitively); use df[imagesDataFrame] to get the rows.
imagesDataFrame = df["File"].str.contains(r'\.jpg$', case=False, regex=True)
# Last active
# September 25, 2019 18:54
# -
# -
# Save CoffieldWeb/57dc5b4dcc01d175335b43de1a16db96 to your computer and use it in GitHub Desktop.
# Index all drives connected to macOS with multithreaded Python 3
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment