Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Index all drives connected to macOS with multithreaded Python 3

Index all drives connected to macOS with multithreaded Python 3

import os
import sys
from threading import  Thread
from datetime import datetime
import subprocess

dict1 = {}

def get_locations(root='/Volumes', index=None):
    """Collect the first-level directories of every volume under ``root``.

    Each entry directly under ``root`` is treated as a volume and scanned one
    level deep. Directories found there are returned so the caller can walk
    them; regular files found there are recorded in ``index`` immediately.
    Symlinks inside a volume are neither returned nor recorded.

    Args:
        root: Directory whose children are the volumes to scan.
        index: Mapping that receives ``full path -> file name`` for files
            sitting directly inside a volume. Defaults to the module-level
            ``dict1``.

    Returns:
        A list of directory paths, one per first-level directory.

    Raises:
        OSError: If ``root`` itself cannot be scanned.
    """
    if index is None:
        index = dict1
    rtn = []
    # ``with`` closes the scandir iterators promptly instead of leaking the
    # underlying directory handles until garbage collection.
    with os.scandir(root) as volumes:
        for volume in volumes:
            try:
                with os.scandir(volume.path) as entries:
                    for entry in entries:
                        if entry.is_dir(follow_symlinks=False):
                            rtn.append(entry.path)
                        elif entry.is_file(follow_symlinks=False):
                            index[entry.path] = entry.name
            except OSError:
                # A stray file under the root, a permission-denied volume or
                # one that vanished mid-scan must not abort the whole index.
                continue
    return rtn

def walker(location, index=None):
    """Recursively record every file below ``location``.

    Args:
        location: Directory to walk top-down with ``os.walk``.
        index: Mapping that receives ``full path -> file name`` for each file
            found. Defaults to the module-level ``dict1``.
    """
    if index is None:
        index = dict1
    for root, _dirs, files in os.walk(location, topdown=True):
        for file in files:
            # os.path.join avoids the doubled separator that plain string
            # concatenation produces when ``root`` already ends in one.
            index[os.path.join(root, file)] = file

def create():
    """Index all locations concurrently, one thread per location.

    Starts a thread running ``walker`` for each path returned by
    ``get_locations`` and blocks until every thread has finished.
    """
    workers = []
    for path in get_locations():
        worker = Thread(target=walker, args=(path,))
        worker.start()
        workers.append(worker)

    # join() only waits for each thread to finish; it does not stop it.
    for worker in workers:
        worker.join()

# Build the index and report how long it took (wall-clock time).
t1= datetime.now()
create()  # blocks until every walker thread has finished
t2= datetime.now()
total =t2-t1  # a datetime.timedelta
print("Time taken to index " , total)

# If we want to use Pandas to do some filtering:
import pandas as pd
# One row per indexed file: its full path and its bare file name.
df = pd.DataFrame(list(dict1.items()), columns=['Path','File'])
# Raw string: '\.' is not a valid escape in a normal string literal and
# triggers an invalid-escape-sequence warning on current Python versions.
# NOTE: despite its name this is a boolean Series (True where the file name
# ends in ".jpg", case-insensitive); use df[imagesDataFrame] to get the rows.
imagesDataFrame = df["File"].str.contains(r'\.jpg$', case=False, regex=True)

Read the article:

View this on Coffield Web

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment