Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Index all drives connected to macOS with multithreaded Python 3

Index all drives connected to macOS with multithreaded Python 3

import os
import sys
from threading import  Thread
from datetime import datetime
import subprocess

# Shared index of every file found: full file path -> file name.
# Written to by get_locations() and by walker().
dict1 = dict()

def get_locations(root='/Volumes', index=None):
    """Collect the directories found directly inside each volume under *root*.

    Every directory directly under *root* is treated as a volume. Directories
    found directly inside a volume are returned so each can be walked on its
    own; regular files found there are recorded in *index* immediately.

    Args:
        root: Directory whose children are scanned as volumes. Defaults to
            ``/Volumes``.
        index: Mapping that receives ``full path -> file name`` for files
            located directly inside a volume. Defaults to the module-level
            ``dict1``.

    Returns:
        A list with the paths of the directories found inside the volumes.

    Raises:
        OSError: If *root* itself cannot be scanned.
    """
    if index is None:
        index = dict1
    rtn = []
    # ``with`` closes each scandir iterator promptly instead of leaving the
    # directory handle open until garbage collection.
    with os.scandir(root) as volumes:
        for volume in volumes:
            try:
                # Plain files in the root cannot be scanned; skip them.
                # is_dir() follows symlinks here, so a volume that is a
                # symlink to a directory is still scanned.
                if not volume.is_dir():
                    continue
                with os.scandir(volume.path) as entries:
                    for entry in entries:
                        if entry.is_dir(follow_symlinks=False):
                            rtn.append(entry.path)
                        elif entry.is_file(follow_symlinks=False):
                            index[entry.path] = entry.name
            except OSError as exc:
                # One unreadable volume must not abort the whole scan.
                print("Skipping {}: {}".format(volume.path, exc),
                      file=sys.stderr)
    return rtn

def walker(location, index=None):
    """Recursively record every file below *location*.

    Args:
        location: Directory to walk top-down with ``os.walk``.
        index: Mapping that receives ``full path -> file name`` for each file
            found. Defaults to the module-level ``dict1``.
    """
    if index is None:
        index = dict1
    for root, _dirs, files in os.walk(location, topdown=True):
        for name in files:
            # os.path.join avoids a doubled separator when *root* already
            # ends with one (e.g. location passed with a trailing slash).
            index[os.path.join(root, name)] = name

def create():
    """Index every location from get_locations() using one thread each.

    A Thread running walker() is started for each returned location, and the
    call blocks until all of them have finished.
    """
    workers = [
        Thread(target=walker, args=(target,))
        for target in get_locations()
    ]
    for worker in workers:
        worker.start()

    # join() waits for a thread to finish; it does not terminate it.
    for worker in workers:
        worker.join()

# Run the indexing pass once and report how long it took (wall-clock time).
t1 = datetime.now()
create()
t2 = datetime.now()
total = t2 - t1  # timedelta between the two timestamps
print("Time taken to index ", total)

# If we want to use Pandas to do some filtering:
import pandas as pd
# One row per indexed file: 'Path' is the full path, 'File' is the file name.
df = pd.DataFrame(list(dict1.items()), columns=['Path','File'])
# The pattern is a raw string so that '\.' reaches the regex engine untouched;
# in a normal literal it is an invalid escape sequence and triggers a warning.
# NOTE: despite its name, imagesDataFrame is a boolean Series (True where the
# file name ends in ".jpg", case-insensitive); use df[imagesDataFrame] to get
# the matching rows.
imagesDataFrame = df["File"].str.contains(r'\.jpg$', case=False, regex=True)

Read the article:

View this on Coffield Web

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.