Skip to content

Instantly share code, notes, and snippets.

@NBprojekt
Last active May 11, 2021 13:54
Show Gist options
  • Save NBprojekt/0ca66d3dec6245f72be39c923cbcb731 to your computer and use it in GitHub Desktop.
Save NBprojekt/0ca66d3dec6245f72be39c923cbcb731 to your computer and use it in GitHub Desktop.
Fetches all unique file types in all subfolders

Search unique file types

This script returns all unique file types found in a given directory, including its subdirectories.

Optional parameter: ROOT_DIR (default: ./)

py search-unique-file-types.py ./your-dir
example
import os
import time
import sys
# Directories the recursive scan skips (dependency, VCS, IDE and build folders).
IGNORED_DIRECTORIES = (
    'node_modules',
    '.git',
    '.vs',
    'obj',
    'bin',
)

# Unique file types collected by the scan, in discovery order.
FILE_TYPES = list()

# Default directory to scan; replaced by the first command-line argument
# when that argument names an existing path.
ROOT_DIR = './'
def listDirectories(path):
    """Recursively record the unique file types found under ``path``.

    Every regular file contributes one entry to the module-level
    ``FILE_TYPES`` list (kept free of duplicates, in discovery order):
    names starting with a dot (e.g. ``.gitignore``) are recorded as-is,
    all other names as ``*.<text after the last dot>``.  A name without
    any dot is therefore recorded as ``*.<name>``.

    Subdirectories whose name is listed in ``IGNORED_DIRECTORIES`` are not
    entered.  Directories that cannot be accessed or listed are skipped
    silently.

    Args:
        path: Directory to scan.

    Returns:
        None; results are accumulated in ``FILE_TYPES``.
    """
    if not (os.access(path, os.F_OK) and os.access(path, os.R_OK) and os.access(path, os.X_OK)):
        return
    try:
        entries = os.scandir(path)
    except OSError:
        # os.access() may report success for a directory that still cannot
        # be listed; treat it like any other inaccessible path.
        return
    # The context manager closes the directory handle once the loop is done.
    with entries:
        for obj in entries:
            if obj.is_file():
                # TODO: optimize filename build and stack push
                fileName = obj.name if obj.name.startswith('.') else '*.' + obj.name.split('.')[-1]
                if fileName not in FILE_TYPES:
                    FILE_TYPES.append(fileName)
            # Compare the directory *name*: a suffix test on the full path
            # would also skip unrelated folders such as "cabin" or "myobj".
            elif obj.is_dir() and obj.name not in IGNORED_DIRECTORIES:
                listDirectories(obj.path)
# The directory argument is optional: use it only when it was supplied and
# names an existing path, otherwise keep the default ROOT_DIR.
if len(sys.argv) > 1 and os.access(sys.argv[1], os.F_OK):
    ROOT_DIR = sys.argv[1]

listDirectories(ROOT_DIR)

# Print one collected file type per line, in discovery order.
for t in FILE_TYPES:
    print(t)
import os
import time
import sys
# Directories the recursive scan skips (dependency, VCS, IDE and build folders).
IGNORED_DIRECTORIES = (
    'node_modules',
    '.git',
    '.vs',
    'obj',
    'bin',
)

# Unique file types collected by the scan, in discovery order.
FILE_TYPES = list()

# Running totals updated during the scan: files visited and subdirectories
# entered.  (The spelling of DIRECOTRY_COUNTER is kept because the rest of
# the script refers to it by this name.)
FILE_COUNTER, DIRECOTRY_COUNTER = 0, 0

# Default directory to scan; replaced by the first command-line argument
# when that argument names an existing path.
ROOT_DIR = './'
def listDirectories(path):
    """Recursively record the unique file types found under ``path``.

    Every regular file increments the module-level ``FILE_COUNTER`` and
    contributes one entry to ``FILE_TYPES`` (kept free of duplicates, in
    discovery order): names starting with a dot (e.g. ``.gitignore``) are
    recorded as-is, all other names as ``*.<text after the last dot>``.
    A name without any dot is therefore recorded as ``*.<name>``.

    Every subdirectory that is entered increments ``DIRECOTRY_COUNTER``;
    subdirectories whose name is listed in ``IGNORED_DIRECTORIES`` are
    neither counted nor entered.  A directory that cannot be accessed or
    listed is reported with a ``WARN`` line on stdout and skipped.

    Args:
        path: Directory to scan.

    Returns:
        None; results are accumulated in the module-level globals.
    """
    global FILE_COUNTER, DIRECOTRY_COUNTER

    if not (os.access(path, os.F_OK) and os.access(path, os.R_OK) and os.access(path, os.X_OK)):
        print('WARN: Cant access {}'.format(path))
        return
    try:
        entries = os.scandir(path)
    except OSError:
        # os.access() may report success for a directory that still cannot
        # be listed; report it the same way as any other inaccessible path.
        print('WARN: Cant access {}'.format(path))
        return
    # The context manager closes the directory handle once the loop is done.
    with entries:
        for obj in entries:
            if obj.is_file():
                FILE_COUNTER += 1
                # TODO: optimize filename build and stack push
                fileName = obj.name if obj.name.startswith('.') else '*.' + obj.name.split('.')[-1]
                if fileName not in FILE_TYPES:
                    FILE_TYPES.append(fileName)
            # Compare the directory *name*: a suffix test on the full path
            # would also skip unrelated folders such as "cabin" or "myobj".
            elif obj.is_dir() and obj.name not in IGNORED_DIRECTORIES:
                DIRECOTRY_COUNTER += 1
                listDirectories(obj.path)
# The directory argument is optional: use it only when it was supplied and
# names an existing path, otherwise keep the default ROOT_DIR.
if len(sys.argv) > 1 and os.access(sys.argv[1], os.F_OK):
    ROOT_DIR = sys.argv[1]

print('Searching for unique file types in {}'.format(ROOT_DIR))

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments made while the scan is running.
start = time.perf_counter()
listDirectories(ROOT_DIR)
end = time.perf_counter()

print('\r\nFetched {} directories and {} files in {}s'.format(DIRECOTRY_COUNTER, FILE_COUNTER, end - start))
print('Found {} unique file types'.format(len(FILE_TYPES)))

# Wait for the user before printing the list so the summary stays visible.
input('\r\nPress Enter to show...')
for t in FILE_TYPES:
    print(t)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment