Created
October 12, 2015 15:25
-
-
Save BrechtDeMan/5c95f27d880d4f22a2d8 to your computer and use it in GitHub Desktop.
Recursively get information of all files in specified directory and subdirectories (esp. WAV file properties) and write to text file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os # walking through file system | |
import sys # command lines arguments | |
import wave # for reading .wav files | |
# Turn a number of seconds into a '[minutes]min [seconds]s' string ('N/A' if None)
def seconds2timestr(time_in_seconds):
    """Format a duration in seconds as '<minutes>min <seconds>s'.

    Both the minute count and the leftover seconds are truncated with
    int(), so fractional seconds are dropped rather than rounded.
    Returns 'N/A' when time_in_seconds is None.
    """
    if time_in_seconds is None:
        return 'N/A'
    whole_minutes = int(time_in_seconds / 60)
    leftover_seconds = int(time_in_seconds % 60)
    return "{}min {}s".format(whole_minutes, leftover_seconds)
def _totals_block(indent, size_in_MB, n_files, n_wav_files, label=None):
    """Format a horizontal rule followed by a 'TOTAL SIZE' summary line.

    When label is given it is added in parentheses on a line of its own.
    The returned text always ends with one blank line.
    """
    block = indent + 45 * "-" + '\n'  # horizontal line
    block += (indent + "TOTAL SIZE" + 20 * " " + str(size_in_MB) + " MB\t" +
              str(n_files) + " files\t" + str(n_wav_files) + " WAV files\n")
    if label is not None:
        block += "(" + label + ")\n"
    return block + '\n'


def list_files(startpath):
    """Build an indented text report of every file below startpath.

    Directories are walked alphabetically (case-insensitive). Each
    directory is printed as 'name/' followed by its non-hidden files with
    their size in MB; files ending in '.wav' additionally get sample rate,
    bit depth, channel count and duration. Hidden files (leading '.') are
    counted in the totals but not listed. A 'TOTAL SIZE' summary is printed
    after the files of each non-empty directory, and again for a whole
    subtree each time the walk leaves it.

    Returns the report as a single string; returns '' when startpath
    yields no directories (for example when it does not exist).
    """
    parts = []
    # Slot k of each accumulator collects every file found at depth >= k - 1
    # since the slot was last reset. Slot 0 is never reset (grand total);
    # slot level + 1 holds the files directly in the directory being visited.
    total_size_in_MB = [0]
    number_of_files = [0]
    number_of_wav_files = [0]
    previous_level = 0
    # Defined up front so that a walk yielding nothing falls straight through
    # the final summary loop instead of hitting unbound names.
    level = -1
    previous_root = startpath
    trailing_seps = os.sep + (os.altsep or '')

    for root, dirs, files in os.walk(startpath):
        # sort dirs in place to walk through folders alphabetically
        dirs.sort(key=lambda s: s.lower())

        # Depth relative to startpath. relpath is used rather than string
        # replacement so a trailing separator on startpath, or startpath
        # recurring inside a deeper path, cannot skew the count.
        rel_root = os.path.relpath(root, startpath)
        level = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1

        # if we went up a level again, print accumulated size
        if level < previous_level:
            for depth in range(previous_level, level, -1):
                previous_root = os.path.dirname(previous_root)  # go one level up
                parts.append(_totals_block(' ' * 4 * depth,
                                           total_size_in_MB[depth],
                                           number_of_files[depth],
                                           number_of_wav_files[depth],
                                           previous_root))
                total_size_in_MB[depth] = 0
                number_of_files[depth] = 0
                number_of_wav_files[depth] = 0
        previous_level = level

        # Only the start directory can carry a trailing separator; drop it so
        # basename/dirname see the directory itself ('or root' keeps '/').
        clean_root = root.rstrip(trailing_seps) or root
        indent = ' ' * 4 * level
        parts.append('{}{}/'.format(indent, os.path.basename(clean_root)) + '\n')
        subindent = ' ' * 4 * (level + 1)

        # sort files alphabetically, case insensitive
        files = sorted(files, key=lambda s: s.lower())

        # keep track of accumulated size at the current level
        if level >= len(total_size_in_MB) - 1:  # new level, set to zero
            total_size_in_MB.append(0)
            number_of_files.append(0)
            number_of_wav_files.append(0)
        else:  # reset for this level
            total_size_in_MB[level + 1] = 0
            number_of_files[level + 1] = 0
            number_of_wav_files[level + 1] = 0

        for f in files:
            path = os.path.join(root, f)
            try:
                file_size = os.stat(path).st_size
            except OSError:
                # e.g. a broken symlink: report it and keep going rather
                # than aborting the whole report
                parts.append("Unexpected error with " + path + '\n')
                continue
            size_in_MB = file_size / 1000000.0

            # add size to all levels up to this one
            for slot in range(level + 2):
                total_size_in_MB[slot] += size_in_MB
                number_of_files[slot] += 1

            if f.startswith('.'):
                continue  # hidden files are counted but not listed

            # always keep at least one space between the name and the size
            padding = max(30 - len(f), 1) * ' '
            if f.endswith('.wav'):
                for slot in range(level + 2):
                    number_of_wav_files[slot] += 1
                try:
                    wav_file = wave.open(path, 'r')
                    try:
                        fs = wav_file.getframerate()
                        bit_depth = 8 * wav_file.getsampwidth()
                        n_chan = wav_file.getnchannels()
                        file_length_in_seconds = wav_file.getnframes() / fs
                    finally:
                        wav_file.close()
                    parts.append(
                        '{}{}'.format(subindent, f) + padding +
                        str(size_in_MB) + ' MB\t' +
                        str(fs) + ' Hz\t' + str(bit_depth) + ' bit\t' +
                        str(n_chan) +
                        (' channel\t' if n_chan == 1 else ' channels\t') +
                        seconds2timestr(file_length_in_seconds) + '\n')
                except Exception:
                    # Best effort: an unreadable or malformed WAV file is
                    # reported, not fatal. Exception (not a bare except)
                    # lets KeyboardInterrupt/SystemExit propagate.
                    parts.append("Unexpected error with " + path + '\n')
            else:
                parts.append('{}{}'.format(subindent, f) + padding +
                             str(size_in_MB) + ' MB' + '\n')

        if files:  # if there are any files
            parts.append(_totals_block(subindent,
                                       total_size_in_MB[level + 1],
                                       number_of_files[level + 1],
                                       number_of_wav_files[level + 1]))

        previous_root = clean_root

    # go down all levels and print accumulated size for each
    for depth in range(level, -1, -1):
        previous_root = os.path.dirname(previous_root)  # go one level up
        parts.append(_totals_block(' ' * 4 * depth,
                                   total_size_in_MB[depth],
                                   number_of_files[depth],
                                   number_of_wav_files[depth],
                                   previous_root))

    return ''.join(parts)
def main():
    """Write a report on the chosen folder to 'testfile.txt'.

    The folder is the single optional command line argument and defaults
    to the current directory. With more than one argument a usage message
    is printed and nothing is written.
    """
    # parse command line options
    if len(sys.argv) == 1:  # default path
        path = "."
    elif len(sys.argv) == 2:  # specified path
        path = sys.argv[1]
    else:
        # Parenthesised single-string print works on Python 2 and 3 alike.
        print("audio_files_info takes at most 1 command line argument\n" +
              "Use: python audio_files_info.py [folder_path].")
        # Stop here: no path was chosen, so there is nothing to scan.
        return

    output_str = list_files(path)
    with open('testfile.txt', 'w') as f:
        f.write(output_str)
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment