Skip to content

Instantly share code, notes, and snippets.

@BrechtDeMan
Created October 12, 2015 15:25
Show Gist options
  • Save BrechtDeMan/5c95f27d880d4f22a2d8 to your computer and use it in GitHub Desktop.
Save BrechtDeMan/5c95f27d880d4f22a2d8 to your computer and use it in GitHub Desktop.
Recursively gather information about all files in a specified directory and its subdirectories (especially WAV file properties) and write it to a text file.
import os # walking through file system
import sys # command lines arguments
import wave # for reading .wav files
def seconds2timestr(time_in_seconds):
    """Format a duration in seconds as '<minutes>min <seconds>s'.

    Both parts are truncated to integers. ``None`` (duration unknown)
    yields the placeholder string 'N/A'.
    """
    # Guard clause: nothing to format when the duration is unknown.
    if time_in_seconds is None:
        return 'N/A'
    whole_minutes = int(time_in_seconds / 60)
    leftover_seconds = int(time_in_seconds % 60)
    return "{}min {}s".format(whole_minutes, leftover_seconds)
def _total_size_line(indent, size_in_mb, n_files, n_wav_files):
    """Format one 'TOTAL SIZE' summary line, terminated by a newline."""
    return (indent + "TOTAL SIZE" + 20 * " " + str(size_in_mb) + " MB\t" +
            str(n_files) + " files\t" + str(n_wav_files) + " WAV files\n")


def _describe_wav(path):
    """Return the tab-separated WAV properties of the file at ``path``.

    The result holds sample rate, bit depth, channel count and duration.
    Any error raised while opening or reading the header propagates to the
    caller; the wave handle is closed either way.
    """
    wav_file = wave.open(path, 'r')
    try:
        fs = wav_file.getframerate()
        bit_depth = 8 * wav_file.getsampwidth()  # sample width is in bytes
        n_chan = wav_file.getnchannels()
        file_length_in_seconds = wav_file.getnframes() / fs
    finally:
        wav_file.close()
    return (str(fs) + ' Hz\t' + str(bit_depth) + ' bit\t' +
            str(n_chan) + (' channel\t' if n_chan == 1 else ' channels\t') +
            seconds2timestr(file_length_in_seconds))


def list_files(startpath):
    """Build an indented text report of every file below ``startpath``.

    Directories are walked alphabetically (case-insensitive). Each directory
    is printed with its non-hidden files and their size in MB; files ending
    in '.wav' also get sample rate, bit depth, channel count and duration.
    After each directory's files, and again whenever the walk leaves a
    subtree, a 'TOTAL SIZE' line gives the accumulated size, file count and
    WAV file count. Hidden files (leading '.') are counted but not listed.

    Args:
        startpath: directory to report on; a trailing path separator is
            accepted.

    Returns:
        The report as a single string. An empty string is returned when
        ``startpath`` cannot be walked (for example, it does not exist).
    """
    lines = []  # report fragments, joined once at the end
    # Index 0 holds the grand total; index k + 1 holds the running total of
    # the directory currently open at depth k (including its subtree).
    total_size_in_MB = [0]
    number_of_files = [0]
    number_of_wav_files = [0]
    previous_level = 0
    previous_root = None
    level = None  # stays None when os.walk yields nothing

    for root, dirs, files in os.walk(startpath):
        # Sort dirs in place so os.walk descends alphabetically.
        dirs.sort(key=lambda s: s.lower())
        # Depth relative to startpath. relpath is immune to trailing
        # separators and to startpath's text recurring inside root.
        relative = os.path.relpath(root, startpath)
        level = 0 if relative == os.curdir else relative.count(os.sep) + 1

        # If we went up one or more levels, print each closed subtree's total.
        if level < previous_level:
            for depth in range(previous_level, level, -1):
                indent = ' ' * 4 * depth
                previous_root = os.path.dirname(previous_root)  # one level up
                lines.append(indent + 45 * "-" + '\n')  # horizontal line
                lines.append(
                    _total_size_line(indent, total_size_in_MB[depth],
                                     number_of_files[depth],
                                     number_of_wav_files[depth]) +
                    "(" + previous_root + ")\n\n")
                total_size_in_MB[depth] = 0
                number_of_files[depth] = 0
                number_of_wav_files[depth] = 0
        previous_level = level

        indent = ' ' * 4 * level
        # normpath keeps the directory name when root ends with a separator.
        lines.append(indent + os.path.basename(os.path.normpath(root)) + '/\n')
        subindent = ' ' * 4 * (level + 1)
        files = sorted(files, key=lambda s: s.lower())

        # Make sure a zeroed accumulator exists for this directory.
        if level >= len(total_size_in_MB) - 1:  # first visit to this depth
            total_size_in_MB.append(0)
            number_of_files.append(0)
            number_of_wav_files.append(0)
        else:  # depth seen before: start this directory from zero
            total_size_in_MB[level + 1] = 0
            number_of_files[level + 1] = 0
            number_of_wav_files[level + 1] = 0

        for f in files:
            path = os.path.join(root, f)
            try:
                file_size = os.stat(path).st_size
            except OSError:
                # e.g. a broken symlink: report it and carry on with the rest
                lines.append("Unexpected error with " + path + '\n')
                continue
            size_in_mb = file_size / 1000000.0

            # Add this file to every open level, from grand total down to here.
            for idx in range(level + 2):
                total_size_in_MB[idx] += size_in_mb
                number_of_files[idx] += 1

            if f.startswith('.'):  # hidden files are counted but not listed
                continue

            # Always keep at least one space between the name and the size.
            entry = (subindent + f + max(30 - len(f), 1) * ' ' +
                     str(size_in_mb) + ' MB')
            if f.endswith('.wav'):
                for idx in range(level + 2):
                    number_of_wav_files[idx] += 1
                try:
                    lines.append(entry + '\t' + _describe_wav(path) + '\n')
                except Exception:
                    # Best effort: an unreadable or corrupt WAV must not
                    # abort the report. Unlike a bare except, Ctrl-C and
                    # SystemExit still propagate.
                    lines.append("Unexpected error with " + path + '\n')
            else:
                lines.append(entry + '\n')

        if files:  # per-directory total, only when the directory has files
            lines.append(subindent + 45 * "-" + '\n')  # horizontal line
            lines.append(
                _total_size_line(subindent, total_size_in_MB[level + 1],
                                 number_of_files[level + 1],
                                 number_of_wav_files[level + 1]) + "\n")
        previous_root = root

    if level is None:
        # os.walk produced nothing (missing or unreadable startpath).
        return ''

    # Close every level still open, ending with the grand total.
    for depth in range(level, -1, -1):
        indent = ' ' * 4 * depth
        previous_root = os.path.dirname(previous_root)  # one level up
        lines.append(indent + 45 * "-" + '\n')  # horizontal line
        lines.append(
            _total_size_line(indent, total_size_in_MB[depth],
                             number_of_files[depth],
                             number_of_wav_files[depth]) +
            "(" + previous_root + ")\n\n")
    return ''.join(lines)
def main():
    """Command-line entry point: write the report to 'testfile.txt'.

    With no argument the current directory is scanned; with one argument
    that folder is scanned. With more arguments a usage message is printed
    and the process exits with status 1 without writing anything.
    """
    # parse command line options
    if len(sys.argv) == 1:  # default path
        path = "."
    elif len(sys.argv) == 2:  # specified path
        path = sys.argv[1]
    else:
        # Single parenthesised argument: valid on both Python 2 and 3.
        print("audio_files_info takes at most 1 command line argument\n" +
              "Use: python audio_files_info.py [folder_path].")
        # Stop here; continuing would reach list_files with no path set.
        sys.exit(1)
    output_str = list_files(path)
    with open('testfile.txt', 'w') as f:
        f.write(output_str)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment