ThatDevopsGuy/indexer.py

## indexer.py
#!/usr/bin/env python
# Sebastian Weigand 2011

from mutagen.easymp4 import EasyMP4
from subprocess import *
import sys, time

print 'Proof that Microsoft sucks ass at parsing media files:\n'

raw_input('Press enter when ready.')

sys.stdout.write('Searching for music files...'.ljust(50))
sys.stdout.flush()

start = time.time()
files = Popen('find . \\( -iname "*.m4a" \\)', shell = True, stdout = PIPE).communicate()[0].splitlines()
end1 = time.time()

diff1 = round(end1 - start, 4)

sys.stdout.write('Got ' + str(len(files)) + ' files in ' + str(diff1) + ' seconds.\n')
sys.stdout.flush()

library = {}
unknowns = []

sys.stdout.write('Processing metadata...'.ljust(50))
sys.stdout.flush()

print ''

total = str(len(files) - 1)
spacing = len(total)

start2 = time.time()

# Just do m4a files to get a rough benchmark:
for i, file in enumerate(files):
	print '(' + str(i).rjust(spacing) + '/' + total + ')', file

	audio = EasyMP4(file)

	# Ensure metadata is present:
	if 'artist' in audio and 'album' in audio and 'title' in audio:
		# Ensure metadata values are populated:
		if len(audio['artist'][0]) > 0 and len(audio['album'][0]) > 0 and len(audio['title'][0]) > 0:
			if audio['artist'][0] not in library:
				# Add the artist, create new dicts for album and track:
				library[audio['artist'][0]] = {audio['album'][0]: {audio['title'][0]: file}}

			# Have we seen this album before?
			elif audio['album'][0] not in library[audio['artist'][0]]:
				# Add the album to the artist dict, create new dict for track:
				library[audio['artist'][0]][audio['album'][0]] = {audio['title'][0]: file}

			# We have seen the artist, and album, but not track:
			else:
				# Add track k:v pair to artist:album dict:
				library[audio['artist'][0]][audio['album'][0]][audio['title'][0]] = file
		else:
			# Not enough metadata:
			unknowns.append(file)
	else:
		unknowns.append(file)
		continue

end2 = time.time()
diff2 = round(end2 - start2, 4)
diff = round(end2 - start, 4)

sys.stdout.write('Done in ' + str(diff2) + ' seconds.\n')
sys.stdout.flush()

print '\nTotal time taken to find', len(files), 'files and extract metadata from them:', diff, 'seconds.\n'

while True:

	input = unicode(raw_input('Enter an artist: '))

	if input in library:
		print 'There are', len(library[input]), 'albums available for that artist:'
		for album in library[input]:
			print '\t', album
			for track in library[input][album]:
				print '\t\t', track
	else:
		print 'Not found:', input
	#!/usr/bin/env python
	# Sebastian Weigand 2011

	from mutagen.easymp4 import EasyMP4
	from subprocess import *
	import sys, time

	print 'Proof that Microsoft sucks ass at parsing media files:\n'

	raw_input('Press enter when ready.')

	sys.stdout.write('Searching for music files...'.ljust(50))
	sys.stdout.flush()

	start = time.time()
	files = Popen('find . \\( -iname "*.m4a" \\)', shell = True, stdout = PIPE).communicate()[0].splitlines()
	end1 = time.time()

	diff1 = round(end1 - start, 4)

	sys.stdout.write('Got ' + str(len(files)) + ' files in ' + str(diff1) + ' seconds.\n')
	sys.stdout.flush()

	library = {}
	unknowns = []

	sys.stdout.write('Processing metadata...'.ljust(50))
	sys.stdout.flush()

	print ''

	total = str(len(files) - 1)
	spacing = len(total)

	start2 = time.time()

	# Just do m4a files to get a rough benchmark:
	for i, file in enumerate(files):
	print '(' + str(i).rjust(spacing) + '/' + total + ')', file

	audio = EasyMP4(file)

	# Ensure metadata is present:
	if 'artist' in audio and 'album' in audio and 'title' in audio:
	# Ensure metadata values are populated:
	if len(audio['artist'][0]) > 0 and len(audio['album'][0]) > 0 and len(audio['title'][0]) > 0:
	if audio['artist'][0] not in library:
	# Add the artist, create new dicts for album and track:
	library[audio['artist'][0]] = {audio['album'][0]: {audio['title'][0]: file}}

	# Have we seen this album before?
	elif audio['album'][0] not in library[audio['artist'][0]]:
	# Add the album to the artist dict, create new dict for track:
	library[audio['artist'][0]][audio['album'][0]] = {audio['title'][0]: file}

	# We have seen the artist, and album, but not track:
	else:
	# Add track k:v pair to artist:album dict:
	library[audio['artist'][0]][audio['album'][0]][audio['title'][0]] = file
	else:
	# Not enough metadata:
	unknowns.append(file)
	else:
	unknowns.append(file)
	continue

	end2 = time.time()
	diff2 = round(end2 - start2, 4)
	diff = round(end2 - start, 4)

	sys.stdout.write('Done in ' + str(diff2) + ' seconds.\n')
	sys.stdout.flush()

	print '\nTotal time taken to find', len(files), 'files and extract metadata from them:', diff, 'seconds.\n'

	while True:

	input = unicode(raw_input('Enter an artist: '))

	if input in library:
	print 'There are', len(library[input]), 'albums available for that artist:'
	for album in library[input]:
	print '\t', album
	for track in library[input][album]:
	print '\t\t', track
	else:
	print 'Not found:', input