# walshbr/nemla-audio.py

## nemla-audio.py
# Python 2

import os
from subprocess import call
from pydub import AudioSegment


def manifest(directory, extension=None):
    """Yield the path of every file found beneath ``directory``.

    The tree is walked recursively with ``os.walk`` and each yielded path is
    ``os.path.join(root, filename)``.

    Args:
        directory: Root folder to search.
        extension: Optional suffix including the leading dot (for example
            ``'.txt'`` or ``'.mp3'``). When given, only files whose
            ``os.path.splitext`` suffix equals it are yielded. When ``None``,
            every file is yielded.

    Yields:
        Paths of the matching files. Entries named ``.DS_Store`` are never
        yielded.
    """
    for root, _, files in os.walk(directory):
        for fn in files:
            # Finder metadata is never a recording or a transcript.
            if fn == '.DS_Store':
                continue
            # Compare against the requested suffix itself rather than a
            # hard-coded '.txt'/'.mp3' pair, so any extension can be used.
            if extension is None or os.path.splitext(fn)[1] == extension:
                yield os.path.join(root, fn)


def move_all_transcripts(directory, destination='transcriptions'):
    """Move every ``.txt`` file found under ``directory`` into ``destination``.

    Args:
        directory: Folder searched recursively (via ``manifest``) for
            ``.txt`` files.
        destination: Folder that receives the files. Defaults to
            ``'transcriptions'``, resolved against the current working
            directory.

    Raises:
        OSError: If ``destination`` is missing and cannot be created.
    """
    # Snapshot the matches before moving anything so the walk is not
    # affected by files changing place underneath it.
    transcripts = list(manifest(directory, '.txt'))
    # ``mv`` renames its source when the target is not an existing
    # directory, so without this every transcript would overwrite the
    # previous one in a plain file called ``destination``.
    if transcripts and not os.path.isdir(destination):
        os.makedirs(destination)
    for transcript in transcripts:
        call(['mv', transcript, destination])


def combine_all_mp3s(fns, output='combined_files.mp3'):
    """Concatenate several mp3 files, in the order given, into one mp3.

    Args:
        fns: Iterable of mp3 file paths. Any iterable is accepted (a list or
            a generator such as the one returned by ``manifest``).
        output: Path of the combined file. Defaults to
            ``'combined_files.mp3'`` in the current working directory.

    Raises:
        IndexError: If ``fns`` contains no filenames.
    """
    # Materialise first so generators work and emptiness can be checked.
    paths = list(fns)
    if not paths:
        # Same exception type the bare ``fns[0]`` lookup raised, but with a
        # message that says what is actually wrong.
        raise IndexError('combine_all_mp3s() needs at least one mp3 filename')
    combined = AudioSegment.from_mp3(paths[0])
    for fn in paths[1:]:
        print(fn)  # progress output for each appended recording
        combined += AudioSegment.from_mp3(fn)
    # export() returns the file object it wrote to; close it so the handle
    # is released immediately instead of lingering until garbage collection.
    combined.export(output, format='mp3').close()


def transcribe(fns):
    """Transcribe each recording by shelling out to ``audiogrep``.

    Args:
        fns: Iterable of audio file paths. The command
            ``audiogrep --input <path> --transcribe`` is run once per path,
            in order. The exit status of each run is ignored.
    """
    for audio_path in fns:
        command = ['audiogrep', '--input', audio_path, '--transcribe']
        call(command)


def move_transcripts():
    """Do nothing; this is an unimplemented placeholder.

    NOTE(review): possibly superseded by ``move_all_transcripts`` - confirm
    before removing.
    """
    return None


def main():
    """Franken-cut ``combined_files.mp3`` against a fixed paragraph of text.

    Intended workflow: put all recordings in a folder, combine them,
    transcribe them, then franken-cut them based on the text supplied here.
    Only the franken-cut step is enabled; the other steps are kept below as
    commented-out calls to be switched on by hand.
    """
    # Gather the recordings.
    # directory = 'recordings'
    # recordings = list(manifest('recordings', '.mp3'))
    # print(recordings[0])

    # Transcription phase.
    # transcribe(recordings)
    # transcribe(['combined_files.mp3'])

    # Move the transcripts to their own folder.
    # move_all_transcripts(directory)

    # Franken-cut: the text handed to audiogrep as its search term.
    search_text = """
    Approaching sound in this way is a way for our students to reconstitute their own ideas through the very sound artifacts that they are studying. In so doing, they learn to consider them as sound, as material objects that can be turned over, re-examined, disrupted, and reassembled. But look at how much is gone. How much gets lost. The recording is notable for its absences, its gaps.
    """
    franken_command = [
        'audiogrep',
        '--input', 'combined_files.mp3',
        '--search', search_text,
        '--output-mode', 'franken',
    ]
    call(franken_command)

    # Alternative: regex search, one output clip per matching word.
    # call(['audiogrep', '--input', 'combined_files.mp3', '--regex', '--search', r'voice.*\b|sound.*\b|tongue.*\b', '--output-mode', 'word'])

    # Combine all recordings into a single file.
    # combine_all_mp3s(recordings)

# Run the workflow only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
	# Python 2

	import os
	from subprocess import call
	from pydub import AudioSegment


	def manifest(directory, extension=None):
	    """Yield the path of every file found beneath ``directory``.

	    The tree is walked recursively with ``os.walk`` and each yielded path
	    is ``os.path.join(root, filename)``.

	    Args:
	        directory: Root folder to search.
	        extension: Optional suffix including the leading dot (for example
	            ``'.txt'`` or ``'.mp3'``). When given, only files whose
	            ``os.path.splitext`` suffix equals it are yielded. When
	            ``None``, every file is yielded.

	    Yields:
	        Paths of the matching files. Entries named ``.DS_Store`` are never
	        yielded.
	    """
	    for root, _, files in os.walk(directory):
	        for fn in files:
	            # Finder metadata is never a recording or a transcript.
	            if fn == '.DS_Store':
	                continue
	            # Compare against the requested suffix itself rather than a
	            # hard-coded '.txt'/'.mp3' pair, so any extension can be used.
	            if extension is None or os.path.splitext(fn)[1] == extension:
	                yield os.path.join(root, fn)


	def move_all_transcripts(directory, destination='transcriptions'):
	    """Move every ``.txt`` file found under ``directory`` into ``destination``.

	    Args:
	        directory: Folder searched recursively (via ``manifest``) for
	            ``.txt`` files.
	        destination: Folder that receives the files. Defaults to
	            ``'transcriptions'``, resolved against the current working
	            directory.

	    Raises:
	        OSError: If ``destination`` is missing and cannot be created.
	    """
	    # Snapshot the matches before moving anything so the walk is not
	    # affected by files changing place underneath it.
	    transcripts = list(manifest(directory, '.txt'))
	    # ``mv`` renames its source when the target is not an existing
	    # directory, so without this every transcript would overwrite the
	    # previous one in a plain file called ``destination``.
	    if transcripts and not os.path.isdir(destination):
	        os.makedirs(destination)
	    for transcript in transcripts:
	        call(['mv', transcript, destination])


	def combine_all_mp3s(fns, output='combined_files.mp3'):
	    """Concatenate several mp3 files, in the order given, into one mp3.

	    Args:
	        fns: Iterable of mp3 file paths. Any iterable is accepted (a list
	            or a generator such as the one returned by ``manifest``).
	        output: Path of the combined file. Defaults to
	            ``'combined_files.mp3'`` in the current working directory.

	    Raises:
	        IndexError: If ``fns`` contains no filenames.
	    """
	    # Materialise first so generators work and emptiness can be checked.
	    paths = list(fns)
	    if not paths:
	        # Same exception type the bare ``fns[0]`` lookup raised, but with
	        # a message that says what is actually wrong.
	        raise IndexError('combine_all_mp3s() needs at least one mp3 filename')
	    combined = AudioSegment.from_mp3(paths[0])
	    for fn in paths[1:]:
	        print(fn)  # progress output for each appended recording
	        combined += AudioSegment.from_mp3(fn)
	    # export() returns the file object it wrote to; close it so the handle
	    # is released immediately instead of lingering until garbage collection.
	    combined.export(output, format='mp3').close()


	def transcribe(fns):
	    """Transcribe each recording by shelling out to ``audiogrep``.

	    Args:
	        fns: Iterable of audio file paths. The command
	            ``audiogrep --input <path> --transcribe`` is run once per
	            path, in order. The exit status of each run is ignored.
	    """
	    for recording in fns:
	        call(['audiogrep', '--input', recording, '--transcribe'])


	def move_transcripts():
	    """Do nothing; this is an unimplemented placeholder.

	    NOTE(review): possibly superseded by ``move_all_transcripts`` - confirm
	    before removing.
	    """
	    pass


	def main():
	    """Franken-cut ``combined_files.mp3`` against a fixed paragraph of text.

	    Intended workflow: put all recordings in a folder, combine them,
	    transcribe them, then franken-cut them based on the text supplied
	    here. Only the franken-cut step is enabled; the other steps are kept
	    below as commented-out calls to be switched on by hand.
	    """
	    # Gather the recordings.
	    # directory = 'recordings'
	    # recordings = list(manifest('recordings', '.mp3'))
	    # print(recordings[0])

	    # Transcription phase.
	    # transcribe(recordings)
	    # transcribe(['combined_files.mp3'])

	    # Move the transcripts to their own folder.
	    # move_all_transcripts(directory)

	    # Franken-cut: the text handed to audiogrep as its search term.
	    paragraph = """
	Approaching sound in this way is a way for our students to reconstitute their own ideas through the very sound artifacts that they are studying. In so doing, they learn to consider them as sound, as material objects that can be turned over, re-examined, disrupted, and reassembled. But look at how much is gone. How much gets lost. The recording is notable for its absences, its gaps.
	"""
	    call(['audiogrep', '--input', 'combined_files.mp3', '--search', paragraph, '--output-mode', 'franken'])

	    # Alternative: regex search, one output clip per matching word.
	    # call(['audiogrep', '--input', 'combined_files.mp3', '--regex', '--search', r'voice.*\b|sound.*\b|tongue.*\b', '--output-mode', 'word'])

	    # Combine all recordings into a single file.
	    # combine_all_mp3s(recordings)

	# Run the workflow only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == '__main__':
	    main()