Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Uses audiogrep and pydub to cut up and reassemble audio.
# Python 2
import os
from subprocess import call
from pydub import AudioSegment
def manifest(directory, extension=None):
    """Yield the path of every matching file under ``directory``, recursively.

    Args:
        directory: Root folder to walk with ``os.walk``.
        extension: File extension including the leading dot (for example
            ``'.txt'`` or ``'.mp3'``). When given, only files whose extension
            equals it are yielded. When ``None``, every file is yielded
            except ``.DS_Store`` entries.

    Yields:
        ``os.path.join(root, filename)`` for each matching file.
    """
    for root, _, files in os.walk(directory):
        for fn in files:
            if extension is None:
                # No filter requested: skip only the macOS folder-metadata file.
                if fn != '.DS_Store':
                    yield os.path.join(root, fn)
            elif os.path.splitext(fn)[1] == extension:
                # splitext('.DS_Store') has an empty extension, so dotfiles
                # never match an explicit extension filter.
                yield os.path.join(root, fn)
def move_all_transcripts(directory):
    """Run ``mv <path> transcriptions`` for each path from ``manifest(directory, '.txt')``.

    The destination ``transcriptions`` is passed to ``mv`` as a relative
    path, so it is resolved against the current working directory.
    """
    # The listing is fully materialized before the first move is issued.
    transcript_paths = list(manifest(directory, '.txt'))
    for transcript_path in transcript_paths:
        call(['mv', transcript_path, 'transcriptions'])
def combine_all_mp3s(fns, output_path=None):
    """Concatenate the given mp3 files, in order, into one audio segment.

    Args:
        fns: List of mp3 file names to join end to end.
        output_path: Optional destination file. When given, the combined
            audio is also exported there as mp3.

    Returns:
        The combined ``AudioSegment``. An empty ``fns`` yields an empty
        segment and nothing is exported.
    """
    if not fns:
        # Nothing to join; avoid indexing into an empty list.
        return AudioSegment.empty()

    combined = AudioSegment.from_mp3(fns[0])
    for fn in fns[1:]:
        combined += AudioSegment.from_mp3(fn)

    if output_path is not None:
        combined.export(output_path, format='mp3')
    # Hand the result back; previously it was built and then discarded.
    return combined
def transcribe(fns):
    """Invoke ``audiogrep --input <file> --transcribe`` once per entry in ``fns``."""
    command_prefix = ['audiogrep', '--input']
    for audio_path in fns:
        call(command_prefix + [audio_path, '--transcribe'])
def move_transcripts():
    """Placeholder that currently does nothing and returns ``None``.

    The definition had no body, which is a syntax error; it is kept as an
    explicit no-op so the name stays importable.
    """
    # TODO: implement, or remove if nothing is meant to call this.
    pass
def main():
    """Build a "franken" audio cut of a fixed paragraph with audiogrep.

    Runs ``audiogrep`` against ``combined_files.mp3``, searching for the
    paragraph below with ``--output-mode franken``. The earlier workflow
    phases are left commented out and must be enabled by hand.
    """
    # workflow is - put all recordings in folder. combine all recordings. transcribe all recordings. frankencut them based on the text you input?
    # directory = 'recordings'
    # recordings = list(manifest('recordings', '.mp3'))
    # print(recordings[0])
    # Transcription phase
    # transcribe(recordings)
    # transcribe(['combined_files.mp3'])
    # Move transcripts to other folder.
    # move_all_transcripts(directory)

    # The literal must be closed before the call below; left open, it
    # swallowed the rest of the file. The text stays at column 0 so no
    # indentation leaks into the search string.
    paragraph = """
Approaching sound in this way is a way for our students to reconstitute their own ideas through the very sound artifacts that they are studying. In so doing, they learn to consider them as sound, as material objects that can be turned over, re-examined, disrupted, and reassembled. But look at how much is gone. How much gets lost. The recording is notable for its absences, its gaps.
"""
    call(['audiogrep', '--input', 'combined_files.mp3', '--search', paragraph, '--output-mode', 'franken'])
    # regex approach
    # call(['audiogrep', '--input', 'combined_files.mp3', '--regex', '--search', r'voice.*\b|sound.*\b|tongue.*\b', '--output-mode', 'word'])
    # combine all recordings
    # combine_all_mp3s(recordings)
if __name__ == '__main__':
    # Run the workflow only when executed as a script, not when imported.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment