Skip to content

Instantly share code, notes, and snippets.

@walshbr
Created March 14, 2017 13:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save walshbr/cbcdabc92995334ae52414d048ae5d92 to your computer and use it in GitHub Desktop.
Save walshbr/cbcdabc92995334ae52414d048ae5d92 to your computer and use it in GitHub Desktop.
Uses audiogrep and pydub to cut up and reassemble audio.
# Python 2
import os
from subprocess import call
from pydub import AudioSegment
def manifest(directory, extension=None):
    """Yield the path of every file found under ``directory``.

    The folder is walked recursively with ``os.walk``.

    Args:
        directory: Root folder to search.
        extension: Optional file extension including the leading dot
            (for example ``'.txt'`` or ``'.mp3'``).  When given, only
            files whose extension is exactly equal to it are yielded.
            When ``None``, every file is yielded.

    Yields:
        Paths made by joining the containing folder and the file name.

    Files named ``.DS_Store`` are never yielded.
    """
    for root, _, files in os.walk(directory):
        for fn in files:
            if fn == '.DS_Store':
                continue
            # Compare against the requested extension instead of a fixed
            # list, so any extension can be filtered on.
            if extension is None or os.path.splitext(fn)[1] == extension:
                yield os.path.join(root, fn)
def move_all_transcripts(directory, destination='transcriptions'):
    """Move every ``.txt`` file found under ``directory`` into ``destination``.

    Args:
        directory: Root folder searched (recursively, via ``manifest``)
            for ``.txt`` files.
        destination: Folder the files are moved into.  Defaults to
            ``'transcriptions'``, relative to the current working
            directory.

    Each file is moved by running the external ``mv`` command.
    """
    # Take the full listing before moving anything so that the moves
    # cannot disturb the directory walk.
    transcripts = list(manifest(directory, '.txt'))

    # ``mv src dst`` renames ``src`` to ``dst`` when ``dst`` is not an
    # existing directory.  Without this, a missing destination folder
    # would make every transcript overwrite the previous one under the
    # single name ``destination``.
    if transcripts and not os.path.isdir(destination):
        os.makedirs(destination)

    for transcript in transcripts:
        call(['mv', transcript, destination])
def combine_all_mp3s(fns, output='combined_files.mp3'):
    """Concatenate the given mp3 files, in order, into a single mp3.

    Args:
        fns: Iterable of mp3 file names to join.
        output: Path of the file to write.  Defaults to
            ``'combined_files.mp3'`` in the current working directory.

    The name of each file after the first is printed as it is appended.
    If ``fns`` is empty, a notice is printed and no file is written.
    """
    fns = list(fns)
    if not fns:
        # Nothing to seed the combined segment with; bail out instead of
        # failing with an IndexError on fns[0].
        print('No mp3 files to combine; nothing written.')
        return

    combined = AudioSegment.from_mp3(fns[0])
    for fn in fns[1:]:
        print(fn)
        combined += AudioSegment.from_mp3(fn)
    combined.export(output, format='mp3')
def transcribe(fns):
    """Run ``audiogrep --transcribe`` on each recording in ``fns``.

    Args:
        fns: Iterable of audio file names.

    One ``audiogrep`` process is launched per file, one after another.
    NOTE(review): where audiogrep writes the resulting transcript is not
    visible from this file -- see the audiogrep documentation.
    """
    for audio_path in fns:
        command = ['audiogrep', '--input', audio_path, '--transcribe']
        call(command)
def move_transcripts():
    """Placeholder: takes no arguments, does nothing, returns ``None``.

    NOTE(review): presumably superseded by ``move_all_transcripts`` --
    confirm before removing.
    """
    return None
def main():
    """Run the currently enabled workflow step: the "franken" cut.

    Invokes ``audiogrep`` on ``combined_files.mp3`` with ``paragraph`` as
    the ``--search`` text and ``--output-mode franken``.  The other
    workflow phases are kept below as commented-out calls so they can be
    re-enabled by hand.

    NOTE(review): the exact output of franken mode is defined by
    audiogrep, not by this file -- see its documentation.
    """
    # Workflow: put all recordings in a folder, combine all recordings,
    # transcribe all recordings, then franken-cut them based on the text
    # you input.

    # directory = 'recordings'
    # recordings = list(manifest('recordings', '.mp3'))
    # print(recordings[0])

    # Transcription phase
    # transcribe(recordings)
    # transcribe(['combined_files.mp3'])

    # Move transcripts to other folder.
    # move_all_transcripts(directory)

    # Franken-cut.  The literal starts and ends with a newline and its
    # text sits at column 0 on purpose: every character between the
    # triple quotes is passed to audiogrep as the search text.
    paragraph = """
Approaching sound in this way is a way for our students to reconstitute their own ideas through the very sound artifacts that they are studying. In so doing, they learn to consider them as sound, as material objects that can be turned over, re-examined, disrupted, and reassembled. But look at how much is gone. How much gets lost. The recording is notable for its absences, its gaps.
"""
    franken_cut = [
        'audiogrep',
        '--input', 'combined_files.mp3',
        '--search', paragraph,
        '--output-mode', 'franken',
    ]
    call(franken_cut)

    # Regex approach
    # call(['audiogrep', '--input', 'combined_files.mp3', '--regex', '--search', r'voice.*\b|sound.*\b|tongue.*\b', '--output-mode', 'word'])

    # Combine all recordings
    # combine_all_mp3s(recordings)
# Run the workflow only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment