Created
March 14, 2017 13:39
-
-
Save walshbr/cbcdabc92995334ae52414d048ae5d92 to your computer and use it in GitHub Desktop.
Uses audiogrep and pydub to cut up and reassemble audio.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python 2 | |
import os | |
from subprocess import call | |
from pydub import AudioSegment | |
def manifest(directory, extension=None):
    """Yield the path of every matching file found under *directory*.

    The tree is walked recursively with ``os.walk``.

    Args:
        directory: Root folder to search.
        extension: Optional file extension including the leading dot
            (for example ``'.txt'`` or ``'.mp3'``). When given, only files
            whose extension equals it are yielded. When ``None``, every
            file is yielded except entries named ``.DS_Store``.

    Yields:
        ``os.path.join(root, filename)`` for each matching file.
    """
    for root, _, files in os.walk(directory):
        for fn in files:
            if extension is None:
                # Skip .DS_Store entries so they are not treated as data.
                if fn != '.DS_Store':
                    yield os.path.join(root, fn)
            elif os.path.splitext(fn)[1] == extension:
                # Any extension is accepted, not only '.txt' and '.mp3'.
                yield os.path.join(root, fn)
def move_all_transcripts(directory, destination='transcriptions'):
    """Move every ``.txt`` file found under *directory* into *destination*.

    Args:
        directory: Folder searched (recursively, via ``manifest``) for
            ``.txt`` files.
        destination: Folder that receives the files. Defaults to
            ``'transcriptions'``; it is created when it does not exist.
    """
    # Collect the full list first so moving files cannot disturb the walk.
    transcripts = list(manifest(directory, '.txt'))
    if not transcripts:
        return
    # If the destination were missing, ``mv`` would rename each transcript to
    # a plain file with that name, every move overwriting the previous one.
    if not os.path.isdir(destination):
        os.makedirs(destination)
    for transcript in transcripts:
        call(['mv', transcript, destination])
def combine_all_mp3s(fns, output='combined_files.mp3'):
    """Concatenate the given mp3 files, in order, into a single mp3.

    Args:
        fns: Iterable of mp3 file paths; must contain at least one entry.
        output: Path of the combined file to write. Defaults to
            ``'combined_files.mp3'``.

    Raises:
        IndexError: If *fns* is empty.
    """
    # Accept any iterable (e.g. the ``manifest`` generator), not only lists.
    fns = list(fns)
    if not fns:
        # Same exception type the bare ``fns[0]`` lookup produced, but with
        # a message that explains what went wrong.
        raise IndexError('combine_all_mp3s() needs at least one filename')
    combined = AudioSegment.from_mp3(fns[0])
    for fn in fns[1:]:
        # Print each file as it is appended, as a simple progress trace.
        print(fn)
        combined += AudioSegment.from_mp3(fn)
    combined.export(output, format='mp3')
def transcribe(fns):
    """Invoke ``audiogrep --transcribe`` once for each path in *fns*."""
    command_prefix = ['audiogrep', '--input']
    for audio_path in fns:
        call(command_prefix + [audio_path, '--transcribe'])
def move_transcripts():
    """Placeholder that currently does nothing and returns ``None``."""
    return None
def main():
    """Run the currently enabled step of the workflow: the "franken" cut.

    Intended workflow, per the original notes: put all recordings in a
    folder, combine all recordings, transcribe all recordings, then
    franken-cut them based on the text you input. Only the franken-cut
    call is active; the other steps are kept as commented-out calls to be
    re-enabled by hand.
    """
    # Gather the recordings:
    # directory = 'recordings'
    # recordings = list(manifest('recordings', '.mp3'))
    # print(recordings[0])

    # Transcription phase:
    # transcribe(recordings)
    # transcribe(['combined_files.mp3'])

    # Move transcripts to other folder:
    # move_all_transcripts(directory)

    # Franken-cut: hand the paragraph below to audiogrep as the search text.
    search_text = """
Approaching sound in this way is a way for our students to reconstitute their own ideas through the very sound artifacts that they are studying. In so doing, they learn to consider them as sound, as material objects that can be turned over, re-examined, disrupted, and reassembled. But look at how much is gone. How much gets lost. The recording is notable for its absences, its gaps.
"""
    franken_command = [
        'audiogrep',
        '--input', 'combined_files.mp3',
        '--search', search_text,
        '--output-mode', 'franken',
    ]
    call(franken_command)

    # Regex approach (alternative to the franken-cut above):
    # call(['audiogrep', '--input', 'combined_files.mp3', '--regex', '--search', r'voice.*\b|sound.*\b|tongue.*\b', '--output-mode', 'word'])

    # Combine all recordings:
    # combine_all_mp3s(recordings)
# Run the workflow only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment