Skip to content

Instantly share code, notes, and snippets.

@lucasgautheron
Last active October 6, 2020 11:49
Show Gist options
  • Save lucasgautheron/0f7704290305480c089fd650d1593f57 to your computer and use it in GitHub Desktop.
import pandas as pd
import glob
import os
import subprocess
import re
from collections import defaultdict
import argparse
def merge_audio(files, destination):
    """Concatenate WAV files into recordings/merged/<destination> with ffmpeg.

    Uses ffmpeg's concat demuxer, driven by a temporary ``concat.txt``
    playlist written in the current working directory.

    Parameters
    ----------
    files : list of str
        File names relative to the ``recordings/`` directory, in merge order.
    destination : str
        Output file name, created under ``recordings/merged/``.

    Returns
    -------
    subprocess.Popen or None
        The running ffmpeg process (the caller is expected to call
        ``communicate()`` on it), or ``None`` when the destination
        already exists and the merge is skipped.
    """
    target = os.path.join('recordings', 'merged', destination)
    if os.path.exists(target):
        return None
    # Write the playlist for ffmpeg's concat demuxer. A context manager
    # guarantees the file is flushed and closed before ffmpeg reads it
    # (the original left the handle open, relying on GC to flush).
    with open('concat.txt', 'w+') as playlist:
        playlist.write(
            "\n".join("file '{}'".format(os.path.join('recordings', f)) for f in files)
        )
    proc = subprocess.Popen(
        ['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', 'concat.txt',
         '-c', 'copy', target],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Fix: the original called proc.wait() here. With stdout/stderr both
    # PIPEd that can deadlock if ffmpeg fills a pipe buffer (documented in
    # the subprocess docs). The caller calls communicate(), which waits
    # safely, so the redundant wait() is dropped.
    return proc
def _str2bool(value):
    """Interpret common CLI spellings of a boolean (``False``, ``0``, ``no`` ...)."""
    if isinstance(value, bool):
        return value
    return value.strip().lower() not in ('false', '0', 'no', 'off', '')

parser = argparse.ArgumentParser(description='')
# Fix: originally any supplied value -- even the string "False" -- was
# truthy, so --debug could never be switched off from the CLI. Parse it
# as a real boolean instead; the default (True = dry run) is unchanged.
parser.add_argument("--debug", default=True, required=False, type=_str2bool)
args = parser.parse_args()

# Spreadsheet of recordings metadata. NOTE(review): `df` is never used in
# the visible part of the script -- kept for its side effect of failing
# early when the spreadsheet is missing; confirm whether it can be removed.
df = pd.read_excel('recordings/recordings.xlsx')

# NOTE(review): glob's `**` is only recursive with recursive=True, so this
# pattern behaves like 'recordings/*.*' -- confirm that is the intent.
files = [os.path.basename(f) for f in glob.glob('recordings/**.*')]

# Group split recording parts by the destination file they merge into.
merge = defaultdict(list)
for f in files:
    res = re.match(
        r"^([0-9]+_(CW[0-9]+|NA)_CH[0-9]+_(AJ|FB|LM)[0-9]+_(AJ|FB|LM)[0-9]+_[0-9]{6})(_((REC)?([0-9]{1,3}))\.WAV)?",
        f,
    )
    if res:
        prefix = res.groups()[0]  # common stem shared by the split parts
        suffix = res.groups()[7]  # numeric part index, or None if the file is unsplit
        destination = os.path.basename(prefix + '.WAV')
        print(destination)
        if os.path.exists(os.path.join('recordings', destination)):
            continue  # already merged on a previous run
        merge[destination].append((f, suffix))
    else:
        print(f)  # file name does not match the naming convention
failures = []  # rows for failures.csv: {"filename": ..., "errors": ...}
merged = []    # every source file consumed by a (planned or executed) merge

for destination in merge:
    files = sorted(f[0] for f in merge[destination])
    suffixes = sorted(str(f[1]) for f in merge[destination])
    merged += files
    if 'None' not in suffixes:
        # Every part carries a numeric index: check none is missing before
        # merging. (min/max make the check order-independent, so the earlier
        # lexicographic string sort is harmless here.)
        suffixes = [int(s) for s in suffixes]
        consecutive = suffixes == list(range(min(suffixes), max(suffixes) + 1))
        if not consecutive:
            failures.append({
                "filename": destination,
                "errors": "trying to merge records with missing file ({})".format(",".join(files)),
            })
    # NOTE(review): gist indentation was lost; this guard is assumed to sit
    # at loop level (debug = dry run, never invoke ffmpeg) -- confirm.
    if args.debug:
        continue
    proc = merge_audio(files, destination)
    if proc is None:
        continue  # destination already exists; nothing to do
    (stdout, stderr) = proc.communicate()
    if proc.returncode != 0:
        # Fix: stderr is bytes; decode it so the CSV gets readable text
        # instead of a b'...' repr.
        failures.append({"filename": destination, "errors": stderr.decode(errors="replace")})
# manual_merges = [
# {'destination': '01_SF02_NA_FB05_FB06_190701.WAV', 'files': glob.glob('recordings/01_SF02_NA_FB05_FB06_190701*.WAV')},
# {'destination': '01_SF04_NA_FB07_FB39_190712.WAV', 'files': glob.glob('recordings/01_SF04_NA_FB07_FB39_190712*.WAV')},
# {'destination': '01_SF05_NA_FB07_FB12_190712.WAV', 'files': glob.glob('recordings/01_SF05_NA_FB07_FB12_190712*.WAV')},
# {'destination': '01_SF02_NA_FB05_FB08_190701.WAV', 'files': glob.glob('recordings/01_SF02_NA_FB05_FB08_190701*.WAV')},
# {'destination': '01_CW05_NA_FB07_FB08_190712.WAV', 'files': glob.glob('recordings/01_CW05_NA_FB07_FB08_190712*.WAV')},
# {'destination': '01_SF04_NA_FB07_FB14_190712.WAV', 'files': glob.glob('recordings/01_SF04_NA_FB07_FB14_190712*.WAV')},
# {'destination': '01_SF05_NA_FB07_FB13_190712.WAV', 'files': glob.glob('recordings/01_SF05_NA_FB07_FB13_190712*.WAV')},
# {'destination': '01_CW02_CH02_LM03_LM40_190619.WAV', 'files': glob.glob('recordings/REC00*.WAV')}
# ]
# for mm in manual_merges:
# proc = merge_audio([os.path.basename(f) for f in mm['files']], mm['destination'])
# merged += [os.path.basename(f) for f in mm['files']]
# if proc is None:
# continue
# (stdout, stderr) = proc.communicate()
# if proc.returncode != 0:
# failures.append({"filename": mm['destination'], "errors": stderr})
# double_extension = [
# '01_CW03_NA_FB07_FB24_190712.WAV',
# '01_CW05_NA_FB07_FB03_190712.WAV',
# '01_SF03_NA_FB12_FB35_190807.WAV',
# '01_CW03_NA_FB07_FB22_190712.WAV',
# '01_SF03_NA_FB12_FB20_190807.WAV'
# ]
# for de in double_extension:
# os.rename(os.path.join('recordings', de + '.WAV'), os.path.join('recordings', de))
# renames = [
# ['01_CW03_CH03_FB14_FB14_190814.WAV', '01_CW04_CH04_FB14_FB14_190814.WAV'],
# ['01_CW02_CH02_FB06_FB01_190705.WAV', '01_CW02_CH02_FB06_FB01_190704.WAV'],
# ['01_CW03_CH03_FB06_FB17_190705.WAV', '01_CW03_CH03_FB06_FB17_190704.WAV'],
# ['01_CW04_CH04_FB06_FB24_190705.WAV', '01_CW04_CH04_FB06_FB24_190704.WAV'],
# ['01_CW05_CH05_FB06_FB11_190705.WAV', '01_CW05_CH05_FB06_FB11_190704.WAV'],
# ['01_CW06_CH06_FB06_FB02_190705.WAV', '01_CW06_CH06_FB06_FB02_190704.WAV'],
# ['01_CW01_CH01_FB14_FB20_190813.WAV', '01_CW01_CH01_FB14_FB20_190814.WAV'],
# ['01_CW02_CH02_FB06_FB07_190705.WAV', '01_CW02_CH02_FB06_FB07_190704.WAV'],
# ['01_CW03_CH03_FB06_FB39_190705.WAV', '01_CW03_CH03_FB06_FB39_190704.WAV'],
# ['01_CW04_CH04_FB06_FB35_190705.WAV', '01_CW04_CH04_FB06_FB35_190704.WAV'],
# ['01_CW05_CH05_FB06_FB15_190705.WAV', '01_CW05_CH05_FB06_FB15_190704.WAV'],
# ['01_CW06_CH06_FB06_FB14_190705.WAV', '01_CW06_CH06_FB06_FB14_190704.WAV'],
# ['01_CW04_CH04_LM07_LM31_190708.WAV', '01_CW02_CH02_LM07_LM31_190708.WAV'],
# ['02_CW02_CH02_FB13_FB27_190811.WAV', '01_CW02_CH02_FB13_FB27_190811.WAV'],
# ['01_CW01_CH01_LM04_LM31_190622.WAV', '01_CW02_CH02_LM04_LM31_190622.WAV'],
# ['01_CW02_CH02_FB11_FB37_1908029.WAV', '01_CW02_CH02_FB11_FB37_190802.WAV'],
# ['01_CW04_CH04_FB03_FB1_190622.WAV', '01_CW04_CH04_FB03_FB01_190622.WAV'],
# ['01_CW06_CH06_FB03_FB8_190622.WAV', '01_CW06_CH06_FB03_FB08_190622.WAV'],
# ['01_CW01_CH01_FB04_FB24_190625.WAV', '01_CW01_CH01_FB04_FB25_190625.WAV'],
# ['01_CW03_CH03_FB14_FB10_190814.WAV', '01_CW04_CH04_FB14_FB10_190814.WAV'],
# ]
# for r in renames:
# os.rename(os.path.join('recordings', r[0]), os.path.join('recordings', r[1]))
# Persist the outcome: failed merges (with reasons) and every file consumed.
failure_report = pd.DataFrame(failures)
failure_report.to_csv('failures.csv', index=False)
merged_report = pd.DataFrame([{'filename': name} for name in merged])
merged_report.to_csv('merged.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment