Skip to content

Instantly share code, notes, and snippets.

@lucasgautheron
Last active October 6, 2020 11:49
Show Gist options
  • Save lucasgautheron/0f7704290305480c089fd650d1593f57 to your computer and use it in GitHub Desktop.
import pandas as pd
import glob
import os
import subprocess
import re
from collections import defaultdict
import argparse
def merge_audio(files, destination):
    """Concatenate WAV files into recordings/merged/<destination> with ffmpeg.

    Uses ffmpeg's concat demuxer, driven by a temporary ``concat.txt``
    playlist written in the current working directory.

    Parameters
    ----------
    files : list of str
        File names relative to the ``recordings/`` directory, in merge order.
    destination : str
        Output file name, created under ``recordings/merged/``.

    Returns
    -------
    subprocess.Popen or None
        The running ffmpeg process (the caller is expected to call
        ``communicate()`` on it), or ``None`` when the destination
        already exists and the merge is skipped.
    """
    target = os.path.join('recordings', 'merged', destination)
    if os.path.exists(target):
        return None
    # Write the playlist for ffmpeg's concat demuxer. A context manager
    # guarantees the file is flushed and closed before ffmpeg reads it
    # (the original left the handle open, relying on GC to flush).
    with open('concat.txt', 'w+') as playlist:
        playlist.write(
            "\n".join("file '{}'".format(os.path.join('recordings', f)) for f in files)
        )
    proc = subprocess.Popen(
        ['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', 'concat.txt',
         '-c', 'copy', target],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Fix: the original called proc.wait() here. With stdout/stderr both
    # PIPEd that can deadlock if ffmpeg fills a pipe buffer (documented in
    # the subprocess docs). The caller calls communicate(), which waits
    # safely, so the redundant wait() is dropped.
    return proc
def _str2bool(value):
    """Interpret common CLI spellings of a boolean (``False``, ``0``, ``no`` ...)."""
    if isinstance(value, bool):
        return value
    return value.strip().lower() not in ('false', '0', 'no', 'off', '')

parser = argparse.ArgumentParser(description='')
# Fix: originally any supplied value -- even the string "False" -- was
# truthy, so --debug could never be switched off from the CLI. Parse it
# as a real boolean instead; the default (True = dry run) is unchanged.
parser.add_argument("--debug", default=True, required=False, type=_str2bool)
args = parser.parse_args()

# Spreadsheet of recordings metadata. NOTE(review): `df` is never used in
# the visible part of the script -- kept for its side effect of failing
# early when the spreadsheet is missing; confirm whether it can be removed.
df = pd.read_excel('recordings/recordings.xlsx')

# NOTE(review): glob's `**` is only recursive with recursive=True, so this
# pattern behaves like 'recordings/*.*' -- confirm that is the intent.
files = [os.path.basename(f) for f in glob.glob('recordings/**.*')]

# Group split recording parts by the destination file they merge into.
merge = defaultdict(list)
for f in files:
    res = re.match(
        r"^([0-9]+_(CW[0-9]+|NA)_CH[0-9]+_(AJ|FB|LM)[0-9]+_(AJ|FB|LM)[0-9]+_[0-9]{6})(_((REC)?([0-9]{1,3}))\.WAV)?",
        f,
    )
    if res:
        prefix = res.groups()[0]  # common stem shared by the split parts
        suffix = res.groups()[7]  # numeric part index, or None if the file is unsplit
        destination = os.path.basename(prefix + '.WAV')
        print(destination)
        if os.path.exists(os.path.join('recordings', destination)):
            continue  # already merged on a previous run
        merge[destination].append((f, suffix))
    else:
        print(f)  # file name does not match the naming convention
failures = []  # rows for failures.csv: {"filename": ..., "errors": ...}
merged = []    # every source file consumed by a (planned or executed) merge

for destination in merge:
    files = sorted(f[0] for f in merge[destination])
    suffixes = sorted(str(f[1]) for f in merge[destination])
    merged += files
    if 'None' not in suffixes:
        # Every part carries a numeric index: check none is missing before
        # merging. (min/max make the check order-independent, so the earlier
        # lexicographic string sort is harmless here.)
        suffixes = [int(s) for s in suffixes]
        consecutive = suffixes == list(range(min(suffixes), max(suffixes) + 1))
        if not consecutive:
            failures.append({
                "filename": destination,
                "errors": "trying to merge records with missing file ({})".format(",".join(files)),
            })
    # NOTE(review): gist indentation was lost; this guard is assumed to sit
    # at loop level (debug = dry run, never invoke ffmpeg) -- confirm.
    if args.debug:
        continue
    proc = merge_audio(files, destination)
    if proc is None:
        continue  # destination already exists; nothing to do
    (stdout, stderr) = proc.communicate()
    if proc.returncode != 0:
        # Fix: stderr is bytes; decode it so the CSV gets readable text
        # instead of a b'...' repr.
        failures.append({"filename": destination, "errors": stderr.decode(errors="replace")})
# manual_merges = [
# {'destination': '01_SF02_NA_FB05_FB06_190701.WAV', 'files': glob.glob('recordings/01_SF02_NA_FB05_FB06_190701*.WAV')},
# {'destination': '01_SF04_NA_FB07_FB39_190712.WAV', 'files': glob.glob('recordings/01_SF04_NA_FB07_FB39_190712*.WAV')},
# {'destination': '01_SF05_NA_FB07_FB12_190712.WAV', 'files': glob.glob('recordings/01_SF05_NA_FB07_FB12_190712*.WAV')},
# {'destination': '01_SF02_NA_FB05_FB08_190701.WAV', 'files': glob.glob('recordings/01_SF02_NA_FB05_FB08_190701*.WAV')},
# {'destination': '01_CW05_NA_FB07_FB08_190712.WAV', 'files': glob.glob('recordings/01_CW05_NA_FB07_FB08_190712*.WAV')},
# {'destination': '01_SF04_NA_FB07_FB14_190712.WAV', 'files': glob.glob('recordings/01_SF04_NA_FB07_FB14_190712*.WAV')},
# {'destination': '01_SF05_NA_FB07_FB13_190712.WAV', 'files': glob.glob('recordings/01_SF05_NA_FB07_FB13_190712*.WAV')},
# {'destination': '01_CW02_CH02_LM03_LM40_190619.WAV', 'files': glob.glob('recordings/REC00*.WAV')}
# ]
# for mm in manual_merges:
# proc = merge_audio([os.path.basename(f) for f in mm['files']], mm['destination'])
# merged += [os.path.basename(f) for f in mm['files']]
# if proc is None:
# continue
# (stdout, stderr) = proc.communicate()
# if proc.returncode != 0:
# failures.append({"filename": mm['destination'], "errors": stderr})
# double_extension = [
# '01_CW03_NA_FB07_FB24_190712.WAV',
# '01_CW05_NA_FB07_FB03_190712.WAV',
# '01_SF03_NA_FB12_FB35_190807.WAV',
# '01_CW03_NA_FB07_FB22_190712.WAV',
# '01_SF03_NA_FB12_FB20_190807.WAV'
# ]
# for de in double_extension:
# os.rename(os.path.join('recordings', de + '.WAV'), os.path.join('recordings', de))
# renames = [
# ['01_CW03_CH03_FB14_FB14_190814.WAV', '01_CW04_CH04_FB14_FB14_190814.WAV'],
# ['01_CW02_CH02_FB06_FB01_190705.WAV', '01_CW02_CH02_FB06_FB01_190704.WAV'],
# ['01_CW03_CH03_FB06_FB17_190705.WAV', '01_CW03_CH03_FB06_FB17_190704.WAV'],
# ['01_CW04_CH04_FB06_FB24_190705.WAV', '01_CW04_CH04_FB06_FB24_190704.WAV'],
# ['01_CW05_CH05_FB06_FB11_190705.WAV', '01_CW05_CH05_FB06_FB11_190704.WAV'],
# ['01_CW06_CH06_FB06_FB02_190705.WAV', '01_CW06_CH06_FB06_FB02_190704.WAV'],
# ['01_CW01_CH01_FB14_FB20_190813.WAV', '01_CW01_CH01_FB14_FB20_190814.WAV'],
# ['01_CW02_CH02_FB06_FB07_190705.WAV', '01_CW02_CH02_FB06_FB07_190704.WAV'],
# ['01_CW03_CH03_FB06_FB39_190705.WAV', '01_CW03_CH03_FB06_FB39_190704.WAV'],
# ['01_CW04_CH04_FB06_FB35_190705.WAV', '01_CW04_CH04_FB06_FB35_190704.WAV'],
# ['01_CW05_CH05_FB06_FB15_190705.WAV', '01_CW05_CH05_FB06_FB15_190704.WAV'],
# ['01_CW06_CH06_FB06_FB14_190705.WAV', '01_CW06_CH06_FB06_FB14_190704.WAV'],
# ['01_CW04_CH04_LM07_LM31_190708.WAV', '01_CW02_CH02_LM07_LM31_190708.WAV'],
# ['02_CW02_CH02_FB13_FB27_190811.WAV', '01_CW02_CH02_FB13_FB27_190811.WAV'],
# ['01_CW01_CH01_LM04_LM31_190622.WAV', '01_CW02_CH02_LM04_LM31_190622.WAV'],
# ['01_CW02_CH02_FB11_FB37_1908029.WAV', '01_CW02_CH02_FB11_FB37_190802.WAV'],
# ['01_CW04_CH04_FB03_FB1_190622.WAV', '01_CW04_CH04_FB03_FB01_190622.WAV'],
# ['01_CW06_CH06_FB03_FB8_190622.WAV', '01_CW06_CH06_FB03_FB08_190622.WAV'],
# ['01_CW01_CH01_FB04_FB24_190625.WAV', '01_CW01_CH01_FB04_FB25_190625.WAV'],
# ['01_CW03_CH03_FB14_FB10_190814.WAV', '01_CW04_CH04_FB14_FB10_190814.WAV'],
# ]
# for r in renames:
# os.rename(os.path.join('recordings', r[0]), os.path.join('recordings', r[1]))
# Persist the outcome: failed merges (with reasons) and every file consumed.
failure_report = pd.DataFrame(failures)
failure_report.to_csv('failures.csv', index=False)
merged_report = pd.DataFrame([{'filename': name} for name in merged])
merged_report.to_csv('merged.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment