atdt/README.txt

## README.txt
Start by replacing all spaces in the .mp4 file names with underscores:
rename 's/\s+/_/g' **/*.mp4
(Run this in bash, not fish!)

## audio.sh
#!/bin/bash
# Convert files to mp3; run this second, after chapters.py has written
# <directory>/FFMETADATAFILE for every lecture directory.
#
# For each directory named P* (excluding *subtitles*) directly under the
# current directory: extract the audio of every .mp4 into an .mp3 next to it,
# then concatenate those .mp3 files into ./<directory>.mp3 with the chapter
# metadata attached.
#
# Requires ffmpeg and a sort that supports -V (e.g. GNU coreutils).
set -euxo pipefail

# Print the concat-demuxer "file" directive for track name $1. An embedded
# single quote is written as '\'' (close quote, escaped quote, reopen quote),
# because the concat list syntax has no escape inside single quotes.
concat_entry() {
  local name=$1
  local quote="'"
  local escaped_quote="'\\''"
  printf "file '%s'\n" "${name//$quote/$escaped_quote}"
}

# Extract the audio of every .mp4 directly inside directory $1, in version
# order (1, 2, ..., 10), and list the resulting .mp3 files in $1/file_list.
extract_tracks() {
  local directory=$1
  local file_list="${directory}/file_list"
  local file base_name

  # Start from a clean list so a re-run does not append duplicate entries.
  rm -f "$file_list"

  # Read find's output line by line instead of word-splitting $(find ...);
  # this keeps names with spaces intact without touching the global IFS.
  while IFS= read -r file; do
    base_name=$(basename "$file" .mp4)

    # Extracting audio and converting to mp3
    ffmpeg -nostdin -i "$file" -q:a 0 -map a "${directory}/${base_name}.mp3"

    # Entries are relative to the location of the list file itself.
    concat_entry "${base_name}.mp3" >> "$file_list"
  done < <(find "$directory" -maxdepth 1 -name '*.mp4' | sort -V)
}

# Process every lecture directory found under the current directory.
main() {
  local directory

  find . -maxdepth 1 -type d -name 'P*' ! -name '*subtitles*' \
    -exec basename {} \; \
    | sort -n \
    | while IFS= read -r directory; do
        extract_tracks "$directory"

        # Combine all MP3 files into one with chapters. -nostdin keeps ffmpeg
        # from consuming the directory names this loop is reading.
        ffmpeg -nostdin -f concat -safe 0 -i "${directory}/file_list" \
          -i "${directory}/FFMETADATAFILE" -map_metadata 1 -codec copy \
          "${directory}.mp3"
      done
}

main "$@"

## chapters.py
# Create FFMETADATA files. Run this first.
import os
import re
import glob
import subprocess

# Global section of the metadata file; {title} is the lecture name.
header_template = ''';FFMETADATA1
title={title}
artist=OMSCS 6200
'''

# One chapter entry; START/END are in TIMEBASE units (milliseconds).
chapter_template = """
[CHAPTER]
TIMEBASE=1/1000
START={start}
END={end}
title={title}"""


import contextlib
import os


def escape_metadata(value):
    """Backslash-escape the characters the FFMETADATA format treats as special.

    Those are '=', ';', '#', the backslash itself and a newline.
    """
    return re.sub(r'([=;#\\\n])', r'\\\1', value)


def lecture_title(directory):
    """Return the lecture title for a directory path.

    Only the final path component is used, so the title does not contain the
    absolute path of the course folder; runs of spaces/underscores become a
    single space.
    """
    name = os.path.basename(os.path.normpath(directory))
    return re.sub(r'[ _]+', ' ', name)


def track_number(filename):
    """Return the leading integer of a '<number>_...' file name (sort key)."""
    return int(filename.split('_')[0])


def chapter_title(filename):
    """Return the chapter title encoded in a '<number>_-_<Title>.mp4' name.

    Raises:
        ValueError: if the file name does not follow that pattern.
    """
    match = re.match(r'\d+_-_(.*)\.mp4', filename)
    if match is None:
        raise ValueError(
            "unexpected file name %r (expected '<number>_-_<title>.mp4')" % filename)
    return match.group(1).replace('_', ' ')


def probe_duration_ms(path):
    """Return the duration of a media file in whole milliseconds, via ffprobe.

    Raises:
        subprocess.CalledProcessError: if ffprobe exits with a failure status.
    """
    p = subprocess.run(
        ['ffprobe', '-i', path, '-show_entries', 'format=duration',
         '-v', 'quiet', '-of', 'csv=p=0'],
        check=True, capture_output=True)
    return int(float(p.stdout.strip()) * 1000)


def build_chapters(tracks):
    """Turn (title, duration_ms) pairs into contiguous (start, end, title) tuples.

    Each chapter starts exactly where the previous one ends, so the offsets
    do not drift away from the concatenated audio as chapters accumulate.
    """
    chapters = []
    start = 0
    for title, duration in tracks:
        end = start + duration
        chapters.append((start, end, title))
        start = end
    return chapters


def render_metadata(title, chapters):
    """Return the full FFMETADATA text for a lecture title and its chapters."""
    pieces = [header_template.format(title=escape_metadata(title)), '\n']
    for start, end, name in chapters:
        pieces.append(chapter_template.format(
            start=start, end=end, title=escape_metadata(name)))
        pieces.append('\n')
    return ''.join(pieces)


def write_metadata(directory):
    """Write <directory>/FFMETADATAFILE describing the .mp4 files inside it.

    Every duration is probed before the file is opened, so a failing ffprobe
    does not leave a truncated metadata file behind.
    """
    pattern = os.path.join(glob.escape(directory), '*.mp4')
    names = sorted((os.path.basename(p) for p in glob.glob(pattern)),
                   key=track_number)
    tracks = [(chapter_title(n), probe_duration_ms(os.path.join(directory, n)))
              for n in names]
    text = render_metadata(lecture_title(directory), build_chapters(tracks))
    with open(os.path.join(directory, 'FFMETADATAFILE'), 'wt') as metadata_file:
        metadata_file.write(text)


def main():
    """Create a metadata file in every P* (non-subtitles) directory under cwd."""
    base_dir = os.getcwd()
    dirs = [os.path.join(base_dir, d) for d in next(os.walk('.'))[1]
            if d.startswith('P') and 'subtitles' not in d]
    for directory in dirs:
        print(directory)
        write_metadata(directory)


if __name__ == '__main__':
    main()
	Start by replacing all spaces with underscores
	rename 's/\s+/_/g' **/*.mp4
	(bash not fish!)
	#!/bin/bash
	# Convert files to mp3; run this second, after chapters.py has written
	# <directory>/FFMETADATAFILE for every lecture directory.
	#
	# For each directory named P* (excluding *subtitles*) directly under the
	# current directory: extract the audio of every .mp4 into an .mp3 next to it,
	# then concatenate those .mp3 files into ./<directory>.mp3 with the chapter
	# metadata attached.
	#
	# Requires ffmpeg and a sort that supports -V (e.g. GNU coreutils).
	set -euxo pipefail

	# Print the concat-demuxer "file" directive for track name $1. An embedded
	# single quote is written as '\'' (close quote, escaped quote, reopen quote),
	# because the concat list syntax has no escape inside single quotes.
	concat_entry() {
	  local name=$1
	  local quote="'"
	  local escaped_quote="'\\''"
	  printf "file '%s'\n" "${name//$quote/$escaped_quote}"
	}

	# Extract the audio of every .mp4 directly inside directory $1, in version
	# order (1, 2, ..., 10), and list the resulting .mp3 files in $1/file_list.
	extract_tracks() {
	  local directory=$1
	  local file_list="${directory}/file_list"
	  local file base_name

	  # Start from a clean list so a re-run does not append duplicate entries.
	  rm -f "$file_list"

	  # Read find's output line by line instead of word-splitting $(find ...);
	  # this keeps names with spaces intact without touching the global IFS.
	  while IFS= read -r file; do
	    base_name=$(basename "$file" .mp4)

	    # Extracting audio and converting to mp3
	    ffmpeg -nostdin -i "$file" -q:a 0 -map a "${directory}/${base_name}.mp3"

	    # Entries are relative to the location of the list file itself.
	    concat_entry "${base_name}.mp3" >> "$file_list"
	  done < <(find "$directory" -maxdepth 1 -name '*.mp4' | sort -V)
	}

	# Process every lecture directory found under the current directory.
	main() {
	  local directory

	  find . -maxdepth 1 -type d -name 'P*' ! -name '*subtitles*' \
	    -exec basename {} \; \
	    | sort -n \
	    | while IFS= read -r directory; do
	        extract_tracks "$directory"

	        # Combine all MP3 files into one with chapters. -nostdin keeps ffmpeg
	        # from consuming the directory names this loop is reading.
	        ffmpeg -nostdin -f concat -safe 0 -i "${directory}/file_list" \
	          -i "${directory}/FFMETADATAFILE" -map_metadata 1 -codec copy \
	          "${directory}.mp3"
	      done
	}

	main "$@"
	# Create FFMETADATA files. Run this first.
	import os
	import re
	import glob
	import subprocess

	# Global section of the metadata file; {title} is the lecture name.
	# Written with explicit \n escapes so the tab prefix of this listing can
	# never end up inside the template text.
	header_template = (
	    ';FFMETADATA1\n'
	    'title={title}\n'
	    'artist=OMSCS 6200\n'
	)

	# One chapter entry; START/END are in TIMEBASE units (milliseconds).
	chapter_template = (
	    '\n'
	    '[CHAPTER]\n'
	    'TIMEBASE=1/1000\n'
	    'START={start}\n'
	    'END={end}\n'
	    'title={title}'
	)


	import contextlib
	import os


	def escape_metadata(value):
	    """Backslash-escape the FFMETADATA special characters: = ; # \\ and newline."""
	    return re.sub(r'([=;#\\\n])', r'\\\1', value)


	def lecture_title(directory):
	    """Return the lecture title: the last path component, separators collapsed to one space."""
	    name = os.path.basename(os.path.normpath(directory))
	    return re.sub(r'[ _]+', ' ', name)


	def track_number(filename):
	    """Return the leading integer of a '<number>_...' file name (sort key)."""
	    return int(filename.split('_')[0])


	def chapter_title(filename):
	    """Return the title encoded in '<number>_-_<Title>.mp4'; ValueError if the name does not match."""
	    match = re.match(r'\d+_-_(.*)\.mp4', filename)
	    if match is None:
	        raise ValueError(
	            "unexpected file name %r (expected '<number>_-_<title>.mp4')" % filename)
	    return match.group(1).replace('_', ' ')


	def probe_duration_ms(path):
	    """Return the duration of a media file in whole milliseconds, via ffprobe."""
	    p = subprocess.run(
	        ['ffprobe', '-i', path, '-show_entries', 'format=duration',
	         '-v', 'quiet', '-of', 'csv=p=0'],
	        check=True, capture_output=True)
	    return int(float(p.stdout.strip()) * 1000)


	def build_chapters(tracks):
	    """Turn (title, duration_ms) pairs into contiguous (start, end, title) tuples."""
	    chapters = []
	    start = 0
	    for title, duration in tracks:
	        end = start + duration
	        chapters.append((start, end, title))
	        # The next chapter begins exactly where this one ends (no 1 ms gap).
	        start = end
	    return chapters


	def render_metadata(title, chapters):
	    """Return the full FFMETADATA text for a lecture title and its chapters."""
	    pieces = [header_template.format(title=escape_metadata(title)), '\n']
	    for start, end, name in chapters:
	        pieces.append(chapter_template.format(
	            start=start, end=end, title=escape_metadata(name)))
	        pieces.append('\n')
	    return ''.join(pieces)


	def write_metadata(directory):
	    """Write <directory>/FFMETADATAFILE describing the .mp4 files inside it."""
	    pattern = os.path.join(glob.escape(directory), '*.mp4')
	    names = sorted((os.path.basename(p) for p in glob.glob(pattern)),
	                   key=track_number)
	    # Probe every duration before opening the output, so a failing ffprobe
	    # does not leave a truncated metadata file behind.
	    tracks = [(chapter_title(n), probe_duration_ms(os.path.join(directory, n)))
	              for n in names]
	    text = render_metadata(lecture_title(directory), build_chapters(tracks))
	    with open(os.path.join(directory, 'FFMETADATAFILE'), 'wt') as metadata_file:
	        metadata_file.write(text)


	def main():
	    """Create a metadata file in every P* (non-subtitles) directory under cwd."""
	    base_dir = os.getcwd()
	    dirs = [os.path.join(base_dir, d) for d in next(os.walk('.'))[1]
	            if d.startswith('P') and 'subtitles' not in d]
	    for directory in dirs:
	        print(directory)
	        write_metadata(directory)


	if __name__ == '__main__':
	    main()