|
# %% |
|
import os |
|
import re |
|
from pathlib import Path |
|
from subprocess import run |
|
|
|
# Subtitle tracks to export, written as {SUBTITLENAME: FOLDERNAME}:
# the key is the title of the subtitle stream to look for, the value is the
# name of the folder that receives the exported files for that track.
subtitles = {
    'English(CC)': 'en',
    'Japanese(CC)': 'jp',
}

# Set to True to print the parsed stream information while processing.
DEBUG = False
|
|
|
# %% |
|
|
|
|
|
# File extensions (lower-case, without the dot) that are treated as video
# files; extend the list to process further container formats.
vfmt = ['mp4', 'mkv']

# Create one export folder per configured subtitle track. A folder left over
# from an earlier run is reported and reused.
for folder_name in subtitles.values():
    try:
        os.mkdir(folder_name)
    except FileExistsError:
        print(f"Folder \"{folder_name}\" already exists")
|
|
|
|
|
# %% |
|
def filelist():
    """Return the names of the files that sit directly in the current folder."""
    # The first triple produced by os.walk describes the top folder itself:
    # (dirpath, dirnames, filenames). Only the file names are of interest.
    top_level = next(os.walk(Path('.')))
    return top_level[2]
|
|
|
# %% |
|
def fclean(filename: str):
    """Return *filename* reduced to characters that need no escaping on the ffmpeg command line.

    Word characters (letters, digits, underscore), '-' and '.' are kept;
    every other character (spaces, brackets, quotes, ...) is dropped.
    """
    allowed = re.compile(r"[\w\d\-_.]")
    kept = [char for char in filename if allowed.match(char) is not None]
    return ''.join(kept)
|
|
|
# %% |
|
# Matches one stream entry of ffprobe's report together with the title tag that
# directly follows its "Metadata:" line, and captures a tuple such as
# ('3', 'eng', 'Subtitle', 'English'):
# (absolute stream index, language code, stream type, stream title).
# Streams without a language tag or without such a title are not matched.
regex = re.compile(r"Stream #0:(\d+)\((\w+)\): (\w+):[^\n]*\n *Metadata:\n *title *: *([^\n]*)")


def _run_tool(argv):
    """Run an external command; return its CompletedProcess, or None if it could not be started."""
    try:
        # An argument list (no shell) passes file names through verbatim, so
        # no quoting or escaping is involved.
        return run(argv, capture_output=True, text=True)
    except OSError as err:
        print(f"Could not run {argv[0]}: {err}")
        return None


def _subtitle_streams(video):
    """Return {title: absolute stream index} for the titled subtitle streams of *video*."""
    # The "./" prefix keeps a name starting with "-" from being read as an option.
    probe = _run_tool(["ffprobe", f"./{video}"])
    if probe is None:
        return {}
    streams = {}
    # ffprobe writes its stream report to stderr.
    for entry in regex.findall(probe.stderr):
        if DEBUG:
            print(entry)
        index, _language, kind, title = entry
        if kind == "Subtitle":
            # When several streams share a title, the first one wins.
            streams.setdefault(title, index)
    return streams


files = filelist()
for f in files:
    # Ignore non-video files. The extension is compared without its dot, so
    # vfmt entries of any length (e.g. 'webm') work.
    if Path(f).suffix[1:].lower() not in vfmt:
        continue

    # Clean the file name on disk and continue with the cleaned name.
    cleaned = fclean(f)
    if cleaned != f:
        if os.path.exists(cleaned):
            # os.rename may silently replace an existing file; skip instead.
            print(f"Cannot rename \"{f}\": \"{cleaned}\" already exists")
            continue
        try:
            os.rename(f"./{f}", f"./{cleaned}")
        except OSError as e:
            print(f"Cannot rename \"{f}\": {e}")
            continue
        f = cleaned

    # ffprobe numbers all streams together (0: video, 1: audio, 2: audio,
    # 3-10: subtitles). ffmpeg accepts that absolute number directly as
    # "-map 0:<number>", so the stream whose title matched is selected exactly,
    # even when other subtitle streams carry no title and are therefore missing
    # from the parsed list (a counted "s:<k>" index would then be off).
    streams = _subtitle_streams(f)
    stem = Path(f).stem

    for sub, folder in subtitles.items():
        if sub not in streams:
            print(f"For language {sub} no subtitle found in {f}")
            continue
        target = f"{folder}/{stem}.srt"
        result = _run_tool(["ffmpeg", "-i", f"./{f}", "-map", f"0:{streams[sub]}", "-y", target])
        if result is None or result.returncode != 0:
            print(f"ffmpeg could not export {sub} from {f} to {target}")
            if DEBUG and result is not None:
                print(result.stderr)

    if DEBUG:
        print(streams)