# dtlnor/cutAss.py

## cutAss.py
import sys
import io
import os
import re
import unicodedata
from decimal import *
from multiprocessing.pool import ThreadPool as Pool

# from multiprocessing import Pool
def slugify(value, allow_unicode=True):
    """
    Deprecated: reduce ``value`` to a dash-separated slug.

    Adapted from
    https://github.com/django/django/blob/master/django/utils/text.py

    ``value`` is converted with ``str()`` and Unicode-normalized: NFKC when
    ``allow_unicode`` is true, otherwise NFKD followed by dropping every
    character that cannot be encoded as ASCII. Characters that are not word
    characters, whitespace or hyphens are then removed, each run of
    whitespace and hyphens collapses to a single ``-``, and leading/trailing
    dashes and underscores are stripped. Case is left untouched.

    Emits a ``DeprecationWarning`` on every call.
    """
    # Warn from inside the function. Applying ``@DeprecationWarning`` as a
    # decorator would rebind the name to an exception instance, which is not
    # callable.
    import warnings

    warnings.warn("slugify is deprecated", DeprecationWarning, stacklevel=2)

    text = str(value)
    if allow_unicode:
        text = unicodedata.normalize('NFKC', text)
    else:
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')
    text = re.sub(r'[^\w\s-]', '', text)
    return re.sub(r'[-\s]+', '-', text).strip('-_')

# Overkill for this script, which only needs to run on Windows.
def get_valid_filename(name):
    """
    Deprecated: return ``name`` reduced to a conservative file name.

    Adapted from
    https://github.com/django/django/blob/master/django/utils/text.py

    Leading and trailing whitespace is removed, remaining spaces become
    underscores, and every character that is not a word character, dash or
    dot is dropped. If nothing usable is left (``""``, ``"."`` or ``".."``)
    a message is printed and that result is still returned.

    Emits a ``DeprecationWarning`` on every call.

    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    # Warn from inside the function. Applying ``@DeprecationWarning`` as a
    # decorator would rebind the name to an exception instance, which is not
    # callable.
    import warnings

    warnings.warn(
        "get_valid_filename is deprecated", DeprecationWarning, stacklevel=2
    )

    s = str(name).strip().replace(" ", "_")
    s = re.sub(r"(?u)[^-\w.]", "", s)
    if s in {"", ".", ".."}:
        print(f"Could not derive file name from '{name}'")
    return s


def translate_valid_filename(name):
    """
    Make ``name`` usable as a Windows file name without losing characters.

    The text is NFKC-normalized and stripped of surrounding whitespace.
    Each character Windows forbids in file names (``< > : " / \\ | ? *``) is
    then swapped for its full-width look-alike, and tabs become spaces.

    Args:
        name: the raw title (must be a ``str``).

    Returns:
        The sanitized name.
    """
    # One translation table applied in a single pass; none of the
    # replacement characters is itself a key, so this matches replacing the
    # characters one after another.
    half_to_full = str.maketrans({
        '<': '＜',
        '>': '＞',
        ':': '：',
        '/': '／',
        '|': '｜',
        '?': '？',
        '*': '＊',  # also reserved on Windows
        '\\': '＼',
        '"': '＂',
        '\t': ' ',
    })

    normalized = unicodedata.normalize('NFKC', name).strip()
    return normalized.translate(half_to_full)

def timecodetoSeconcd(fullstring: str = "") -> Decimal:
    """
    Convert a timecode made of three colon-separated fields into seconds.

    The first two fields (hours, minutes) are parsed as integers; the last
    one (seconds, possibly fractional) is parsed as a ``Decimal`` so no
    precision is lost.

    Raises:
        ValueError: if the string does not split into exactly three fields
            or the hour/minute fields are not integers.
    """
    hours, minutes, seconds = fullstring.split(":")
    whole_part = int(hours) * 60 * 60 + int(minutes) * 60
    return whole_part + Decimal(seconds)

def GetDuration(start: str = "", end: str = "") -> str:
    """
    Return the time from ``start`` to ``end`` in seconds, as a string.

    Both arguments are timecodes understood by ``timecodetoSeconcd``; the
    result is ``str()`` of the difference between the two converted values.
    """
    # The end timecode is converted first, as in the subtraction's operand order.
    end_seconds = timecodetoSeconcd(end)
    start_seconds = timecodetoSeconcd(start)
    return str(end_seconds - start_seconds)

def findFileFullPath(inputpath: str = "", specificPath: str = "") -> str:
    """
    Resolve ``inputpath`` to an existing file, trying several locations.

    Candidates are checked in this order and the first existing file wins:

    1. when ``specificPath`` is given: the bare file name inside it, then
       ``inputpath`` taken relative to it;
    2. when ``inputpath`` is absolute: ``inputpath`` itself;
    3. otherwise: the bare file name next to this script, then in the
       current working directory, then ``inputpath`` relative to each of
       those two directories.

    Returns:
        The path of the first candidate that is a file, or ``""``.
    """
    inputpath = inputpath.strip()
    bare_name = os.path.basename(inputpath)

    def candidates():
        # Generated lazily so later lookups only happen if earlier ones miss.
        if specificPath:
            yield os.path.join(specificPath, bare_name)
            yield os.path.normpath(os.path.join(specificPath, inputpath))

        if os.path.isabs(inputpath):
            yield inputpath
            return

        yield os.path.join(os.path.dirname(__file__), bare_name)
        yield os.path.join(os.getcwd(), bare_name)
        yield os.path.normpath(os.path.join(os.path.dirname(__file__), inputpath))
        yield os.path.normpath(os.path.join(os.getcwd(), inputpath))

    for candidate in candidates():
        if os.path.isfile(candidate):
            return candidate
    return ""

def _build_ffmpeg_command(video_path, output_folder, index, song_name,
                          start_stamp, end_stamp, audio_only):
    """Return the ffmpeg command line that cuts one clip out of ``video_path``."""
    # ``-ss`` is placed after ``-i``: slow, but seeking before the input snaps
    # to the nearest keyframe and some players then fail to decode the result.
    target = f'{output_folder}\\{index:02}.{song_name}'
    if audio_only:
        return (
            f'ffmpeg -i "{video_path}" -vn -ss {start_stamp} -to {end_stamp}'
            f' -c:a copy "{target}.m4a"'
        )
    x264_params = (
        '-x264-params "me=umh:subme=9:merange=48:fast-pskip=0:direct=auto:'
        'weightb=1:keyint=360:min-keyint=1:bframes=12:b-adapt=2:ref=3:'
        'rc-lookahead=80:crf=20:qpmin=9:chroma-qp-offset=-2:aq-mode=3:'
        'aq-strength=0.7:trellis=2:deblock=1,0,0:psy-rd=0.77,0.22:nr=4"'
    )
    return (
        f'ffmpeg -i "{video_path}" -ss {start_stamp} -to {end_stamp}'
        f' -c:v libx264 {x264_params} -c:a copy "{target}.mp4"'
    )


def genCommandList() -> list:
    """
    Build the ffmpeg commands described by the .ass file named in ``sys.argv``.

    ``sys.argv[1]`` is the subtitle file (resolved with ``findFileFullPath``);
    ``sys.argv[2] == "1"`` selects audio-only output (``.m4a``).

    The file is read line by line:

    * ``Video File: ...`` names the source video. A folder named after the
      video (without extension) is created next to the .ass file.
    * A ``Dialogue: ...`` line whose text is not ``out`` (case-insensitive)
      starts a clip: its text becomes the clip name, its start time the
      clip start.
    * A ``Dialogue: ...`` line whose text is ``out`` ends the clip at that
      line's start time and yields one command.

    Returns:
        The command strings in file order; an empty list when the input
        file or the referenced video cannot be found.

    Raises:
        ValueError: if a ``Dialogue`` line appears before any ``Video File``
            line.
    """
    if len(sys.argv) < 2:
        print("usage: cutAss.py <file.ass> [1 = audio only]")
        return []

    ass_path = findFileFullPath(sys.argv[1])
    if not ass_path:
        print("ass file not found: " + sys.argv[1])
        return []

    # argv entries are always strings, so only "1" can enable this.
    audio_only = len(sys.argv) > 2 and sys.argv[2] == "1"

    ass_name = os.path.basename(ass_path)
    ass_folder = os.path.dirname(ass_path)
    if not ass_name.lower().endswith(".ass"):
        print(f'input: "{ass_name}" should be a .ass file at: {ass_folder}')
        return []

    print("input: " + ass_path)

    commands = []
    video_path = None
    output_folder = None
    song_name = None
    start_stamp = None
    count = 1

    # must be utf-8 as it should be ass file
    with io.open(ass_path, mode="r", encoding="utf-8") as ass_file:
        for line in ass_file:
            content = line.strip()

            if content.startswith("Video File: "):
                video_ref = content.removeprefix("Video File: ").replace("/", "\\")
                video_path = findFileFullPath(video_ref, ass_folder)
                if not video_path:
                    print("VideoFile not Found: "
                          + os.path.normpath(os.path.join(ass_folder, video_ref))
                          + " (" + video_ref + ")")
                    return []
                video_stem = os.path.splitext(os.path.basename(video_path))[0]
                output_folder = os.path.join(ass_folder, video_stem)
                if not os.path.exists(output_folder):
                    os.makedirs(output_folder)
                continue

            if not content.startswith("Dialogue: "):
                continue

            if video_path is None:
                raise ValueError("No [Video File] from .ass")
            print("VideoFileFullPath: " + video_path)
            print("-------------------------------------------------------")

            # Nine fixed fields, then the text, which may itself contain
            # commas; only the start time (field 1) and the text are used.
            fields = content.removeprefix("Dialogue: ").split(",", 9)
            if len(fields) < 9:
                print("skipping malformed Dialogue line: " + content)
                continue
            start = fields[1]
            text = fields[9] if len(fields) > 9 else ""

            if text.lower() != "out":
                song_name = translate_valid_filename(text)
                start_stamp = "0" + start
                continue

            if start_stamp is None:
                print('skipping "out" marker without a preceding clip start: '
                      + content)
                continue

            # The start time of the "out" line marks where the clip ends.
            end_stamp = "0" + start
            command = _build_ffmpeg_command(
                video_path, output_folder, count, song_name,
                start_stamp, end_stamp, audio_only,
            )
            count += 1
            print(command)
            commands.append(command)

    return commands

def worker(item):
    """
    Run one shell command (an ffmpeg invocation) and report failure.

    Args:
        item: the complete command line, handed to the shell via
            ``os.system``.

    ``os.system`` reports a failing command through its return value rather
    than by raising, so the returned status is checked explicitly.
    """
    print(item)
    try:
        status = os.system(item)  # do ffmpeg
    except Exception as exc:
        # Only reachable when the command could not be handed to the shell
        # at all (e.g. it is not a string).
        print(f'error with command {item}: {exc}')
        return
    if status != 0:
        print(f'error with command {item} (os.system returned {status})')
def main(pool_size: int = 2):
    """
    Generate the ffmpeg commands and run them on a small thread pool.

    Args:
        pool_size: how many commands run at the same time.
    """
    # genCommandList may hand back nothing on its error paths; treat that
    # as "no work" instead of failing while iterating.
    items = genCommandList() or []
    if not items:
        return

    pool = Pool(pool_size)
    try:
        for item in items:
            pool.apply_async(worker, (item,))
    finally:
        # Stop accepting work, then wait for everything already queued.
        pool.close()
        pool.join()

if __name__ == "__main__":
    main()
    # Wait for a key press so the console window does not close immediately
    # once the work is done; do not remove.
    input("press to exit python")
	import sys
	import io
	import os
	import re
	import unicodedata
	from decimal import *
	from multiprocessing.pool import ThreadPool as Pool

	# from multiprocessing import Pool
	def slugify(value, allow_unicode=True):
		"""
		Deprecated: reduce ``value`` to a dash-separated slug.

		Adapted from
		https://github.com/django/django/blob/master/django/utils/text.py

		``value`` is converted with ``str()`` and Unicode-normalized: NFKC when
		``allow_unicode`` is true, otherwise NFKD followed by dropping every
		character that cannot be encoded as ASCII. Characters that are not word
		characters, whitespace or hyphens are then removed, each run of
		whitespace and hyphens collapses to a single ``-``, and leading/trailing
		dashes and underscores are stripped. Case is left untouched.

		Emits a ``DeprecationWarning`` on every call.
		"""
		# Warn from inside the function. Applying ``@DeprecationWarning`` as a
		# decorator would rebind the name to an exception instance, which is
		# not callable.
		import warnings

		warnings.warn("slugify is deprecated", DeprecationWarning, stacklevel=2)

		text = str(value)
		if allow_unicode:
			text = unicodedata.normalize('NFKC', text)
		else:
			decomposed = unicodedata.normalize('NFKD', text)
			text = decomposed.encode('ascii', 'ignore').decode('ascii')
		text = re.sub(r'[^\w\s-]', '', text)
		return re.sub(r'[-\s]+', '-', text).strip('-_')

	# too much for me as I'm going to work on windows only.
	def get_valid_filename(name):
		"""
		Deprecated: return ``name`` reduced to a conservative file name.

		Adapted from
		https://github.com/django/django/blob/master/django/utils/text.py

		Leading and trailing whitespace is removed, remaining spaces become
		underscores, and every character that is not a word character, dash or
		dot is dropped. If nothing usable is left (``""``, ``"."`` or ``".."``)
		a message is printed and that result is still returned.

		Emits a ``DeprecationWarning`` on every call.

		>>> get_valid_filename("john's portrait in 2004.jpg")
		'johns_portrait_in_2004.jpg'
		"""
		# Warn from inside the function. Applying ``@DeprecationWarning`` as a
		# decorator would rebind the name to an exception instance, which is
		# not callable.
		import warnings

		warnings.warn(
			"get_valid_filename is deprecated", DeprecationWarning, stacklevel=2
		)

		s = str(name).strip().replace(" ", "_")
		s = re.sub(r"(?u)[^-\w.]", "", s)
		if s in {"", ".", ".."}:
			print(f"Could not derive file name from '{name}'")
		return s


	def translate_valid_filename(name):
		"""
		Make ``name`` usable as a Windows file name without losing characters.

		The text is NFKC-normalized and stripped of surrounding whitespace.
		Each character Windows forbids in file names (``< > : " / \\ | ? *``)
		is then swapped for its full-width look-alike, and tabs become spaces.

		Args:
			name: the raw title (must be a ``str``).

		Returns:
			The sanitized name.
		"""
		# One translation table applied in a single pass. The pipe key is the
		# single character '|' (not the two-character backslash-pipe).
		half_to_full = str.maketrans({
			'<': '＜',
			'>': '＞',
			':': '：',
			'/': '／',
			'|': '｜',
			'?': '？',
			'*': '＊',  # also reserved on Windows
			'\\': '＼',
			'"': '＂',
			'\t': ' ',
		})

		normalized = unicodedata.normalize('NFKC', name).strip()
		return normalized.translate(half_to_full)

	def timecodetoSeconcd(fullstring: str = "") -> Decimal:
		"""
		Convert a timecode made of three colon-separated fields into seconds.

		The first two fields (hours, minutes) are parsed as integers; the last
		one (seconds, possibly fractional) is parsed as a ``Decimal`` so no
		precision is lost.

		Raises:
			ValueError: if the string does not split into exactly three
				fields or the hour/minute fields are not integers.
		"""
		hr, mm, ss = fullstring.split(":")
		# One hour is 60*60 seconds.
		timeInSecond = int(hr)*60*60 + int(mm)*60 + Decimal(ss)
		return timeInSecond

	def GetDuration(start: str = "", end: str = "") -> str:
		"""
		Return the time from ``start`` to ``end`` in seconds, as a string.

		Both arguments are timecodes understood by ``timecodetoSeconcd``; the
		result is ``str()`` of the difference between the two converted values.
		"""
		duration = timecodetoSeconcd(end) - timecodetoSeconcd(start)
		return str(duration)

	def findFileFullPath(inputpath: str = "", specificPath: str = "") -> str:
		"""
		Resolve ``inputpath`` to an existing file, trying several locations.

		Candidates are checked in this order and the first existing file wins:

		1. when ``specificPath`` is given: the bare file name inside it, then
		   ``inputpath`` taken relative to it;
		2. when ``inputpath`` is absolute: ``inputpath`` itself;
		3. otherwise: the bare file name next to this script, then in the
		   current working directory, then ``inputpath`` relative to each of
		   those two directories.

		Returns:
			The path of the first candidate that is a file, or ``""``.
		"""
		inputpath = inputpath.strip()
		assname = os.path.basename(inputpath)

		def candidates():
			# Generated lazily so later lookups only happen if earlier ones miss.
			if specificPath:
				yield os.path.join(specificPath, assname)
				yield os.path.normpath(os.path.join(specificPath, inputpath))

			if os.path.isabs(inputpath):
				yield inputpath
				return

			yield os.path.join(os.path.dirname(__file__), assname)
			yield os.path.join(os.getcwd(), assname)
			yield os.path.normpath(os.path.join(os.path.dirname(__file__), inputpath))
			yield os.path.normpath(os.path.join(os.getcwd(), inputpath))

		for candidate in candidates():
			if os.path.isfile(candidate):
				return candidate
		return ""

	def _build_ffmpeg_command(video_path, output_folder, index, song_name,
			start_stamp, end_stamp, audio_only):
		"""Return the ffmpeg command line that cuts one clip out of ``video_path``."""
		# ``-ss`` is placed after ``-i``: slow, but seeking before the input
		# snaps to the nearest keyframe and some players then fail to decode
		# the result.
		target = f'{output_folder}\\{index:02}.{song_name}'
		if audio_only:
			return (
				f'ffmpeg -i "{video_path}" -vn -ss {start_stamp} -to {end_stamp}'
				f' -c:a copy "{target}.m4a"'
			)
		x264_params = (
			'-x264-params "me=umh:subme=9:merange=48:fast-pskip=0:direct=auto:'
			'weightb=1:keyint=360:min-keyint=1:bframes=12:b-adapt=2:ref=3:'
			'rc-lookahead=80:crf=20:qpmin=9:chroma-qp-offset=-2:aq-mode=3:'
			'aq-strength=0.7:trellis=2:deblock=1,0,0:psy-rd=0.77,0.22:nr=4"'
		)
		return (
			f'ffmpeg -i "{video_path}" -ss {start_stamp} -to {end_stamp}'
			f' -c:v libx264 {x264_params} -c:a copy "{target}.mp4"'
		)


	def genCommandList() -> list:
		"""
		Build the ffmpeg commands described by the .ass file named in ``sys.argv``.

		``sys.argv[1]`` is the subtitle file (resolved with
		``findFileFullPath``); ``sys.argv[2] == "1"`` selects audio-only
		output (``.m4a``).

		The file is read line by line:

		* ``Video File: ...`` names the source video. A folder named after the
		  video (without extension) is created next to the .ass file.
		* A ``Dialogue: ...`` line whose text is not ``out`` (case-insensitive)
		  starts a clip: its text becomes the clip name, its start time the
		  clip start.
		* A ``Dialogue: ...`` line whose text is ``out`` ends the clip at that
		  line's start time and yields one command.

		Returns:
			The command strings in file order; an empty list when the input
			file or the referenced video cannot be found.

		Raises:
			ValueError: if a ``Dialogue`` line appears before any
				``Video File`` line.
		"""
		if len(sys.argv) < 2:
			print("usage: cutAss.py <file.ass> [1 = audio only]")
			return []

		ass_path = findFileFullPath(sys.argv[1])
		if not ass_path:
			print("ass file not found: " + sys.argv[1])
			return []

		# argv entries are always strings, so only "1" can enable this.
		audio_only = len(sys.argv) > 2 and sys.argv[2] == "1"

		ass_name = os.path.basename(ass_path)
		ass_folder = os.path.dirname(ass_path)
		if not ass_name.lower().endswith(".ass"):
			print(f'input: "{ass_name}" should be a .ass file at: {ass_folder}')
			return []

		print("input: " + ass_path)

		commands = []
		video_path = None
		output_folder = None
		song_name = None
		start_stamp = None
		count = 1

		# must be utf-8 as it should be ass file
		with io.open(ass_path, mode="r", encoding="utf-8") as ass_file:
			for line in ass_file:
				content = line.strip()

				if content.startswith("Video File: "):
					video_ref = content.removeprefix("Video File: ").replace("/", "\\")
					video_path = findFileFullPath(video_ref, ass_folder)
					if not video_path:
						print("VideoFile not Found: "
							+ os.path.normpath(os.path.join(ass_folder, video_ref))
							+ " (" + video_ref + ")")
						return []
					video_stem = os.path.splitext(os.path.basename(video_path))[0]
					output_folder = os.path.join(ass_folder, video_stem)
					if not os.path.exists(output_folder):
						os.makedirs(output_folder)
					continue

				if not content.startswith("Dialogue: "):
					continue

				if video_path is None:
					raise ValueError("No [Video File] from .ass")
				print("VideoFileFullPath: " + video_path)
				print("-------------------------------------------------------")

				# Nine fixed fields, then the text, which may itself contain
				# commas; only the start time (field 1) and the text are used.
				fields = content.removeprefix("Dialogue: ").split(",", 9)
				if len(fields) < 9:
					print("skipping malformed Dialogue line: " + content)
					continue
				start = fields[1]
				text = fields[9] if len(fields) > 9 else ""

				if text.lower() != "out":
					song_name = translate_valid_filename(text)
					start_stamp = "0" + start
					continue

				if start_stamp is None:
					print('skipping "out" marker without a preceding clip start: '
						+ content)
					continue

				# The start time of the "out" line marks where the clip ends.
				end_stamp = "0" + start
				command = _build_ffmpeg_command(
					video_path, output_folder, count, song_name,
					start_stamp, end_stamp, audio_only,
				)
				count += 1
				print(command)
				commands.append(command)

		return commands

	def worker(item):
		"""
		Run one shell command (an ffmpeg invocation) and report failure.

		Args:
			item: the complete command line, handed to the shell via
				``os.system``.

		``os.system`` reports a failing command through its return value
		rather than by raising, so the returned status is checked explicitly.
		"""
		print(item)
		try:
			status = os.system(item)  # do ffmpeg
		except Exception as exc:
			# Only reachable when the command could not be handed to the
			# shell at all (e.g. it is not a string).
			print(f'error with command {item}: {exc}')
			return
		if status != 0:
			print(f'error with command {item} (os.system returned {status})')

	def main(pool_size: int = 2):
		"""
		Generate the ffmpeg commands and run them on a small thread pool.

		Args:
			pool_size: how many commands run at the same time.
		"""
		# genCommandList may hand back nothing on its error paths; treat that
		# as "no work" instead of failing while iterating.
		items = genCommandList() or []
		if not items:
			return

		pool = Pool(pool_size)
		try:
			for item in items:
				pool.apply_async(worker, (item,))
		finally:
			# Stop accepting work, then wait for everything already queued.
			pool.close()
			pool.join()

	if __name__ == '__main__':
		main()
		# Wait for a key press so the console window does not close
		# immediately once the work is done; do not remove.
		input("press to exit python")