Skip to content

Instantly share code, notes, and snippets.

@AlphaAtlas
Last active November 1, 2021 12:09
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AlphaAtlas/45ac013da4eabd3316435f72eef64494 to your computer and use it in GitHub Desktop.
Save AlphaAtlas/45ac013da4eabd3316435f72eef64494 to your computer and use it in GitHub Desktop.
Grabs 1 frame from each scene in a list of videos, downscales them using CUDA or SSIM_downsample, writes images to disk. Useful for getting training data!
#Grabs 1 frame from each detected scene in a list of videos
#Downscales them with the highest possible quality, then writes the full frame and the downscaled image to disk
#Useful for getting training data from videos!
#Requires python, vapoursynth, muvsfunc, and mvsfunc, knlmeans, dpid, mvtools
#Or better yet, just unzip this: https://github.com/theChaosCoder/vapoursynth-portable-FATPACK/releases/tag/r2
#Just benchmark this script under the script tab in VSEdit.
#Or run 'vspipe "Path/To/NameOfScript.vpy" .'
import mvsfunc as mvf
import muvsfunc as muf
import vapoursynth as vs
from vapoursynth import core
import functools, os
#List of video paths to process. Output folders are created next to each video.
videos = ["video1.mkv", "C:/blah/video2.mp4"]
#Frame property tested by detector(): grab the first frame of each new scene
#('_SceneChangePrev') or the last frame before a change ('_SceneChangeNext').
prop = '_SceneChangePrev'
#prop = '_SceneChangeNext'
#Downscaler selection: True = GPU DPID, False = CPU SSIM_downsample.
CUDA = False
#Scene detection strength thresholds, passed to mv.SCDetection below.
#http://avisynth.nl/index.php/MVTools2/MSCDetection
thSCD1 = 400 #default 400
thSCD2 = 130 #default 130
#Output image format (any format imwri supports)
form = "png"
#Downscaling factor applied to both width and height.
sfact = 0.5
#Output bit depth. ImageMagick will dither it down if required.
d = 16
def detector(n, f, clip, pclip, vname):
    """FrameEval callback: route scene-change frames through the write chain.

    Returns the processed clip `pclip` (whose imwri nodes write images as a
    side effect) when frame `f` carries the scene-change property selected by
    the module-level `prop`; otherwise returns the untouched `clip`.
    `n` is supplied by FrameEval; `vname` is bound by the caller but unused here.
    """
    return pclip if f.props[prop] == 1 else clip
#Build one processing graph per listed video.
for src in videos:
    #Resolve the input path and derive the per-video output layout.
    src = os.path.realpath(src)
    stem = os.path.basename(src).rsplit(".", 1)[0]
    parent = os.path.dirname(src)
    for sub in ("Processed", "Original"):
        os.makedirs(os.path.join(parent, stem, sub), exist_ok = True)
    #Decode the source with ffms2.
    clip = core.ffms2.Source(src)
    #Trim the video just like a python list to remove intro, credits, etc:
    #http://www.vapoursynth.com/doc/pythonreference.html
    #Tag frames with scene-change properties via MVTools motion analysis.
    vectors = core.mv.Analyse(core.mv.Super(clip))
    clip = core.mv.SCDetection(clip, vectors, thscd1 = thSCD1, thscd2 = thSCD2)
    #Alternative, faster but less accurate scene detection:
    #clip = core.misc.SCDetect(clip)
    #Process in RGB at the configured depth for less lossy filtering.
    pclip = mvf.ToRGB(clip, depth = d)
    #GPU denoise. Add any further preprocessing here.
    #https://github.com/Khanattila/KNLMeansCL/wiki/Filter-description
    pclip = core.knlm.KNLMeansCL(pclip, d = 3, a = 3, h = 0.8)
    #Queue the full-size frame write.
    big_name = os.path.join(parent, stem, "Original", r"%d." + form)
    pclip = core.imwri.Write(pclip, imgformat = form, filename = big_name, firstnum = 0, quality = 100, overwrite = True)
    #Downscale: DPID on the GPU or SSIM_downsample on the CPU.
    if CUDA:
        pclip = core.dpid.Dpid(pclip, width = pclip.width * sfact, height = pclip.height * sfact)
    else:
        pclip = muf.SSIM_downsample(pclip, w = pclip.width * sfact, h = pclip.height * sfact)
    #Add artifacts to train against here, e.g.:
    #pclip = core.grain.Add(pclip, var=1.0, uvar=0.0)
    #Queue the downscaled frame write.
    small_name = os.path.join(parent, stem, "Processed", r"%d." + form)
    pclip = core.imwri.Write(pclip, imgformat = form, filename = small_name, firstnum = 0, quality = 100, overwrite = True)
    #Match the original clip's size/format so FrameEval accepts the substitute.
    pclip = core.resize.Point(pclip, width = clip.width, height = clip.height, matrix_s = "709", format = clip.format)
    #Only frames flagged as scene changes are routed through the write chain.
    clip = core.std.FrameEval(clip, functools.partial(detector, clip = clip, pclip = pclip, vname = stem), prop_src = clip)
#NOTE(review): only the clip from the final loop iteration is set as output,
#so with several videos listed only the last one is actually rendered — confirm intent.
clip.set_output()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment