QuasimodoNZ/extracter.py

## readme.txt
This script is for splitting the JPG and MP4 out of a Motion Picture that was taken with a Huawei P10 Plus.

It doesn't need any image processing libraries, as the processing is just identifying the right byte sequence. I've only done minimal testing, using Python 3.5.3. Because the script is so simple, it should also be compatible with Python 2.7, perhaps with a few small style changes.

An example command would be as follows:
python extracter.py -j -m /home/Wheatley/Downloads/Photos /home/Wheatley/Downloads/IMG_20190101_120000.jpg -d /home/Wheatley/Downloads/Extracted

Usage: extracter.py [options] source_path_1 source_path_2 ...
You can specify as many source paths as you want. These can be either a directory or a specific image. You can also use -h
 for more help

Options:
  -h, --help            show this help message and exit
  -j, --jpg             Extracts the static image. Useful if you want a
                        smaller file. Exif metadata is preserved.
  -m, --mp4             Extracts the video data from the file. Audio is also
                        in this file.
  -v, --verbose         If you want to see some of the print outs.
  -r, --recursive       If set, this will recursivly look for files in the
                        directories specified.
  -d DIRECTORY, --destination=DIRECTORY
                        Optionally store all extracted parts to this
                        directory. This must be a directory. If this is not
                        specified, extracted files will appear in same place
                        as their sources.

## extracter.py
'''
Extract the static image and video from a motion picture taken with a Huawei phone

'''

import os
import sys

from optparse import OptionParser


def main(argv=None):
    ''' Parse the command line, collect the source JPGs and split them.

    argv is the list of command-line arguments without the program name;
    sys.argv[1:] is used when it is None. The parsed options are published
    in the module-level global `opts`, which process_directories() and
    split_files() read.

    Invalid usage (no format selected, no source given, or a destination
    that is not an existing directory) is reported through parser.error().
    '''
    if argv is None:
        argv = sys.argv[1:]

    usage = 'usage: %prog [options] source_path_1 source_path_2 ...\nYou can specify as many source paths as you want. These can be either a directory or a specific image. You can also use -h for more help'
    parser = OptionParser(usage=usage)

    # Every on/off switch is registered the same way, so they are described
    # as (short flag, long flag, attribute on opts, help text) rows.
    #
    # There is deliberately no GIF switch (yet). MoviePy
    # (http://zulko.github.io/moviepy/index.html) looked like the best fit,
    # but a straight conversion turned a 5.1mb MP4 into a 27.1mb GIF with an
    # odd resolution; doing it properly would need more input from the user
    # (frames per second, resizing, etc.).
    switches = (
        ('-j', '--jpg', 'extract_jpg',
         'Extracts the static image. Useful if you want a smaller file. Exif metadata is preserved.'),
        ('-m', '--mp4', 'extract_mp4',
         'Extracts the video data from the file. Audio is also in this file.'),
        ('-v', '--verbose', 'verbose',
         'If you want to see some of the print outs.'),
        ('-r', '--recursive', 'recursive',
         'If set, this will recursivly look for files in the directories specified.'),
    )
    for short_name, long_name, attribute, description in switches:
        parser.add_option(
            short_name,
            long_name,
            action='store_true',
            dest=attribute,
            help=description,
            default=False)

    # The destination is the only option that carries a value.
    parser.add_option(
        '-d',
        '--destination',
        dest='destination',
        metavar='DIRECTORY',
        help='Optionally store all extracted parts to this directory. This must be a directory. If this is not specified, extracted files will appear in same place as their sources.')

    # Publish the parsed options for the other functions in this module.
    global opts
    opts, sources = parser.parse_args(argv)  #pylint: disable=W0612

    if not (opts.extract_jpg or opts.extract_mp4):
        parser.error(
            'You need to specify what formats you want to extract. Please set at least option: -j for JPGs and -m for MP4s')

    if not sources:
        parser.error(
            'You need to specify at least one source. Either a directory with images or directly specify the images.')

    if opts.destination and not os.path.isdir(opts.destination):
        parser.error('Destination needs to be a directory')

    filenames = process_directories(sources)
    if opts.verbose:
        print([entry[0] for entry in filenames])
        print('JPGs found: {}'.format(len(filenames)))

    split_files(filenames)

def process_directories(paths):
    ''' Collect the JPG files named by, or contained in, the given paths.

    Each entry of `paths` may be a file or a directory. A directory is
    expanded to its entries; sub-directories among those entries are only
    descended into when the global `opts.recursive` is set. Files are kept
    when their extension is `.jpg` in any letter case. Paths that are
    neither a file nor a directory are ignored.

    Returns a list of (path, directory, filename, extension) tuples, where
    `directory` and `filename` + `extension` are the two halves of
    os.path.split(path) and `extension` includes the leading dot.
    '''
    found = []
    for path in paths:
        if os.path.isdir(path):
            # os.path.join avoids a doubled separator when `path` already
            # ends with one and uses the platform's own separator.
            children = [os.path.join(path, name) for name in os.listdir(path)]

            # Without --recursive, child directories are dropped so the
            # recursive call below only ever sees plain files.
            if not opts.recursive:
                children = [
                    child for child in children if os.path.isfile(child)
                ]

            found.extend(process_directories(children))
        elif os.path.isfile(path):
            directory, tail = os.path.split(path)
            filename, extension = os.path.splitext(tail)
            if extension.upper() == '.JPG':
                found.append((path, directory, filename, extension))
    return found


def split_files(filenames):
    ''' Split each source file into its JPG part and its MP4 part.

    `filenames` is an iterable of (path, directory, filename, extension)
    tuples as produced by process_directories(). Behaviour is controlled by
    the global `opts`:

    * opts.extract_jpg  - write `<filename>-tiny.jpg`
    * opts.extract_mp4  - write `<filename>.mp4`
    * opts.destination  - output directory; when unset, output is written
                          next to the source file
    * opts.verbose      - print progress messages

    Files that do not contain the marker byte sequence are skipped.
    '''
    # The file is cut 8 bytes after the start of this sequence, so
    # 'ctrace' and the two NUL bytes after it stay with the JPG and the
    # remainder (beginning \x00\x00\x00\x18ftypmp42, presumably the MP4
    # header) opens the video output.
    marker = b'ctrace\x00\x00\x00\x00\x00\x18ftypmp42'
    jpg_tail_length = 8

    for path, directory, filename, _extension in filenames:
        # Read once; both halves are sliced from this buffer.
        with open(path, 'rb') as src:
            file_contents = src.read()

        marker_index = file_contents.find(marker)
        if marker_index == -1:
            if opts.verbose:
                print('JPG tail and MP4 head was not found in {}'.format(
                    path))
            continue
        if opts.verbose:
            print('Splitting {}'.format(path))
        split_index = marker_index + jpg_tail_length

        out_directory = opts.destination if opts.destination else directory
        if opts.verbose:
            print(out_directory)

        # os.path.join supplies the separator between directory and file
        # name; plain concatenation glued the source directory's name onto
        # the file name whenever no destination was given.
        if opts.extract_jpg:
            jpg_path = os.path.join(out_directory, filename + '-tiny.jpg')
            with open(jpg_path, 'wb') as dst_jpg:
                dst_jpg.write(file_contents[:split_index])

        if opts.extract_mp4:
            mp4_path = os.path.join(out_directory, filename + '.mp4')
            with open(mp4_path, 'wb') as dst_mp4:
                dst_mp4.write(file_contents[split_index:])

        # if opts.extract_gif:
        # This is where the gif stuff should be
        # Will likely consist of writing the MP4 bytes above to a temporary file and deleting it afterwards


# Run the extractor only when executed as a script, and hand main()'s
# return value to the interpreter as the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
	This script is for splitting the JPG and MP4 out of a Motion Picture that was taken with a Huawei P10 Plus.

	It doesn't need any image processing libraries, as the processing is just identifying the right byte sequence. I've only done minimal testing, using Python 3.5.3. Because the script is so simple, it should also be compatible with Python 2.7, perhaps with a few small style changes.

	An example command would be as follows:
	python extracter.py -j -m /home/Wheatley/Downloads/Photos /home/Wheatley/Downloads/IMG_20190101_120000.jpg -d /home/Wheatley/Downloads/Extracted

	Usage: extracter.py [options] source_path_1 source_path_2 ...
	You can specify as many source paths as you want. These can be either a directory or a specific image. You can also use -h
	for more help

	Options:
	-h, --help show this help message and exit
	-j, --jpg Extracts the static image. Useful if you want a
	smaller file. Exif metadata is preserved.
	-m, --mp4 Extracts the video data from the file. Audio is also
	in this file.
	-v, --verbose If you want to see some of the print outs.
	-r, --recursive If set, this will recursivly look for files in the
	directories specified.
	-d DIRECTORY, --destination=DIRECTORY
	Optionally store all extracted parts to this
	directory. This must be a directory. If this is not
	specified, extracted files will appear in same place
	as their sources.
	'''
	Extract the static image and video from a motion picture taken with a Huawei phone

	'''

	import os
	import sys

	from optparse import OptionParser


	def main(argv=None):
	    ''' Parse the command line, collect the source JPGs and split them.

	    argv is the list of command-line arguments without the program name;
	    sys.argv[1:] is used when it is None. The parsed options are published
	    in the module-level global `opts`, which process_directories() and
	    split_files() read.

	    Invalid usage (no format selected, no source given, or a destination
	    that is not an existing directory) is reported through parser.error().
	    '''
	    if argv is None:
	        argv = sys.argv[1:]

	    # setup option parser
	    usage = 'usage: %prog [options] source_path_1 source_path_2 ...\nYou can specify as many source paths as you want. These can be either a directory or a specific image. You can also use -h for more help'
	    parser = OptionParser(usage=usage)

	    # Every on/off switch is registered the same way, so they are described
	    # as (short flag, long flag, attribute on opts, help text) rows.
	    #
	    # There is deliberately no GIF switch (yet). MoviePy
	    # (http://zulko.github.io/moviepy/index.html) looked like the best fit,
	    # but a straight conversion turned a 5.1mb MP4 into a 27.1mb GIF with an
	    # odd resolution; doing it properly would need more input from the user
	    # (frames per second, resizing, etc.).
	    switches = (
	        ('-j', '--jpg', 'extract_jpg',
	         'Extracts the static image. Useful if you want a smaller file. Exif metadata is preserved.'),
	        ('-m', '--mp4', 'extract_mp4',
	         'Extracts the video data from the file. Audio is also in this file.'),
	        ('-v', '--verbose', 'verbose',
	         'If you want to see some of the print outs.'),
	        ('-r', '--recursive', 'recursive',
	         'If set, this will recursivly look for files in the directories specified.'),
	    )
	    for short_name, long_name, attribute, description in switches:
	        parser.add_option(
	            short_name,
	            long_name,
	            action='store_true',
	            dest=attribute,
	            help=description,
	            default=False)

	    # The destination is the only option that carries a value.
	    parser.add_option(
	        '-d',
	        '--destination',
	        dest='destination',
	        metavar='DIRECTORY',
	        help='Optionally store all extracted parts to this directory. This must be a directory. If this is not specified, extracted files will appear in same place as their sources.')

	    # process options; publish them for the other functions in this module
	    global opts
	    (opts, args) = parser.parse_args(argv)  #pylint: disable=W0612

	    if not opts.extract_jpg and not opts.extract_mp4:
	        parser.error(
	            'You need to specify what formats you want to extract. Please set at least option: -j for JPGs and -m for MP4s')

	    if not args:
	        parser.error(
	            'You need to specify at least one source. Either a directory with images or directly specify the images.')

	    if opts.destination and not os.path.isdir(opts.destination):
	        parser.error('Destination needs to be a directory')

	    filenames = process_directories(args)
	    if opts.verbose:
	        print([fn[0] for fn in filenames])
	        print('JPGs found: {}'.format(len(filenames)))

	    split_files(filenames)

	def process_directories(paths):
	    ''' Collect the JPG files named by, or contained in, the given paths.

	    Each entry of `paths` may be a file or a directory. A directory is
	    expanded to its entries; sub-directories among those entries are only
	    descended into when the global `opts.recursive` is set. Files are kept
	    when their extension is `.jpg` in any letter case. Paths that are
	    neither a file nor a directory are ignored.

	    Returns a list of (path, directory, filename, extension) tuples, where
	    `directory` and `filename` + `extension` are the two halves of
	    os.path.split(path) and `extension` includes the leading dot.
	    '''
	    rtn = []
	    for path in paths:
	        if os.path.isdir(path):
	            # os.path.join avoids a doubled separator when `path` already
	            # ends with one and uses the platform's own separator.
	            pathnames = [os.path.join(path, pn) for pn in os.listdir(path)]

	            # Without --recursive, child directories are dropped so the
	            # recursive call below only ever sees plain files.
	            if not opts.recursive:
	                pathnames = [pn for pn in pathnames if os.path.isfile(pn)]

	            rtn.extend(process_directories(pathnames))
	        elif os.path.isfile(path):
	            directory, tail = os.path.split(path)
	            filename, extension = os.path.splitext(tail)
	            if extension.upper() == '.JPG':
	                rtn.append((
	                    path,
	                    directory,
	                    filename,
	                    extension,
	                ))
	    return rtn


	def split_files(filenames):
	    ''' Split each source file into its JPG part and its MP4 part.

	    `filenames` is an iterable of (path, directory, filename, extension)
	    tuples as produced by process_directories(). Behaviour is controlled by
	    the global `opts`:

	    * opts.extract_jpg  - write `<filename>-tiny.jpg`
	    * opts.extract_mp4  - write `<filename>.mp4`
	    * opts.destination  - output directory; when unset, output is written
	                          next to the source file
	    * opts.verbose      - print progress messages

	    Files that do not contain the marker byte sequence are skipped.
	    '''
	    # The file is cut 8 bytes after the start of this sequence, so
	    # 'ctrace' and the two NUL bytes after it stay with the JPG and the
	    # remainder (beginning \x00\x00\x00\x18ftypmp42, presumably the MP4
	    # header) opens the video output.
	    marker = b'ctrace\x00\x00\x00\x00\x00\x18ftypmp42'
	    jpg_tail_length = 8

	    for path, directory, filename, _extension in filenames:
	        # Read once; both halves are sliced from this buffer.
	        with open(path, 'rb') as src:
	            file_contents = src.read()

	        # Figure out mid point
	        ctrace_index = file_contents.find(marker)
	        if ctrace_index == -1:
	            if opts.verbose:
	                print('JPG tail and MP4 head was not found in {}'.format(
	                    path))
	            continue
	        if opts.verbose:
	            print('Splitting {}'.format(path))
	        split_index = ctrace_index + jpg_tail_length

	        out_directory = opts.destination if opts.destination else directory
	        if opts.verbose:
	            print(out_directory)

	        # os.path.join supplies the separator between directory and file
	        # name; plain concatenation glued the source directory's name onto
	        # the file name whenever no destination was given.
	        # Saves the static image data
	        if opts.extract_jpg:
	            jpg_path = os.path.join(out_directory, filename + '-tiny.jpg')
	            with open(jpg_path, 'wb') as dst_jpg:
	                dst_jpg.write(file_contents[:split_index])

	        # Saves the mp4 video data
	        if opts.extract_mp4:
	            mp4_path = os.path.join(out_directory, filename + '.mp4')
	            with open(mp4_path, 'wb') as dst_mp4:
	                dst_mp4.write(file_contents[split_index:])

	        # if opts.extract_gif:
	        # This is where the gif stuff should be
	        # Will likely consist of writing the MP4 bytes above to a temporary file and deleting it afterwards


	# Run the extractor only when executed as a script, and hand main()'s
	# return value to the interpreter as the exit status.
	if __name__ == '__main__':
	    sys.exit(main())