Last active
December 16, 2019 03:38
-
-
Save dreness/d75b0eff9c582e094b4362000186d20f to your computer and use it in GitHub Desktop.
Given a set of images in the same format, compute a slideshow order that minimizes image differences between adjacent frames using a bottleneck TSP solver.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import turicreate as tc | |
import mlrose | |
import numpy as np | |
import os | |
import sys | |
import IPython | |
# Root directory under which every artifact named below is stored.
baseDir = "/Users/andre/work/turi/attempts/attempt1"

# Turi Create artifacts: the image SFrame, the similarity model, and the
# similarity graph derived from that model.
refdata_file = f"{baseDir}/apple-logos.sframe"
model_path = f"{baseDir}/apple-logo-similarity.model"
graph_path = f"{baseDir}/simgraph"

# Text file holding the computed image order.
bestStateFile = f"{baseDir}/bestState.txt"

# Input list written in the format ffmpeg reads.
framesFile = f"{baseDir}/frames.txt"

# Destination for the Final Cut Pro XML export.
xmlFile = f"{baseDir}/fcpxml.fcpxml"
def readRefData(dataDir):
    """Return the image SFrame, reusing the saved copy when one exists.

    When ``refdata_file`` is already on disk it is loaded and returned, and
    ``dataDir`` is not read.  Otherwise the images under ``dataDir`` are
    loaded together with their paths, a row-number column is added, and the
    result is saved to ``refdata_file`` before being returned.
    """
    if not os.path.exists(refdata_file):
        print("creating SFrame from image refdata...")
        images = tc.image_analysis.load_images(dataDir, with_path=True)
        images = images.add_row_number()
        # Persist the SFrame so later runs can skip loading the images.
        images.save(refdata_file)
        return images

    print("loading existing image refdata...")
    return tc.load_sframe(refdata_file)
def makeModel(reference_data):
    """Return the image similarity model, reusing the saved one if present.

    When ``model_path`` exists the stored model is loaded and
    ``reference_data`` is not used.  Otherwise a new image similarity model
    is created from ``reference_data`` and saved to ``model_path``.
    """
    if not os.path.exists(model_path):
        print("creating model...")
        similarity_model = tc.image_similarity.create(reference_data)
        similarity_model.save(model_path)
        return similarity_model

    print("loading existing model...")
    return tc.load_model(model_path)
def makeGraph(model, k=5):
    """Return the similarity graph for ``model``, reusing a saved graph.

    A new graph is built with ``model.similarity_graph(k=k)``, i.e. it keeps
    the top ``k`` most similar images for each image; for the TSP use case
    ``k`` should be small.  The new graph is saved to ``graph_path``.

    When ``graph_path`` already exists that graph is loaded and returned
    as-is, and neither ``model`` nor ``k`` is consulted.
    """
    if not os.path.exists(graph_path):
        print("creating graph...")
        graph = model.similarity_graph(k=k)
        graph.save(graph_path)
        return graph

    print("loading existing graph...")
    return tc.load_sgraph(graph_path)
def solveBTSP(similarity_graph, k):
    """Return the image order as a list of one-indexed image numbers.

    If ``bestStateFile`` exists, the order saved there is parsed and
    returned without running the solver.  Otherwise the edge distances of
    ``similarity_graph`` are handed to mlrose's travelling-salesperson
    fitness function, a tour of length ``k`` is searched for with the
    genetic algorithm, and the result is saved to ``bestStateFile``.

    Both paths return the same representation: a plain list of ints where
    each value is the solver's zero-based index plus one.  This is the form
    ``writeFfmpegSlideshow`` expects, since it subtracts one before indexing
    the reference data.

    Args:
        similarity_graph: graph whose edges carry a ``distance`` field.
        k: number of stops in the tour (the number of images).

    Returns:
        list[int]: one-indexed image numbers in slideshow order.
    """
    import ast

    if os.path.exists(bestStateFile):
        print("loading existing bottleneck TSP solution...")
        with open(bestStateFile, "r") as fh:
            # The file holds the repr of a plain list of ints (written
            # below), so it is parsed as a literal rather than passed to
            # eval(), which would execute whatever the file contains.
            return ast.literal_eval(fh.read())

    print("solving bottleneck TSP...")
    print("Selecting fields...")
    si = similarity_graph.select_fields(["distance"]).edges
    print("Make a tuple from dict values...")
    # Presumably each edge row yields (source, destination, distance) in
    # that order -- confirm against the SGraph edge schema.
    ti = [tuple(edge.values()) for edge in si]
    print("Deriving fitness function based on distances...")
    fitness_fn_dists = mlrose.TravellingSales(distances=ti)
    print("Creating TSP optimizer...")
    problem_fit = mlrose.TSPOpt(
        length=k, fitness_fn=fitness_fn_dists, maximize=False)
    # Solve problem using the genetic algorithm
    print("Solving...")
    best_state, best_fitness = mlrose.genetic_alg(
        # Not sure of the best approach for tuning these.
        problem_fit, random_state=2, max_iters=15
    )
    print("The best state found is: ", best_state)
    print("The fitness at the best state is: ", best_fitness)

    # The solver's state is zero-indexed; add 1 so the saved values match
    # the index number in the file name (if any).  int() turns numpy
    # scalars into plain ints so the saved text is always a literal list.
    order = [int(x) + 1 for x in best_state]
    with open(bestStateFile, "w") as fh:
        fh.write(str(order))
    # Return the same one-indexed list that a later cached run would load,
    # so callers see one convention regardless of which path ran.
    return order
def writeFfmpegSlideshow(orderArray, ref_data, dur=5.0):
    """Write the ffmpeg concat-style input list to ``framesFile``.

    One entry is written per image, in the order given:

        file '/absolute/path/to/file'
        duration 5.0
        ...

    Args:
        orderArray: one-indexed image numbers in slideshow order.
        ref_data: indexable reference data; ``ref_data[i]["path"]`` is the
            image path for zero-based index ``i``.
        dur: value written on each ``duration`` line.
    """
    entries = []
    for f in orderArray:
        # to access the zero-indexed array element in ref_data,
        # subtract one from the stored array element value
        fpath = os.path.abspath(ref_data[f - 1]["path"])
        # Single-quote the path so spaces and special characters survive
        # ffmpeg's parsing; an embedded quote is written by closing the
        # quoted string, emitting an escaped quote, and reopening it.
        quoted = fpath.replace("'", "'\\''")
        entries.append(f"file '{quoted}'\nduration {dur}\n")

    # The with-block closes (and therefore flushes) the file before its
    # size is measured below.
    with open(framesFile, "w") as fh:
        fh.writelines(entries)

    fsize = os.path.getsize(framesFile)
    print(f"Wrote {fsize} bytes to ffmpeg slideshow file {framesFile}")
def writeFCPXMLSlideshow(orderArray, ref_data, dur=5.0):
    """Write an XML file to be imported by Final Cut Pro X.

    Not implemented yet: calling this always raises ``NotImplementedError``.

    References:
    https://developer.apple.com/library/archive/documentation/FinalCutProX/Reference/FinalCutProXXMLFormat/EventsandProjects/EventsandProjects.html#//apple_ref/doc/uid/TP40011227-CH12-SW19
    https://developer.apple.com/documentation/professional_video_applications/exchanging_content_and_metadata_with_final_cut_pro/describing_final_cut_pro_items_in_fcpxml?language=objc
    """
    raise NotImplementedError
def main():
    """Run the pipeline: load images, model, graph, solve, write the list.

    When Python was started in interactive mode an IPython shell is opened
    at the end.
    """
    # Local names are kept unchanged on purpose: they are what is in scope
    # at the point where IPython.embed() is called below.
    ref_data = readRefData("/Users/andre/Pictures/Apple_Logos/")
    model = makeModel(ref_data)
    simgraph = makeGraph(model)
    # We want a solution that includes all input images
    num_stops = len(ref_data)
    best_state = solveBTSP(simgraph, num_stops)
    print(f"Bottleneck TSP solution contains {len(best_state)} edges")
    writeFfmpegSlideshow(best_state, ref_data)
    if sys.flags.interactive != 1:
        return
    IPython.embed()
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment