Skip to content

Instantly share code, notes, and snippets.

@abfo
Created March 11, 2025 23:49
Process photos for a digital frame using face aware cropping and image embeddings to pair similar vertical photos.
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Formats.Jpeg;
using FaceAiSharp;
using System.Numerics.Tensors;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
// Configuration: source photos, destination for processed frames, and the
// target display dimensions of the digital photo frame.
const string InputFolder = @"D:\photo-frame\input";
const string OutputFolder = @"D:\photo-frame\output";
const int OutputWidth = 1366;
const int OutputHeight = 768;
const float OutputAspectRatio = (float) OutputHeight / OutputWidth;

// Sequential counter used by GetNextFileName() to produce ordered output names.
int fileNumber = 0;

Console.WriteLine("photo-frame-process");

// CLIP-based embedder used to pair visually similar vertical photos.
ImageEmbedder imageEmbedder = new ImageEmbedder();
// Face detector used to keep faces inside the crop window.
var faceDetector = FaceAiSharpBundleFactory.CreateFaceDetectorWithLandmarks();
Random rng = new Random();
// Vertical photos are deferred here so they can be paired up after the main pass.
List<MetaImage> metaImages = new List<MetaImage>();

DirectoryInfo di = new DirectoryInfo(InputFolder);
string[] extensions = { "*.jpg", "*.jpeg", "*.png", "*.gif", "*.webp" };
List<FileInfo> files = new List<FileInfo>();
// On Windows a 3-character pattern like "*.jpg" also matches longer extensions
// such as ".jpeg", so the same file can be returned by two patterns. Track the
// full paths already collected to avoid processing a photo twice.
HashSet<string> seenPaths = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach(string ext in extensions)
{
    foreach(FileInfo fi in di.GetFiles(ext, SearchOption.AllDirectories))
    {
        if (seenPaths.Add(fi.FullName))
        {
            files.Add(fi);
        }
    }
}
// Main pass: classify each photo by aspect ratio and process it.
foreach(FileInfo file in files)
{
    // debug single image
    //if (file.Name != "IMG_1023.JPG") { continue; }

    Console.Write(file.Name);
    using (Image<Rgb24> image = Image.Load<Rgb24>(file.FullName))
    {
        // Apply EXIF orientation so Width/Height reflect how the photo displays.
        image.Mutate(i => i.AutoOrient());
        float aspect = (float) image.Height / image.Width;

        // happiest path - (near) same aspect ratio; exact float equality would
        // essentially never be true, so compare within a small tolerance
        if (Math.Abs(aspect - OutputAspectRatio) < 0.001f)
        {
            Console.WriteLine(" - same aspect ratio");
            image.Mutate(x => x.Resize(OutputWidth, OutputHeight));
            image.SaveAsJpeg(GetNextFileName(), new JpegEncoder {Quality = 90});
            continue;
        }

        // vertical image - defer so it can be paired with a similar one later
        if (image.Width < image.Height)
        {
            Console.WriteLine(" - vertical image");
            try
            {
                metaImages.Add(new MetaImage(file.FullName, imageEmbedder.Embed(file.FullName)));
            }
            catch (Exception ex)
            {
                // match the horizontal path: log and continue rather than abort the batch
                Console.WriteLine($"Error processing {file.FullName}: {ex.Message}");
            }
            continue;
        }

        // horizontal image - crop around any detected faces, then resize
        Console.WriteLine(" - horizontal image");
        try
        {
            FaceAwareResize(image, OutputWidth, OutputHeight);
            image.SaveAsJpeg(GetNextFileName(), new JpegEncoder {Quality = 90});
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Error processing {file.FullName}: {ex.Message}");
        }
    }
}
// Second pass: combine the deferred vertical images two at a time, side by
// side, pairing each randomly chosen image with its most similar remaining one.
while(metaImages.Count > 0)
{
    if (metaImages.Count == 1)
    {
        // one left over image, just resize as best as possible...
        MetaImage metaImage = metaImages[0];
        metaImages.Remove(metaImage);
        Console.WriteLine($"{Path.GetFileName(metaImage.ImagePath)} - single vertical image");
        using (Image<Rgb24> image = Image.Load<Rgb24>(metaImage.ImagePath))
        {
            try
            {
                image.Mutate(i => i.AutoOrient());
                FaceAwareResize(image, OutputWidth, OutputHeight);
                image.SaveAsJpeg(GetNextFileName(), new JpegEncoder {Quality = 90});
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error processing {metaImage.ImagePath}: {ex.Message}");
            }
        }
        break;
    }

    // find two vertical images to combine: pick the first at random...
    MetaImage meta1 = metaImages[rng.Next(metaImages.Count)];
    MetaImage? meta2 = null;
    float bestSimilarity = float.MinValue;
    // ...then find the second image that is closest to the first based on cosine similarity
    foreach(MetaImage candidate in metaImages)
    {
        if (candidate == meta1)
        {
            continue;
        }
        float similarity = TensorPrimitives.CosineSimilarity(meta1.Embedding, candidate.Embedding);
        if (similarity > bestSimilarity)
        {
            bestSimilarity = similarity;
            meta2 = candidate;
        }
    }
    if (meta2 == null)
    {
        // unreachable while Count >= 2, but keeps the compiler's null analysis happy
        throw new InvalidOperationException("No second image found");
    }
    metaImages.Remove(meta1);
    metaImages.Remove(meta2);
    Console.WriteLine($"{Path.GetFileName(meta1.ImagePath)} - vertical image paired with {Path.GetFileName(meta2.ImagePath)}");
    try
    {
        using (Image<Rgb24> image1 = Image.Load<Rgb24>(meta1.ImagePath))
        {
            image1.Mutate(i => i.AutoOrient());
            using (Image<Rgb24> image2 = Image.Load<Rgb24>(meta2.ImagePath))
            {
                image2.Mutate(i => i.AutoOrient());
                // each half fills one side of the frame
                FaceAwareResize(image1, OutputWidth / 2, OutputHeight);
                FaceAwareResize(image2, OutputWidth / 2, OutputHeight);
                // create a new image with the two images combined
                using (Image<Rgb24> combinedImage = new Image<Rgb24>(OutputWidth, OutputHeight))
                {
                    combinedImage.Mutate(x => x.DrawImage(image1, new Point(0, 0), 1f));
                    combinedImage.Mutate(x => x.DrawImage(image2, new Point(OutputWidth / 2, 0), 1f));
                    combinedImage.SaveAsJpeg(GetNextFileName(), new JpegEncoder {Quality = 90});
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error processing {meta1.ImagePath} and {meta2.ImagePath}: {ex.Message}");
    }
}
Console.WriteLine("photo-frame-process done!");
// utility below
/// <summary>
/// Crops <paramref name="image"/> toward the target aspect ratio while keeping
/// any detected faces centered in the crop window, then resizes to exactly
/// <paramref name="width"/> x <paramref name="height"/>. Mutates the image in place.
/// </summary>
void FaceAwareResize(Image<Rgb24> image, int width, int height)
{
    // Union of all detected face boxes - the region we try to keep in frame.
    RectangleF? detectRect = null;
    var faces = faceDetector.DetectFaces(image);
    foreach(var face in faces)
    {
        detectRect = detectRect.HasValue
            ? RectangleF.Union(detectRect.Value, face.Box)
            : face.Box;
    }
    // No faces detected: fall back to a tiny rectangle at the image center so
    // the crop is effectively center-weighted.
    RectangleF coreRect = detectRect ?? new RectangleF(image.Width / 2.0f, image.Height / 2.0f, 0.1f, 0.1f);
    // get the center of coreRect as PointF
    PointF center = new PointF(coreRect.X + coreRect.Width / 2.0f, coreRect.Y + coreRect.Height / 2.0f);

    float targetAspectRatio = (float) width / height;
    float imageAspectRatio = (float) image.Width / image.Height;
    if (targetAspectRatio >= imageAspectRatio)
    {
        // Target is relatively wider than the image: keep the full width and
        // figure out the best Y position for a horizontal band.
        float targetHeight = image.Width * ((float)height / width);
        float y = center.Y - targetHeight / 2.0f;
        // clamp so the band stays inside the image
        if (y < 0)
        {
            y = 0;
        }
        else if (y + targetHeight > image.Height)
        {
            y = image.Height - targetHeight;
        }
        int intY = (int)y;
        int intTargetHeight = (int)targetHeight;
        // truncation can push the band past the bottom edge - shrink to fit
        int extra = image.Height - (intY + intTargetHeight);
        if (extra < 0)
        {
            intTargetHeight += extra;
        }
        Rectangle targetRect = new Rectangle(0, intY, image.Width, intTargetHeight);
        // crop to targetRect ('ctx' rather than 'x' keeps the lambda parameter
        // from colliding with the float locals used in this method)
        image.Mutate(ctx => ctx.Crop(targetRect));
    }
    else
    {
        // Target is relatively taller than the image: keep the full height and
        // figure out the best X position for a vertical band.
        float targetWidth = image.Height * ((float)width / height);
        float x = center.X - targetWidth / 2.0f;
        // clamp so the band stays inside the image
        if (x < 0)
        {
            x = 0;
        }
        else if (x + targetWidth > image.Width)
        {
            x = image.Width - targetWidth;
        }
        int intX = (int)x;
        int intTargetWidth = (int)targetWidth;
        // truncation can push the band past the right edge - shrink to fit
        int extra = image.Width - (intX + intTargetWidth);
        if (extra < 0)
        {
            intTargetWidth += extra;
        }
        Rectangle targetRect = new Rectangle(intX, 0, intTargetWidth, image.Height);
        // crop to targetRect; a lambda parameter named 'x' here would shadow
        // the float local 'x' above (CS0136), so use 'ctx'
        image.Mutate(ctx => ctx.Crop(targetRect));
    }
    // Resize the cropped image to the exact target dimensions.
    image.Mutate(ctx => ctx.Resize(new ResizeOptions
    {
        Size = new Size(width, height)
    }));
}
// Produce the next sequential, zero-padded output path (00000001.jpg, 00000002.jpg, ...).
string GetNextFileName() => Path.Combine(OutputFolder, $"{++fileNumber:00000000}.jpg");
/// <summary>
/// A vertical photo awaiting pairing: its path on disk plus the CLIP image
/// embedding used to measure visual similarity between candidates.
/// </summary>
internal class MetaImage
{
    public MetaImage(string imagePath, float[] embedding) =>
        (ImagePath, Embedding) = (imagePath, embedding);

    // Full path to the source image file.
    public string ImagePath { get; set; }

    // CLIP embedding vector for cosine-similarity comparison.
    public float[] Embedding { get; set; }
}
/// <summary>
/// Embeds an image using the OpenAI CLIP model (ViT-B/32 image encoder),
/// downloading the ONNX model file on first use.
/// See https://github.com/bartbroere/clip.dll/blob/master/Program.cs
/// </summary>
internal class ImageEmbedder
{
    private const string ModelFile = "clip-image-vit-32-float32.onnx";
    private const string ModelUrl = "https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx";

    private readonly InferenceSession _model;

    public ImageEmbedder()
    {
        if (!File.Exists(ModelFile))
        {
            using (HttpClient httpClient = new HttpClient())
            {
                // Blocking here is acceptable (one-time setup in a console app);
                // GetAwaiter().GetResult() surfaces the original exception instead
                // of wrapping it in an AggregateException like .Result/.Wait() do.
                var response = httpClient.GetAsync(ModelUrl).GetAwaiter().GetResult();
                // Fail loudly rather than writing an HTTP error page into the model file.
                response.EnsureSuccessStatusCode();
                using (var fs = new FileStream(ModelFile, FileMode.CreateNew))
                {
                    response.Content.CopyToAsync(fs).GetAwaiter().GetResult();
                }
            }
        }
        _model = new InferenceSession(ModelFile);
    }

    /// <summary>Loads the image at <paramref name="imagePath"/> and returns its embedding.</summary>
    public float[] Embed(string imagePath)
    {
        // Dispose the loaded image when done - the original version leaked it.
        using (Image<Rgb24> image = Image.Load<Rgb24>(File.ReadAllBytes(imagePath)))
        {
            return Embed(image);
        }
    }

    /// <summary>
    /// Center-crops to a square, resizes to 224x224, normalizes with CLIP's
    /// per-channel constants, and runs the ONNX model. Mutates <paramref name="image"/>.
    /// </summary>
    public float[] Embed(Image<Rgb24> image)
    {
        var smallestSide = Math.Min(image.Width, image.Height);
        image.Mutate(x => x.Crop(
            new Rectangle(
                (image.Width - smallestSide) / 2,
                (image.Height - smallestSide) / 2,
                smallestSide,
                smallestSide
            )));
        image.Mutate(x => x.Resize(224, 224));
        var inputTensor = new DenseTensor<float>(new[] {1, 3, 224, 224});
        for (var x = 0; x < 224; x++)
        {
            for (var y = 0; y < 224; y++)
            {
                // Normalize from bytes (0-255) to floats (constants borrowed from CLIP repository)
                inputTensor[0, 0, y, x] = Convert.ToSingle((((float) image[x, y].R / 255) - 0.48145466) / 0.26862954);
                inputTensor[0, 1, y, x] = Convert.ToSingle((((float) image[x, y].G / 255) - 0.4578275 ) / 0.26130258);
                inputTensor[0, 2, y, x] = Convert.ToSingle((((float) image[x, y].B / 255) - 0.40821073) / 0.27577711);
            }
        }
        var inputs = new List<NamedOnnxValue> {NamedOnnxValue.CreateFromTensor("input", inputTensor)};
        // The model's last output is the image embedding vector.
        var outputData = _model.Run(inputs).ToList().Last().AsTensor<float>().ToArray();
        return outputData;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment