@AlexMedia
Created March 14, 2021 14:08
Imageflow S3 Cache
using System;
using System.Globalization;
using System.Text;

namespace ImageflowS3Cache
{
    public class HashBasedPathBuilder
    {
        public HashBasedPathBuilder()
        {
            FileExtension = ".jpg";
            RelativeDirSeparator = '/';
            SubfolderBits = (int)Math.Ceiling(Math.Log(2048, 2)); // Log2 to find the number of bits, rounded up.
            if (SubfolderBits < 1) SubfolderBits = 1;
        }

        public int SubfolderBits { get; }

        private string FileExtension { get; }

        private char RelativeDirSeparator { get; }

        /// <summary>
        /// Builds a relative path for the cached version from the hash of the original data.
        /// E.g. 12/a1/d3/124211ab132592 or 12/0/12412ab12141.
        /// The key starts with a number representing the number of bits required to hold the subfolder count,
        /// followed by a segment for each trailing byte of the hash, up to the number of subfolder bits.
        /// Trailing bytes are used so that files whose hashes start with the same bytes aren't grouped together,
        /// since that grouping slows down some filesystems.
        /// </summary>
        /// <param name="hash">A 32-byte hash of the original data.</param>
        /// <returns>The relative path, including the file extension.</returns>
        public string GetRelativePathFromHash(byte[] hash)
        {
            var allBits = GetTrailingBits(hash, SubfolderBits);
            var sb = new StringBuilder(75 + FileExtension.Length);

            // Start with the subfolder distribution in bits, so old folders can easily be deleted
            // when the subfolder size changes.
            sb.AppendFormat(NumberFormatInfo.InvariantInfo, "{0:D}", SubfolderBits);
            sb.Append(RelativeDirSeparator);

            // If the subfolder count is set above 256, files are nested in multiple directories, one level per byte.
            foreach (var b in allBits)
            {
                sb.AppendFormat(NumberFormatInfo.InvariantInfo, "{0:x2}", b);
                sb.Append(RelativeDirSeparator);
            }

            sb.AppendFormat(NumberFormatInfo.InvariantInfo,
                "{0:x2}{1:x2}{2:x2}{3:x2}{4:x2}{5:x2}{6:x2}{7:x2}{8:x2}{9:x2}{10:x2}{11:x2}{12:x2}{13:x2}{14:x2}{15:x2}{16:x2}{17:x2}{18:x2}{19:x2}{20:x2}{21:x2}{22:x2}{23:x2}{24:x2}{25:x2}{26:x2}{27:x2}{28:x2}{29:x2}{30:x2}{31:x2}",
                hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7],
                hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15],
                hash[16], hash[17], hash[18], hash[19], hash[20], hash[21], hash[22], hash[23],
                hash[24], hash[25], hash[26], hash[27], hash[28], hash[29], hash[30], hash[31]);

            sb.Append(FileExtension);
            return sb.ToString();
        }

        internal static byte[] GetTrailingBits(byte[] data, int bits)
        {
            var trailingBytes = new byte[(int)Math.Ceiling(bits / 8.0)]; // Round up to whole bytes.
            Array.Copy(data, data.Length - trailingBytes.Length, trailingBytes, 0, trailingBytes.Length);
            var bitsToClear = trailingBytes.Length * 8 - bits;
            trailingBytes[0] = (byte)((byte)(trailingBytes[0] << bitsToClear) >> bitsToClear); // Set the extra high bits to 0.
            return trailingBytes;
        }
    }
}
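For illustration, here is a minimal, hypothetical sketch (not part of the gist) showing how a 32-byte SHA-256 hash maps to a relative cache path; the request key string is made up.

// Hypothetical usage sketch: hash a made-up request key with SHA-256 and turn it
// into the relative path that later becomes the S3 object key.
using System;
using System.Security.Cryptography;
using System.Text;
using ImageflowS3Cache;

class PathBuilderDemo
{
    static void Main()
    {
        var builder = new HashBasedPathBuilder();

        // GetRelativePathFromHash reads hash[0]..hash[31], so a 32-byte hash (e.g. SHA-256) is required.
        using var sha256 = SHA256.Create();
        var hash = sha256.ComputeHash(Encoding.UTF8.GetBytes("/images/cat.jpg?width=300"));

        // SubfolderBits is ceil(log2(2048)) = 11, which rounds up to two trailing bytes,
        // so the output looks like "11/xx/yy/<64 hex chars>.jpg" where xx and yy are
        // derived from the last two bytes of the hash.
        Console.WriteLine(builder.GetRelativePathFromHash(hash));
    }
}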
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net5.0</TargetFramework>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="AWSSDK.S3" Version="3.5.9.6" />
    <PackageReference Include="Imazen.Common" Version="0.5.6" />
  </ItemGroup>

</Project>
namespace ImageflowS3Cache
{
    public class S3CacheOptions
    {
        /// <summary>The S3 bucket that cached items are stored in.</summary>
        public string BucketName { get; set; }

        /// <summary>An optional key prefix for cached items within the bucket.</summary>
        public string Prefix { get; set; }
    }
}
using System.IO;
using Imazen.Common.Extensibility.StreamCache;

namespace ImageflowS3Cache
{
    public class S3CacheResult : IStreamCacheResult
    {
        public S3CacheResult()
        {
        }

        public S3CacheResult(Stream data, string contentType, string status)
        {
            Data = data;
            ContentType = contentType;
            Status = status;
        }

        public Stream Data { get; }
        public string ContentType { get; }
        public string Status { get; }
    }
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Amazon.S3;
using Amazon.S3.Transfer;
using Imazen.Common.Extensibility.StreamCache;
using Imazen.Common.Issues;
using Microsoft.Extensions.Logging;

namespace ImageflowS3Cache
{
    public class S3CacheService : IStreamCache
    {
        private readonly Func<IAmazonS3> _s3ClientFactory;
        private readonly S3CacheOptions _options;
        private readonly ILogger<S3CacheService> _logger;

        public S3CacheService(Func<IAmazonS3> s3ClientFactory, S3CacheOptions options, ILogger<S3CacheService> logger)
        {
            _s3ClientFactory = s3ClientFactory;
            _options = options;
            _logger = logger;
        }

        public IEnumerable<IIssue> GetIssues()
        {
            return Enumerable.Empty<IIssue>();
        }

        public Task StartAsync(CancellationToken cancellationToken)
        {
            return Task.CompletedTask;
        }

        public Task StopAsync(CancellationToken cancellationToken)
        {
            return Task.CompletedTask;
        }

        public async Task<IStreamCacheResult> GetOrCreateBytes(byte[] key, AsyncBytesResult dataProviderCallback,
            CancellationToken cancellationToken,
            bool retrieveContentType)
        {
            var s3Key = ComputeKey(_options.Prefix, key);
            using var s3Client = _s3ClientFactory();
            try
            {
                _logger.LogDebug("Retrieving file {S3Key} from S3", s3Key);
                var objectStream = await s3Client.GetObjectAsync(_options.BucketName, s3Key, cancellationToken);
                return new S3CacheResult(objectStream.ResponseStream, objectStream.Headers.ContentType,
                    objectStream.HttpStatusCode.ToString());
            }
            catch (AmazonS3Exception s3Exception)
            {
                if (s3Exception.ErrorCode != "NoSuchKey")
                {
                    _logger.LogError(s3Exception, "Error while retrieving item {S3Key} from S3.", s3Key);
                    throw;
                }

                // The file does not exist in S3 yet: generate it, upload it, and return it.
                _logger.LogDebug("File {S3Key} not found in cache, creating and storing", s3Key);
                var (contentType, data) = await dataProviderCallback(cancellationToken);
                await using var stream = new MemoryStream(data.Array ?? throw new NullReferenceException(), data.Offset, data.Count, false, true);

                _logger.LogDebug("Storing in S3");
                using var transferUtility = new TransferUtility(s3Client);
                var uploadRequest = new TransferUtilityUploadRequest
                {
                    InputStream = stream,
                    BucketName = _options.BucketName,
                    Key = s3Key,
                    AutoCloseStream = true,
                    Headers = { ContentType = contentType }
                };
                await transferUtility.UploadAsync(uploadRequest, cancellationToken);

                _logger.LogDebug("Returning result");
                var responseStream = new MemoryStream(data.Array ?? throw new NullReferenceException(), data.Offset, data.Count, false, true);
                return new S3CacheResult(responseStream, contentType, "MISS");
            }
        }

        private static string ComputeKey(string prefix, byte[] key)
        {
            var builder = new HashBasedPathBuilder();
            var hashKey = builder.GetRelativePathFromHash(key);
            return $"{prefix}/{hashKey}".Trim('/');
        }
    }
}
using System;
using Amazon.S3;
using Imazen.Common.Extensibility.StreamCache;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;

namespace ImageflowS3Cache
{
    public static class S3CacheServiceExtensions
    {
        public static IServiceCollection AddImageflowS3Cache(this IServiceCollection services, Func<IAmazonS3> s3ClientFactory, S3CacheOptions options)
        {
            services.AddSingleton<IStreamCache>(container =>
            {
                var logger = container.GetRequiredService<ILogger<S3CacheService>>();
                return new S3CacheService(s3ClientFactory, options, logger);
            });
            return services;
        }
    }
}
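As a usage sketch (not part of the gist), the cache can be registered as shown below. The bucket name, prefix, and region are placeholders, and the Microsoft.Extensions.DependencyInjection and Microsoft.Extensions.Logging packages are assumed to be available in the hosting project.

// Hypothetical registration sketch: bucket, prefix, and region are placeholder values,
// and AWS credentials are assumed to come from the environment or default profile.
using Amazon;
using Amazon.S3;
using ImageflowS3Cache;
using Microsoft.Extensions.DependencyInjection;

var services = new ServiceCollection();
services.AddLogging(); // S3CacheService resolves ILogger<S3CacheService> from the container.
services.AddImageflowS3Cache(
    () => new AmazonS3Client(RegionEndpoint.EUWest1),
    new S3CacheOptions
    {
        BucketName = "my-imageflow-cache",
        Prefix = "imageflow"
    });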