Skip to content

Instantly share code, notes, and snippets.

@n0099
Last active June 16, 2023 01:04
Show Gist options
  • Save n0099/f313d2eb601111ee3a341dbf591f6dbe to your computer and use it in GitHub Desktop.
Save n0099/f313d2eb601111ee3a341dbf591f6dbe to your computer and use it in GitHub Desktop.
using System.Diagnostics;
using System.IO.Hashing;
using System.Text.Encodings.Web;
namespace tbm.ImagePipeline;
/// <summary>
/// Background job that walks every stored <see cref="ImageMetadata.Exif"/> row, rebuilds the entity
/// from its raw bytes via <c>MetadataConsumer.CreateEmbeddedExifFromProfile</c>, writes the rebuilt
/// entities back in batches, and finally asks the host to stop.
/// </summary>
public class MigrationWorker : BackgroundService
{
    /// <summary>Number of source rows read between two flushes to the database.</summary>
    private const int BatchSize = 10000;

    /// <summary>Serializer settings used when dumping the exception summary into the log.</summary>
    private static readonly JsonSerializerOptions JsonSerializerOptions = new()
    {
        // the summary values are value tuples, whose members are fields rather than properties
        IncludeFields = true,
        Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
    };

    private readonly ILogger<MigrationWorker> _logger;
    private readonly ILifetimeScope _scope0;
    private readonly IHostApplicationLifetime _applicationLifetime;

    /// <summary>Creates the worker with its logger, root lifetime scope and host lifetime handle.</summary>
    /// <param name="logger">Receives one trace line per flushed batch.</param>
    /// <param name="scope0">Parent scope; a child scope is opened from it for the duration of the run.</param>
    /// <param name="applicationLifetime">Used to stop the application once the migration has finished.</param>
    public MigrationWorker(ILogger<MigrationWorker> logger, ILifetimeScope scope0, IHostApplicationLifetime applicationLifetime)
    {
        _logger = logger;
        _scope0 = scope0;
        _applicationLifetime = applicationLifetime;
    }

    /// <summary>
    /// Runs the migration: reads all rows untracked through one context, rebuilds each entity,
    /// and updates them through a second context every <see cref="BatchSize"/> rows.
    /// Failures of individual rows are collected per exception type and message and reported
    /// in the batch log line instead of aborting the run.
    /// </summary>
    /// <param name="stoppingToken">
    /// When cancellation is requested the loop stops reading, the pending batch is still flushed,
    /// and the application is asked to stop.
    /// </param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        // Everything below is synchronous. Yield first so that a host which invokes ExecuteAsync
        // inline from StartAsync gets its thread back immediately instead of waiting for the
        // whole migration to finish before it can complete startup.
        await Task.Yield();

        await using var scope1 = _scope0.BeginLifetimeScope();
        // two separate contexts: one streams the source rows, the other performs the batched updates
        var readDb = scope1.Resolve<ImagePipelineDbContext.NewDefault>()();
        var writeDb = scope1.Resolve<ImagePipelineDbContext.NewDefault>()();
        var existingEntities = readDb.Set<ImageMetadata.Exif>().AsNoTracking();

        var i = 0;
        using var process = Process.GetCurrentProcess();
        var stopwatch = Stopwatch.StartNew();
        // key: "<exception type>: <message>"; value: occurrence count plus the most recent image id and stack trace
        var exceptions = new Dictionary<string, (uint Times, ulong LastId, string StackTrace)>();
        var newEntities = new List<ImageMetadata.Exif>(BatchSize);

        // Flushes the pending batch, resets the per-batch state and logs progress.
        void SaveAndLog()
        {
            writeDb.Set<ImageMetadata.Exif>().UpdateRange(newEntities);
            foreach (var entry in writeDb.ChangeTracker.Entries<ImageMetadata.Exif>())
            {
                // exclude the raw bytes column from the generated UPDATE
                entry.Property(nameof(ImageMetadata.Exif.RawBytes)).IsModified = false;
            }

            var entitiesUpdated = writeDb.SaveChanges();
            newEntities.Clear();
            writeDb.ChangeTracker.Clear();

            // Process caches its property values after the first read; without a refresh every
            // log line would repeat the memory figure captured during the first batch.
            process.Refresh();
            _logger.LogTrace("i:{} entitiesUpdated:{} elapsed:{}ms mem:{}mb exceptions:{}",
                i, entitiesUpdated,
                stopwatch.ElapsedMilliseconds,
                process.PrivateMemorySize64 / 1024 / 1024,
                JsonSerializer.Serialize(exceptions, JsonSerializerOptions));
            stopwatch.Restart();
        }

        foreach (var entity in existingEntities)
        {
            i++;
            if (i % BatchSize == 0) SaveAndLog();
            if (stoppingToken.IsCancellationRequested) break;
            try
            {
                var newEntity = MetadataConsumer.CreateEmbeddedExifFromProfile(new(entity.RawBytes));
                newEntity.ImageId = entity.ImageId;
                newEntity.XxHash3 = XxHash3.HashToUInt64(entity.RawBytes);
                newEntities.Add(newEntity);
            }
            catch (Exception e)
            {
                // best effort: a row that cannot be converted is skipped and only counted
                var eKey = e.GetType().FullName + ": " + e.Message;
                if (!exceptions.TryAdd(eKey, (1, entity.ImageId, e.StackTrace ?? "")))
                {
                    var seen = exceptions[eKey];
                    seen.Times++;
                    seen.LastId = entity.ImageId;
                    seen.StackTrace = e.StackTrace ?? "";
                    exceptions[eKey] = seen; // tuples are value types, so the copy must be stored back
                }
            }
        }

        // flush whatever is left after the last full batch (or after cancellation)
        SaveAndLog();
        _applicationLifetime.StopApplication();
    }
}
namespace tbm.Crawler.Db;
/// <summary>
/// Pairs a <see cref="Pid"/> with a <see cref="UrlFilename"/>.
/// Two instances are equal when both values match.
/// </summary>
public class ReplyContentImage : IEquatable<ReplyContentImage>
{
    public ulong Pid { get; set; }

    public string UrlFilename { get; set; } = "";

    /// <summary>Value equality on <see cref="Pid"/> and <see cref="UrlFilename"/>.</summary>
    /// <param name="other">Instance to compare with; <c>null</c> is never equal.</param>
    public bool Equals(ReplyContentImage? other) =>
        other is not null
        && (ReferenceEquals(this, other)
            || (Pid == other.Pid && UrlFilename == other.UrlFilename));

    /// <summary>
    /// Equal only to an object of exactly the same runtime type that also satisfies
    /// <see cref="Equals(ReplyContentImage?)"/>.
    /// </summary>
    public override bool Equals(object? obj)
    {
        if (ReferenceEquals(this, obj))
        {
            return true;
        }

        return obj is ReplyContentImage candidate
            && candidate.GetType() == GetType()
            && Equals(candidate);
    }

    /// <summary>Hash built from the same two members that define equality.</summary>
    public override int GetHashCode() => HashCode.Combine(Pid, UrlFilename);
}
-- One row per (pid, urlFilename) pair; the composite primary key forbids duplicate pairs.
CREATE TABLE `tbmc_reply_content_image` (
    `pid`         BIGINT UNSIGNED NOT NULL,
    `urlFilename` VARCHAR(40) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
    PRIMARY KEY (`pid`, `urlFilename`),
    -- secondary index so rows can also be located by filename alone
    INDEX `urlFilename` (`urlFilename`)
);
-- Embedded EXIF metadata, one row per image (imageId is the primary key).
-- Every descriptive column is nullable; only imageId and xxHash3 are mandatory.
-- NOTE(review): presumably the table behind the ImageMetadata.Exif entity - confirm against the EF mapping.
CREATE TABLE tbm.tbmi_metadata_embedded_exif
(
    imageId                 INT UNSIGNED    NOT NULL,

    -- free-text tags
    orientation             TEXT            NULL,
    imageDescription        TEXT            NULL,
    userComment             TEXT            NULL,
    artist                  TEXT            NULL,
    xpAuthor                TEXT            NULL,
    copyright               TEXT            NULL,
    imageUniqueId           TEXT            NULL,
    bodySerialNumber        TEXT            NULL,
    make                    TEXT            NULL,
    model                   TEXT            NULL,
    software                TEXT            NULL,
    customRendered          SMALLINT        NULL,

    -- timestamps and their textual offsets
    dateTime                DATETIME        NULL,
    dateTimeOffset          TEXT            NULL,
    dateTimeDigitized       DATETIME        NULL,
    dateTimeDigitizedOffset TEXT            NULL,
    dateTimeOriginal        DATETIME        NULL,
    dateTimeOriginalOffset  TEXT            NULL,
    offsetTime              TEXT            NULL,
    offsetTimeDigitized     TEXT            NULL,
    offsetTimeOriginal      TEXT            NULL,

    -- GPS tags
    gpsDateTime             DATETIME        NULL,
    gpsCoordinate           POINT           NULL,
    gpsImgDirection         FLOAT           NULL,
    gpsImgDirectionRef      TEXT            NULL,

    -- 64-bit hash value and the raw profile bytes
    xxHash3                 BIGINT UNSIGNED NOT NULL,
    rawBytes                BLOB            NULL,

    PRIMARY KEY (imageId)
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment