Skip to content

Instantly share code, notes, and snippets.

@n0099
Created February 8, 2023 21:02
Show Gist options
  • Save n0099/8a8cb3899aee9099192dbd192548a3cd to your computer and use it in GitHub Desktop.
Save n0099/8a8cb3899aee9099192dbd192548a3cd to your computer and use it in GitHub Desktop.
using System.Text.Encodings.Web;
using static System.Text.Json.JsonSerializer;
namespace tbm.Crawler;
/// <summary>
/// One-shot background worker: for every forum id read from the database it
/// parses the stored reply contents, passes them in batches to
/// <c>ReplySaver.SaveReplyContentImages</c>, and finally terminates the process.
/// </summary>
public class MigrationWorker : BackgroundService
{
    /// <summary>Number of replies read between two flushes to the database.</summary>
    private const int BatchSize = 10000;

    /// <summary>
    /// Options used when dumping diagnostic state into the trace log.
    /// <c>IncludeFields</c> is needed because value tuples expose their elements as fields.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializerOptions = new()
    {
        IncludeFields = true,
        Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
        // DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault
    };

    private readonly ILogger<MigrationWorker> _logger;
    private readonly ILifetimeScope _scope0;

    /// <summary>Creates the worker.</summary>
    /// <param name="logger">Logger used for progress and diagnostics.</param>
    /// <param name="scope0">Root lifetime scope from which per-run child scopes are created.</param>
    public MigrationWorker(ILogger<MigrationWorker> logger, ILifetimeScope scope0)
    {
        _logger = logger;
        _scope0 = scope0;
    }

    /// <summary>
    /// Converts every forum in turn and then exits the process with code 0.
    /// </summary>
    /// <param name="stoppingToken">Cancellation signal from the host.</param>
    /// <exception cref="OperationCanceledException">
    /// Thrown between two forums when <paramref name="stoppingToken"/> has been cancelled.
    /// </exception>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        await using var scope1 = _scope0.BeginLifetimeScope();
        var db = scope1.Resolve<TbmDbContext.New>()(0);

        // Materialize the ids up front: enumerating the query lazily would keep its
        // reader (and connection) open for the whole duration of the migration.
        var fids = (from f in db.Forum select f.Fid).ToList();
        foreach (var fid in fids)
        {
            stoppingToken.ThrowIfCancellationRequested();
            _logger.LogInformation("converting for fid:{} started", fid);
            Convert(fid, stoppingToken);
            _logger.LogInformation("converting for fid:{} finished", fid);
        }

        // The worker is a one-shot job: terminate the whole process once it is done.
        Environment.Exit(0);
    }

    /// <summary>
    /// Streams all reply contents of one forum, parses each one and saves them in
    /// batches of <see cref="BatchSize"/> read rows.
    /// </summary>
    /// <param name="fid">Forum id used to create the database contexts.</param>
    /// <param name="stoppingToken">
    /// When cancelled, reading stops and the replies collected so far are still flushed.
    /// </param>
    private void Convert(Fid fid, CancellationToken stoppingToken)
    {
        using var scope1 = _scope0.BeginLifetimeScope();
        // Two separate contexts: `db` is only read from (streaming query below),
        // `db2` is only written to, so saving does not interfere with the open reader.
        var db = scope1.Resolve<TbmDbContext.New>()(fid);
        var db2 = scope1.Resolve<TbmDbContext.New>()(fid);
        var replies = from p in db.ReplyContents where p.Content != null select p;
        var i = 0;
        using var process = Process.GetCurrentProcess();
        var stopwatch = Stopwatch.StartNew();
        // NOTE(review): nothing in this method adds to the dictionary, so it is always
        // logged empty — presumably a leftover from per-reply error collection.
        var exceptions = new Dictionary<string, (uint times, ulong pid, string content)>();
        var repliesWithImage = new List<ReplyPost>(BatchSize);

        // Flushes the current batch through db2 and logs progress for it.
        void SaveAndLog()
        {
            ReplySaver.SaveReplyContentImages(db2, repliesWithImage);
            var imagesInserted = db2.SaveChanges();
            // Drop tracked entities so the write context does not grow across batches.
            db2.ChangeTracker.Clear();
            repliesWithImage.Clear();
            repliesWithImage.EnsureCapacity(BatchSize);

            // Process caches its property values; without Refresh() every batch
            // would report the memory size captured on the first read.
            process.Refresh();
            _logger.LogTrace("i:{} imagesInserted:{} elapsed:{}ms mem:{}mb exceptions:{}",
                i, imagesInserted,
                stopwatch.ElapsedMilliseconds,
                process.PrivateMemorySize64 / 1024 / 1024,
                Serialize(exceptions, JsonSerializerOptions));
            stopwatch.Restart();
        }

        foreach (var reply in replies.AsNoTracking())
        {
            i++;
            if (i % BatchSize == 0)
            {
                SaveAndLog();
            }

            if (stoppingToken.IsCancellationRequested)
            {
                break;
            }

            // Already filtered by the query; kept as a guard for the nullable column.
            if (reply.Content == null)
            {
                continue;
            }

            var content = PostContentWrapper.Parser.ParseFrom(reply.Content).Value;
            repliesWithImage.Add(new() {Pid = reply.Pid, OriginalContents = content});
        }

        // Flush whatever is left from the last, partially filled batch.
        SaveAndLog();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment