Skip to content

Instantly share code, notes, and snippets.

@n0099
Last active July 28, 2022 21:14
Show Gist options
  • Save n0099/ea4f30e41f5db9b750f9fa5a39e61e41 to your computer and use it in GitHub Desktop.
Save n0099/ea4f30e41f5db9b750f9fa5a39e61e41 to your computer and use it in GitHub Desktop.
tbm migration
using System.Buffers;
using System.Text.Encodings.Web;
using System.Text.Json.Nodes;
using System.Text.RegularExpressions;
namespace tbm.Crawler
{
public class ConvertPostContentToProtoBufWorker : BackgroundService
{
private readonly ILogger<ConvertPostContentToProtoBufWorker> _logger;
public ConvertPostContentToProtoBufWorker(ILogger<ConvertPostContentToProtoBufWorker> logger) => _logger = logger;
private static readonly JsonSerializerOptions JsonSerializerOptions = new() { IncludeFields = true, Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping};
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
/*
await using var scope = Program.Autofac.BeginLifetimeScope();
var db = scope.Resolve<TbmDbContext.New>()(0);
var fids = from f in db.ForumsInfo select f.Fid;
foreach (var fid in fids)
{
_logger.LogInformation("converting for fid:{} started", fid);
await Convert2(fid);
_logger.LogInformation("converting for fid:{} finished", fid);
}
*/
await Convert4();
Environment.Exit(0);
}
private static readonly Regex PortraitExtractingRegex = new(@"^(.*?)\?t=(\d+)$", RegexOptions.Compiled, TimeSpan.FromSeconds(1));
private async Task Convert4()
{
await using var scope = Program.Autofac.BeginLifetimeScope();
var db = scope.Resolve<TbmDbContext.New>()(0);
var db2 = scope.Resolve<TbmDbContext.New>()(0);
var users = from u in db.Users where u.PortraitUpdateTime == null select new {u.Uid, u.Portrait};
var i = 0;
using var process = Process.GetCurrentProcess();
var stopwatch = new Stopwatch();
stopwatch.Start();
var exceptions = new Dictionary<string, (uint times, long uid)>();
var newUsers = new List<TiebaUser>();
void SaveAndLog()
{
db2.Users.AttachRange(newUsers);
foreach (var e in db2.ChangeTracker.Entries<TiebaUser>())
{
e.Property(nameof(TiebaUser.Portrait)).IsModified = true;
e.Property(nameof(TiebaUser.PortraitUpdateTime)).IsModified = true;
}
db2.SaveChangesWithoutTimestamping();
newUsers.Clear();
db2.ChangeTracker.Clear();
_logger.LogTrace("i:{} elapsed:{}ms mem:{}kb exceptions:{}", i,
stopwatch.ElapsedMilliseconds,
process.PrivateMemorySize64 / 1024,
JsonSerializer.Serialize(exceptions, JsonSerializerOptions));
stopwatch.Restart();
}
foreach (var user in users.AsNoTracking())
{
i++;
if (i % 10000 == 0) SaveAndLog();
try
{
static (string Portrait, uint? UpdateTime) ExtractPortrait(string portrait) =>
PortraitExtractingRegex.Match(portrait) is {Success: true} m
? (m.Groups[1].Value, Time.Parse(m.Groups[2].ValueSpan)) : (portrait, null);
var (portrait, updateTime) = ExtractPortrait(user.Portrait);
newUsers.Add(new()
{
Uid = user.Uid,
Portrait = portrait,
PortraitUpdateTime = updateTime
});
}
catch (Exception e)
{
var em = e.GetType().FullName + ": " + e.Message;
if (!exceptions.TryAdd(em, (1, user.Uid)))
{
var a = exceptions[em];
a.times++;
a.uid = user.Uid;
exceptions[em] = a;
}
// _logger.LogError(e, "exception spid:{} content:{}", subReply.Spid, subReply.Content);
}
}
SaveAndLog();
}
private async Task Convert3()
{
await using var scope = Program.Autofac.BeginLifetimeScope();
var db = scope.Resolve<TbmDbContext.New>()(0);
var db2 = scope.Resolve<TbmDbContext.New>()(0);
var users = from u in db.Users where u.IconInfo != null select new {u.Uid, u.IconInfo};
var i = 0;
using var process = Process.GetCurrentProcess();
var stopwatch = new Stopwatch();
stopwatch.Start();
var exceptions = new Dictionary<string, (uint times, long uid)>();
var newUsers = new List<TiebaUser>();
void SaveAndLog()
{
db2.Users.AttachRange(newUsers);
foreach (var e in db2.ChangeTracker.Entries<TiebaUser>())
{
e.Property(nameof(TiebaUser.IconInfo)).IsModified = true;
}
db2.SaveChangesWithoutTimestamping();
newUsers.Clear();
db2.ChangeTracker.Clear();
_logger.LogTrace("i:{} elapsed:{}ms mem:{}kb exceptions:{}", i,
stopwatch.ElapsedMilliseconds,
process.PrivateMemorySize64 / 1024,
JsonSerializer.Serialize(exceptions, JsonSerializerOptions));
stopwatch.Restart();
}
foreach (var user in users.AsNoTracking())
{
i++;
if (i % 10000 == 0) SaveAndLog();
try
{
var json = "{\"value\":" + Encoding.UTF8.GetString(user.IconInfo ?? ReadOnlySpan<byte>.Empty) + "}";
var proto = new JsonParser(JsonParser.Settings.Default.WithIgnoreUnknownFields(true)).Parse<UserIconWrapper>(json);
newUsers.Add(new()
{
Uid = user.Uid,
IconInfo = proto.ToByteArray()
});
}
catch (Exception e)
{
var em = e.GetType().FullName + ": " + e.Message;
if (!exceptions.TryAdd(em, (1, user.Uid)))
{
var a = exceptions[em];
a.times++;
a.uid = user.Uid;
exceptions[em] = a;
}
// _logger.LogError(e, "exception spid:{} content:{}", subReply.Spid, subReply.Content);
}
}
SaveAndLog();
}
private async Task Convert2(Fid fid)
{
await using var scope = Program.Autofac.BeginLifetimeScope();
var db = scope.Resolve<TbmDbContext.New>()(fid);
var db2 = scope.Resolve<TbmDbContext.New>()(fid);
var replies = from p in db.Replies where p.Location != null select new {p.Pid, p.Location};
var i = 0;
using var process = Process.GetCurrentProcess();
var stopwatch = new Stopwatch();
stopwatch.Start();
var exceptions = new Dictionary<string, (uint times, ulong pid, string content)>();
var newReplies = new List<ReplyPost>();
void SaveAndLog()
{
db2.Replies.AttachRange(newReplies);
foreach (var e in db2.ChangeTracker.Entries<ReplyPost>())
{
// e.State = EntityState.Modified;
e.Property(nameof(ReplyPost.Location)).IsModified = true;
}
db2.SaveChangesWithoutTimestamping();
newReplies.Clear();
db2.ChangeTracker.Clear();
_logger.LogTrace("i:{} elapsed:{}ms mem:{}kb exceptions:{}", i,
stopwatch.ElapsedMilliseconds,
process.PrivateMemorySize64 / 1024,
JsonSerializer.Serialize(exceptions, JsonSerializerOptions));
stopwatch.Restart();
}
foreach (var reply in replies.AsNoTracking())
{
i++;
if (i % 10000 == 0) SaveAndLog();
try
{
var proto = new JsonParser(JsonParser.Settings.Default.WithIgnoreUnknownFields(true)).Parse<TbClient.Post.Common.Lbs>(Encoding.UTF8.GetString(reply.Location ?? ReadOnlySpan<byte>.Empty));
newReplies.Add(new()
{
Pid = reply.Pid,
Location = proto.ToByteArray()
});
}
catch (Exception e)
{
var em = e.GetType().FullName + ": " + e.Message;
if (!exceptions.TryAdd(em, (1, reply.Pid, "")))
{
var a = exceptions[em];
a.times++;
a.pid = reply.Pid;
a.content = "";
exceptions[em] = a;
}
// _logger.LogError(e, "exception spid:{} content:{}", subReply.Spid, subReply.Content);
}
}
SaveAndLog();
}
private async Task Convert(Fid fid)
{
await using var scope = Program.Autofac.BeginLifetimeScope();
var db = scope.Resolve<TbmDbContext.New>()(fid);
var db2 = scope.Resolve<TbmDbContext.New>()(fid);
var replies = from p in db.SubReplies where db.SubReplyContents.All(p2 => p2.Spid != p.Spid) select new {p.Spid, p.Content};
var i = 0;
using var process = Process.GetCurrentProcess();
var stopwatch = new Stopwatch();
stopwatch.Start();
var subReplyContents = new List<SubReplyContent>();
var exceptions = new Dictionary<string, (uint times, ulong spid, string content)>();
void SaveAndLog()
{
db2.SubReplyContents.AddRange(subReplyContents);
db2.SaveChangesWithoutTimestamping();
db2.ChangeTracker.Clear();
subReplyContents.Clear();
_logger.LogTrace("i:{} elapsed:{}ms mem:{}kb exceptions:{}", i,
stopwatch.ElapsedMilliseconds,
process.PrivateMemorySize64 / 1024,
JsonSerializer.Serialize(exceptions, JsonSerializerOptions));
stopwatch.Restart();
}
foreach (var subReply in replies.AsNoTracking())
{
i++;
if (i % 10000 == 0) SaveAndLog();
try
{
byte[]? content = null;
if (subReply.Content != null)
{
var useJsonSerialize = false;
var sanitizedJson = "{\"value\":" + subReply.Content + "}";
if (useJsonSerialize)
{
var node = JsonNode.Parse(sanitizedJson);
if (node?["value"] == null) throw new("value is null");
var valueNode = node["value"]?.AsArray();
if (valueNode == null) throw new("value is null");
node["value"] = new JsonArray(valueNode.Select(n =>
{
if (n is JsonValue n2 && n2.GetValue<string>() == "") return null;
var i2 = n?.AsObject();
if (i2 == null) return null;
if (i2.TryGetPropertyValue("native_app", out var nativeApp)
&& nativeApp is JsonArray && nativeApp.AsArray().Count == 0)
i2.Remove("native_app");
i2.Aggregate(new HashSet<string>(), (acc, i3) =>
{
if (i3.Value is JsonArray && i3.Value.AsArray().Count == 0) acc.Add(i3.Key);
return acc;
}).ForEach(key => i2.Remove(key));
/*
if (i2.TryGetPropertyValue("size", out var size)
&& i2.TryGetPropertyValue("original_size", out var originalSize)
&& size != null && originalSize != null
&& size.AsValue().TryGetValue<int>(out var sizeValue)
&& originalSize.AsValue().TryGetValue<int>(out var originalSizeValue)
&& sizeValue != originalSizeValue)
_logger.LogError("size mismatched, size:{} original_size:{}", sizeValue, originalSizeValue);
*/
return i2.Deserialize<JsonNode>();
}).OfType<JsonNode>().ToArray());
sanitizedJson = node["value"]?.AsArray().Count == 0 ? null : node?.ToJsonString();
}
var contentProtoBuf = new JsonParser(JsonParser.Settings.Default.WithIgnoreUnknownFields(true)).Parse<PostContentWrapper>(sanitizedJson).ToByteArray();
content = contentProtoBuf;
}
subReplyContents.Add(new()
{
Spid = subReply.Spid,
Content = content
});
}
catch (Exception e)
{
var em = e.GetType().FullName + ": " + e.Message;
if (!exceptions.TryAdd(em, (1, subReply.Spid, "")))
{
var a = exceptions[em];
a.times++;
a.spid = subReply.Spid;
a.content = "";
exceptions[em] = a;
}
// _logger.LogError(e, "exception spid:{} content:{}", subReply.Spid, subReply.Content);
}
}
SaveAndLog();
}
}
}
ALTER TABLE `tbm_postsIndex` CHANGE `postTime` `postTime` VARCHAR(19) NULL;
UPDATE `tbm_postsIndex` SET `postTime` = UNIX_TIMESTAMP(`postTime`);
ALTER TABLE `tbm_postsIndex` CHANGE `postTime` `postTime` INT UNSIGNED NULL;
ALTER TABLE `tbm_postsIndex` DROP `authorUid`, DROP `isMe0407`, DROP `created_at`, DROP `updated_at`,
CHANGE `type` `type` ENUM('thread','reply','subReply') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL AFTER `id`,
CHANGE `fid` `fid` INT UNSIGNED NOT NULL,
CHANGE `tid` `tid` BIGINT UNSIGNED NOT NULL,
CHANGE `pid` `pid` BIGINT UNSIGNED NOT NULL,
CHANGE `spid` `spid` BIGINT UNSIGNED NOT NULL,
CHANGE `postTime` `postTime` INT UNSIGNED NULL DEFAULT NULL;
UPDATE `tbm_tiebaUsers` SET gender = 0 WHERE gender IS NULL;
ALTER TABLE `tbm_tiebaUsers` MODIFY `gender` TINYINT UNSIGNED NULL DEFAULT NULL,
MODIFY `name` VARCHAR(40) NULL DEFAULT NULL,
MODIFY `displayName` VARCHAR(40) NULL DEFAULT NULL,
CHANGE `avatarUrl` `portrait` VARCHAR(255) not null,
MODIFY `iconInfo` BLOB NULL DEFAULT NULL,
CHANGE `id` `id` INT UNSIGNED NOT NULL AUTO_INCREMENT;
ADD `portraitUpdateTime` INT UNSIGNED NULL DEFAULT NULL AFTER `portrait`,
DROP `privacySettings`, DROP `alaInfo`;
ALTER TABLE `tbm_tiebaUsers` CHANGE `created_at` `created_at` VARCHAR(19) NOT NULL, CHANGE `updated_at` `updated_at` VARCHAR(19) NOT NULL;
UPDATE `tbm_tiebaUsers` SET `created_at` = UNIX_TIMESTAMP(`created_at`), `updated_at` = UNIX_TIMESTAMP(`updated_at`);
ALTER TABLE `tbm_tiebaUsers` CHANGE `created_at` `createdAt` INT UNSIGNED NOT NULL, CHANGE `updated_at` `updatedAt` INT UNSIGNED NOT NULL;
CREATE TABLE `{t_reply}_content` (
`id` int NOT NULL AUTO_INCREMENT,
`pid` bigint NOT NULL,
`content` blob,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `pid` (`pid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=COMPRESSED;
CREATE TABLE `{t_subReply}_content` (
`id` int NOT NULL AUTO_INCREMENT,
`spid` bigint NOT NULL,
`content` blob,
PRIMARY KEY (`id`) USING BTREE,
UNIQUE KEY `spid` (`spid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=COMPRESSED;
ALTER TABLE `{t_reply}` DROP INDEX `floor`, DROP INDEX `postTime`, DROP COLUMN clientVersion, DROP COLUMN content;
ALTER TABLE `{t_thread}` DROP INDEX `postTime`, DROP clientVersion;
ALTER TABLE `{t_subReply}` DROP INDEX `postTime`, DROP COLUMN clientVersion, DROP COLUMN content;
ALTER TABLE `{t_thread}` ADD `agreeNum` INT NOT NULL AFTER `shareNum`, ADD `disagreeNum` INT NOT NULL AFTER `agreeNum`;
UPDATE `{t_thread}` SET agreeNum = agreeInfo->>"$.agree_num", disagreeNum = agreeInfo->>"$.disagree_num" WHERE agreeInfo IS NOT NULL;
ALTER TABLE `{t_thread}` DROP `agreeInfo`;
ALTER TABLE `{t_reply}` ADD `agreeNum` INT NOT NULL AFTER `isFold`, ADD `disagreeNum` INT NOT NULL AFTER `agreeNum`;
UPDATE `{t_reply}` SET agreeNum = agreeInfo->>"$.agree_num", disagreeNum = agreeInfo->>"$.disagree_num" WHERE agreeInfo IS NOT NULL;
ALTER TABLE `{t_reply}` DROP `agreeInfo`;
ALTER TABLE `{t_thread}` CHANGE `postTime` `postTime` VARCHAR(19) NULL DEFAULT NULL, CHANGE `latestReplyTime` `latestReplyTime` VARCHAR(19) NOT NULL, CHANGE `created_at` `created_at` VARCHAR(19) NOT NULL, CHANGE `updated_at` `updated_at` VARCHAR(19) NOT NULL;
UPDATE `{t_thread}` SET `postTime` = UNIX_TIMESTAMP(`postTime`), `latestReplyTime` = UNIX_TIMESTAMP(`latestReplyTime`), `created_at` = UNIX_TIMESTAMP(`created_at`), `updated_at` = UNIX_TIMESTAMP(`updated_at`);
ALTER TABLE `{t_thread}` CHANGE `postTime` `postTime` INT UNSIGNED NULL DEFAULT NULL, CHANGE `latestReplyTime` `latestReplyTime` INT UNSIGNED NOT NULL, CHANGE `created_at` `createdAt` INT UNSIGNED NOT NULL, CHANGE `updated_at` `updatedAt` INT UNSIGNED NOT NULL;
ALTER TABLE `{t_reply}` CHANGE `postTime` `postTime` VARCHAR(19) NOT NULL, CHANGE `created_at` `created_at` VARCHAR(19) NOT NULL, CHANGE `updated_at` `updated_at` VARCHAR(19) NOT NULL;
UPDATE `{t_reply}` SET `postTime` = UNIX_TIMESTAMP(`postTime`), `created_at` = UNIX_TIMESTAMP(`created_at`), `updated_at` = UNIX_TIMESTAMP(`updated_at`);
ALTER TABLE `{t_reply}` CHANGE `postTime` `postTime` INT UNSIGNED NOT NULL, CHANGE `created_at` `createdAt` INT UNSIGNED NOT NULL, CHANGE `updated_at` `updatedAt` INT UNSIGNED NOT NULL;
ALTER TABLE `{t_subReply}` CHANGE `postTime` `postTime` VARCHAR(19) NOT NULL, CHANGE `created_at` `created_at` VARCHAR(19) NOT NULL, CHANGE `updated_at` `updated_at` VARCHAR(19) NOT NULL;
UPDATE `{t_subReply}` SET `postTime` = UNIX_TIMESTAMP(`postTime`), `created_at` = UNIX_TIMESTAMP(`created_at`), `updated_at` = UNIX_TIMESTAMP(`updated_at`);
ALTER TABLE `{t_subReply}` CHANGE `postTime` `postTime` INT UNSIGNED NOT NULL, CHANGE `created_at` `createdAt` INT UNSIGNED NOT NULL, CHANGE `updated_at` `updatedAt` INT UNSIGNED NOT NULL;
ALTER TABLE `{t_thread}`
CHANGE `id` `id` INT UNSIGNED NOT NULL auto_increment FIRST,
CHANGE `tid` `tid` BIGINT UNSIGNED NOT NULL AFTER `id`,
CHANGE `firstPid` `firstPid` BIGINT UNSIGNED NOT NULL AFTER `tid`,
CHANGE `threadType` `threadType` BIGINT UNSIGNED NOT NULL AFTER `firstPid`,
CHANGE `stickyType` `stickyType` TINYTEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL AFTER `threadType`,
CHANGE `topicType` `topicType` TINYTEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL AFTER `stickyType`,
CHANGE `isGood` `isGood` TINYINT NOT NULL AFTER `topicType`,
CHANGE `title` `title` TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL AFTER `isGood`,
CHANGE `authorUid` `authorUid` BIGINT NOT NULL AFTER `title`,
CHANGE `authorManagerType` `authorManagerType` TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL AFTER `authorUid`,
CHANGE `postTime` `postTime` INT UNSIGNED NULL DEFAULT NULL AFTER `authorManagerType`,
CHANGE `latestReplyTime` `latestReplyTime` INT UNSIGNED NOT NULL AFTER `postTime`,
CHANGE `latestReplierUid` `latestReplierUid` BIGINT NULL DEFAULT NULL AFTER `latestReplyTime`,
CHANGE `replyNum` `replyNum` INT UNSIGNED NOT NULL AFTER `latestReplierUid`,
CHANGE `viewNum` `viewNum` INT UNSIGNED NOT NULL AFTER `replyNum`,
CHANGE `shareNum` `shareNum` INT UNSIGNED NULL DEFAULT NULL AFTER `viewNum`,
CHANGE `disagreeNum` `disagreeNum` INT NOT NULL AFTER `agreeNum`,
CHANGE `zanInfo` `zanInfo` JSON NULL DEFAULT NULL AFTER `disagreeNum`,
CHANGE `location` `location` JSON NULL DEFAULT NULL AFTER `zanInfo`,
CHANGE `authorPhoneType` `authorPhoneType` TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL AFTER `location`,
CHANGE `antiSpamInfo` `antiSpamInfo` JSON NULL AFTER authorPhoneType;
ALTER TABLE `{t_reply}` CHANGE `authorExpGrade` `authorExpGrade` TINYINT NULL DEFAULT NULL, CHANGE `isFold` `isFold` TINYINT NOT NULL;
ALTER TABLE `{t_subReply}` CHANGE `authorExpGrade` `authorExpGrade` TINYINT NOT NULL;
ALTER TABLE `{t_reply}` MODIFY tid BIGINT UNSIGNED NOT NULL, MODIFY pid BIGINT UNSIGNED NOT NULL, MODIFY floor INT UNSIGNED NOT NULL, MODIFY authorExpGrade TINYINT UNSIGNED NULL, MODIFY postTime INT UNSIGNED NOT NULL, MODIFY isFold TINYINT UNSIGNED NOT NULL;
ALTER TABLE `{t_subReply}` MODIFY tid BIGINT UNSIGNED NOT NULL, MODIFY pid BIGINT UNSIGNED NOT NULL, MODIFY spid BIGINT UNSIGNED NOT NULL, MODIFY postTime INT UNSIGNED NOT NULL;
ALTER TABLE `{t_reply}` MODIFY `location` BLOB NULL DEFAULT NULL, MODIFY `signInfo` BLOB NULL DEFAULT NULL, MODIFY `tailInfo` BLOB NULL DEFAULT NULL;
ALTER TABLE `{t_thread}` DROP `id`, DROP PRIMARY KEY, ADD `id` INT UNSIGNED NOT NULL AUTO_INCREMENT FIRST, ADD PRIMARY KEY (`id`);
ALTER TABLE `{t_reply}` DROP `id`, DROP PRIMARY KEY, ADD `id` INT UNSIGNED NOT NULL AUTO_INCREMENT FIRST, ADD PRIMARY KEY (`id`);
ALTER TABLE `{t_subReply}` DROP `id`, DROP PRIMARY KEY, ADD `id` INT UNSIGNED NOT NULL AUTO_INCREMENT FIRST, ADD PRIMARY KEY (`id`);
ALTER TABLE `tbm_postsIndex` DROP `id`, DROP PRIMARY KEY, ADD `id` INT UNSIGNED NOT NULL AUTO_INCREMENT FIRST, ADD PRIMARY KEY (`id`);
ALTER TABLE `tbm_tiebaUsers` DROP `id`, DROP PRIMARY KEY, ADD `id` INT UNSIGNED NOT NULL AUTO_INCREMENT FIRST, ADD PRIMARY KEY (`id`);
ALTER TABLE `{t_reply}` MODIFY `signInfo` TEXT NULL DEFAULT NULL;
ALTER TABLE `{t_reply}` MODIFY `signInfo` JSON NULL DEFAULT NULL, ADD `signatureId` INT UNSIGNED NOT NULL AFTER `signInfo`;
UPDATE `{t_reply}` SET signatureId = signInfo->>"$.signature_id" WHERE signInfo IS NOT NULL;
SELECT (SELECT COUNT(*) FROM `{t_reply}` WHERE signInfo IS NOT NULL) UNION ALL (SELECT COUNT(*) FROM `{t_reply}` WHERE signatureId != 0);
SELECT COUNT(DISTINCT `authorUid`) AS uidCount, GROUP_CONCAT(DISTINCT `authorUid`), `signatureId` FROM {t_reply} WHERE signInfo IS NOT NULL GROUP BY signatureId HAVING uidCount != 1;
CREATE TABLE `tbm_reply_signatures` (
`id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
`userId` BIGINT NOT NULL,
`signatureId` INT UNSIGNED NOT NULL,
`signature` BLOB NOT NULL,
`lastSeen` Int UNSIGNED NOT NULL,
PRIMARY KEY (`id`), INDEX(`userId`), INDEX(`signatureId`)
) ENGINE = InnoDB;
SET SESSION group_concat_max_len = 600000;
INSERT INTO `tbm_reply_signatures` SELECT NULL AS id, userId, signatureId, signature, MAX(updatedAt) AS lastSeen FROM (
SELECT GROUP_CONCAT(DISTINCT `authorUid`) AS userId, `signatureId`, signInfo AS `signature`, updatedAt
FROM `{t_reply}` WHERE signInfo IS NOT NULL GROUP BY signatureId, updatedAt, signInfo
) AS T GROUP BY userId, signature, signatureId;
SELECT COUNT(*) FROM `{t_reply}` WHERE signatureId != 0 AND signatureId NOT IN (SELECT signatureId FROM tbm_reply_signatures);
-- SELECT CONVERT(A.signInfo, CHAR), CONVERT(B.signature, CHAR) FROM `{t_reply}` AS A JOIN tbm_reply_signatures AS B ON A.signatureId = B.signatureId AND A.updatedAt = B.lastSeen WHERE MD5(CONVERT(A.signInfo, CHAR)) != MD5(CONVERT(B.signature, CHAR));
SELECT COUNT(*) FROM {t_reply} AS A WHERE signatureId != 0 AND NOT EXISTS (SELECT 1 FROM tbm_reply_signatures WHERE MD5(signature) = MD5(A.signInfo));
ALTER TABLE `{t_reply}` DROP `signInfo`, MODIFY `signatureId` INT UNSIGNED NULL DEFAULT NULL;
UPDATE {t_reply} SET `signatureId` = NULL WHERE `signatureId` = 0;
UPDATE {t_reply} SET location = NULL WHERE location = "null";
ALTER TABLE `tbm_postsIndex` CHANGE `pid` `pid` BIGINT UNSIGNED NULL DEFAULT NULL, CHANGE `spid` `spid` BIGINT UNSIGNED NULL DEFAULT NULL;
UPDATE tbm_postsIndex SET `pid` = NULL WHERE `pid` = 0;
UPDATE tbm_postsIndex SET `spid` = NULL WHERE `spid` = 0;
ALTER TABLE `{t_reply}` DROP `tailInfo`;
SELECT COUNT(*) FROM {t_reply} WHERE location IS NOT NULL AND location LIKE "{%";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment