Skip to content

Instantly share code, notes, and snippets.

@run-dlang
Created October 15, 2023 03:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save run-dlang/6d603756c64b37aec370d215692ac930 to your computer and use it in GitHub Desktop.
Save run-dlang/6d603756c64b37aec370d215692ac930 to your computer and use it in GitHub Desktop.
Code shared from run.dlang.io.
import std.stdio : writeln, toFile;
import std.datetime.stopwatch : StopWatch, AutoStart;
import asdf.serialization : deserialize, serializeToJson;
import std.file : readText;
/// Number of related posts kept per post; sizes `RelatedPosts.related` and the
/// per-post top-N ranking buffer in `main`.
enum TopN = 5;
/// One input post, as deserialized from the JSON input in `main`.
/// Field names are part of the JSON mapping, so they must not be renamed.
struct Post
{
/// Post identifier; copied verbatim into the matching `RelatedPosts` record.
string _id;
/// Post title; not consulted by the ranking itself.
string title;
/// Tags attached to the post; relatedness is counted from shared tags.
string[] tags;
}
/// One output record: a post's id and tags plus the `TopN` posts selected for it.
/// An array of these is serialized to JSON at the end of `main`.
struct RelatedPosts
{
/// Identifier of the post this record describes.
string _id;
/// Tags of the post this record describes.
string[] tags;
/// The selected posts, ordered from most to fewest shared tags.
Post[TopN] related;
}
/// Entry of the per-post top-N ranking: a candidate post and its score.
/// Constructed positionally in `main`, so the field order is significant.
struct PostIdxAndRelatedCount
{
/// Index of the candidate post within the `posts` array.
size_t postIdx;
/// Number of tags the candidate shares with the post being processed.
/// Stored as `ubyte`, matching the element type of the counting buffer.
ubyte relatedCount;
}
/**
 * Reads posts from "../posts.json", picks for every post the `TopN` other
 * posts that share the most tags with it, prints the elapsed processing time
 * and writes the result to "../related_posts_d.json".
 *
 * The stopwatch covers only the in-memory work: it is started after the input
 * has been read and deserialized and stopped before the output is serialized.
 */
void main()
{
    auto jsonText = readText("../posts.json");
    auto posts = deserialize!(Post[])(jsonText);

    auto sw = StopWatch(AutoStart.yes);
    auto relatedPosts = new RelatedPosts[posts.length];

    // Inverted index: tag -> indices of every post carrying that tag.
    size_t[][string] tagMap;
    foreach (i, ref const post; posts)
        foreach (tag; post.tags)
            tagMap[tag] ~= i;

    // Scratch buffer reused for every post: relatedCounts[j] is the number of
    // tags post j shares with the post currently being processed.
    // NOTE(review): the element type is ubyte, so a count wraps around past 255.
    auto relatedCounts = new ubyte[posts.length];

    foreach (const myPostIdx, ref post; posts)
    {
        relatedCounts[] = 0;
        foreach (tag; post.tags)
            foreach (idx; tagMap[tag])
                relatedCounts[idx]++;
        relatedCounts[myPostIdx] = 0; // exclude ourselves from consideration

        // topn is kept sorted by descending relatedCount; minRelatedCount
        // mirrors the count in its last slot, so most candidates are rejected
        // with a single comparison.
        PostIdxAndRelatedCount[TopN] topn;
        auto minRelatedCount = 0;
        foreach (postIdx, relatedCount; relatedCounts)
        {
            if (relatedCount > minRelatedCount)
            {
                // find our insertion loc, shifting lower values to the right as we go
                // (strict '>' means that on equal counts the earlier post stays ahead)
                auto loc = topn.length - 1U;
                for (; loc > 0 && relatedCount > topn[loc - 1].relatedCount; --loc)
                    topn[loc] = topn[loc - 1];
                topn[loc] = PostIdxAndRelatedCount(postIdx, relatedCount);
                minRelatedCount = topn[$ - 1].relatedCount;
            }
        }

        auto rp = &relatedPosts[myPostIdx];
        rp._id = post._id;
        rp.tags = post.tags;
        // NOTE(review): slots never filled above keep the default postIdx of 0,
        // so a post with fewer than TopN tag-sharing peers is padded with
        // posts[0] -- confirm this is the intended output.
        foreach (i; 0 .. rp.related.length)
            rp.related[i] = posts[topn[i].postIdx];
    }

    sw.stop();
    writeln("Processing time (w/o IO): ", sw.peek.total!"usecs" * 1.0 / 1000, "ms");
    toFile(serializeToJson(relatedPosts), "../related_posts_d.json");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment