Skip to content

Instantly share code, notes, and snippets.

@ikskuh
Last active November 4, 2020 12:50
Show Gist options
  • Save ikskuh/f7905331e30660930bdb4b47b0d127e9 to your computer and use it in GitHub Desktop.
Save ikskuh/f7905331e30660930bdb4b47b0d127e9 to your computer and use it in GitHub Desktop.
const std = @import("std");
// debug switch to print all found anagrams
const print_all_results = false;
pub fn main() !void {
// arena for fast allocations
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
var allocator = &arena.allocator;
// the content of the file
var dictionary_contents = try std.fs.cwd().readFileAlloc(allocator, "unixdict.txt", 1_000_000);
defer allocator.free(dictionary_contents);
// a set of words per anagram identifier
// we store a anagram identifier as a key (sorted word, so length remains right, but characters are ordered lexically)
// the value is a unmanaged array list (saves some memory compared to anormal array list)
// of all words for the anagram identifier
var words = std.StringHashMap(std.ArrayListUnmanaged([]const u8)).init(allocator);
defer {
var iter = words.iterator();
while (iter.next()) |kv| {
kv.value.deinit(allocator);
}
words.deinit();
}
// Make sure we have enough space for some words already
try words.ensureCapacity(100_000);
var max_length: usize = 0;
// iterate over all lines in dictionary_contents
{
// temporary buffer for sorting the word
// this is mutable and will be overwritten per word
var word_buffer: [256]u8 = undefined;
var lines = std.mem.split(dictionary_contents, "\n");
while (lines.next()) |word| {
// copy the word to a temporary buffer so we can sort it without
// modifying the original source, then store the sorted version
std.mem.copy(u8, &word_buffer, word);
const sorted_word = word_buffer[0..word.len];
std.sort.sort(u8, sorted_word, {}, struct {
fn lessThan(ctx: void, lhs: u8, rhs: u8) bool {
return lhs < rhs;
}
}.lessThan);
// Get a existing or create a new list for this anagram identifier
var list = if (words.getEntry(sorted_word)) |kv| &kv.value else blk: {
var sorted_word_alloc = try allocator.dupe(u8, sorted_word);
const kv = try words.getOrPut(sorted_word_alloc);
std.debug.assert(kv.found_existing == false);
kv.entry.value = std.ArrayListUnmanaged([]const u8){};
break :blk &kv.entry.value;
};
// Push the word into the list, then check if the current list got longer than
// the previously found longest list. If so: Keep the new length
try list.append(allocator, word);
if (max_length < list.items.len) {
max_length = list.items.len;
}
}
}
// Print out all found results, in a unsorted manner
{
var stdout = std.io.getStdOut().writer();
var buckets = words.iterator();
while (buckets.next()) |bucket| {
if (print_all_results) {
if (bucket.value.items.len <= 1)
continue;
} else {
if (bucket.value.items.len != max_length)
continue;
}
for (bucket.value.items) |word, i| {
if (i > 0) {
try stdout.print(", {}", .{word});
} else {
try stdout.print("{}", .{word});
}
}
try stdout.print("\n", .{});
}
}
}
// Output:
// evil, levi, live, veil, vile
// caret, carte, cater, crate, trace
// abel, able, bale, bela, elba
// alger, glare, lager, large, regal
// angel, angle, galen, glean, lange
// elan, lane, lean, lena, neal
Performance counter stats for './zig-cache/bin/anagrams':
18,91 msec task-clock:u # 0,946 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
2.114 page-faults:u # 0,112 M/sec
29.748.027 cycles:u # 1,573 GHz
1.572.739 stalled-cycles-frontend:u # 5,29% frontend cycles idle
12.228.844 stalled-cycles-backend:u # 41,11% backend cycles idle
20.443.025 instructions:u # 0,69 insn per cycle
# 0,60 stalled cycles per insn
4.178.738 branches:u # 220,942 M/sec
313.344 branch-misses:u # 7,50% of all branches
0,019999471 seconds time elapsed
0,006689000 seconds user
0,013379000 seconds sys
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment