Skip to content

Instantly share code, notes, and snippets.

@Sahnvour
Forked from ikskuh/anagrams.zig
Last active November 4, 2020 13:01
Show Gist options
  • Save Sahnvour/219eac3edc59b688ff0784eef9c0b0fa to your computer and use it in GitHub Desktop.
Save Sahnvour/219eac3edc59b688ff0784eef9c0b0fa to your computer and use it in GitHub Desktop.
const std = @import("std");
// debug switch to print all found anagrams
const print_all_results = false;
fn hashAnagram(word: []const u8) u64 {
var h = @as(u64, 1);
for (word) |c| {
const p = primes[c];
// std.debug.assert(p != 0);
// std.debug.print("{} {c}={} {}\n", .{c, c, p, h});
h *%= p;
}
return h;
}
fn identity(x: u64) u64 {
return x;
}
fn eqlu64(a: u64, b: u64) bool {
return a == b;
}
const primes = [_]u32{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 157, 163, // ' and &
0, 0, 0, 0, 0, 0, 167, 0, // .
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, // 0 to 9
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
// a to z
31,
37, 41, 43, 47, 53, 59, 61, 67, 71, 73,
79, 83, 89, 97, 101, 103, 107, 109, 113, 127,
131, 137, 139, 149, 151,
};
pub fn main() !void {
// arena for fast allocations
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
var allocator = &arena.allocator;
// the content of the file
var dict = try std.fs.cwd().readFileAlloc(allocator, "unixdict.txt", 1_000_000);
defer allocator.free(dict);
// a set of words per anagram identifier
// we store a anagram identifier as a key (sorted word, so length remains right, but characters are ordered lexically)
// the value is a unmanaged array list (saves some memory compared to anormal array list)
// of all words for the anagram identifier
var anagrams = std.hash_map.HashMapUnmanaged(u64, std.ArrayListUnmanaged([]const u8), identity, eqlu64, 80){};
var longest_len: usize = 0;
// Make sure we have enough space for some words already
var lines = std.mem.split(dict, "\n");
while (lines.next()) |word| {
const h = hashAnagram(word);
// Find or create an anagram list for this word.
const result = try anagrams.getOrPut(allocator, h);
if (!result.found_existing) {
result.entry.value = .{};
}
const list = &result.entry.value;
// Add this word to its anagram list and update longest list variable.
try list.append(allocator, word);
longest_len = std.math.max(longest_len, list.items.len);
}
// Print the longest lists of words that are anagrams of each other.
const stdout = std.io.getStdOut().writer();
var entries = anagrams.iterator();
while (entries.next()) |entry| {
const items = entry.value.items;
if (items.len != longest_len) continue;
try stdout.writeAll(items[0]);
for (items[1..]) |item| {
try stdout.print(", {}", .{item});
}
try stdout.writeAll("\n");
}
}
// Output:
// angel, angle, galen, glean, lange
// caret, carte, cater, crate, trace
// alger, glare, lager, large, regal
// evil, levi, live, veil, vile
// elan, lane, lean, lena, neal
// abel, able, bale, bela, elba
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment