-
-
Save Sahnvour/219eac3edc59b688ff0784eef9c0b0fa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
// debug switch to print all found anagrams | |
const print_all_results = false; | |
fn hashAnagram(word: []const u8) u64 { | |
var h = @as(u64, 1); | |
for (word) |c| { | |
const p = primes[c]; | |
// std.debug.assert(p != 0); | |
// std.debug.print("{} {c}={} {}\n", .{c, c, p, h}); | |
h *%= p; | |
} | |
return h; | |
} | |
fn identity(x: u64) u64 { | |
return x; | |
} | |
fn eqlu64(a: u64, b: u64) bool { | |
return a == b; | |
} | |
const primes = [_]u32{ | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 157, 163, // ' and & | |
0, 0, 0, 0, 0, 0, 167, 0, // . | |
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, // 0 to 9 | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, | |
// a to z | |
31, | |
37, 41, 43, 47, 53, 59, 61, 67, 71, 73, | |
79, 83, 89, 97, 101, 103, 107, 109, 113, 127, | |
131, 137, 139, 149, 151, | |
}; | |
pub fn main() !void { | |
// arena for fast allocations | |
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | |
defer arena.deinit(); | |
var allocator = &arena.allocator; | |
// the content of the file | |
var dict = try std.fs.cwd().readFileAlloc(allocator, "unixdict.txt", 1_000_000); | |
defer allocator.free(dict); | |
// a set of words per anagram identifier | |
// we store a anagram identifier as a key (sorted word, so length remains right, but characters are ordered lexically) | |
// the value is a unmanaged array list (saves some memory compared to anormal array list) | |
// of all words for the anagram identifier | |
var anagrams = std.hash_map.HashMapUnmanaged(u64, std.ArrayListUnmanaged([]const u8), identity, eqlu64, 80){}; | |
var longest_len: usize = 0; | |
// Make sure we have enough space for some words already | |
var lines = std.mem.split(dict, "\n"); | |
while (lines.next()) |word| { | |
const h = hashAnagram(word); | |
// Find or create an anagram list for this word. | |
const result = try anagrams.getOrPut(allocator, h); | |
if (!result.found_existing) { | |
result.entry.value = .{}; | |
} | |
const list = &result.entry.value; | |
// Add this word to its anagram list and update longest list variable. | |
try list.append(allocator, word); | |
longest_len = std.math.max(longest_len, list.items.len); | |
} | |
// Print the longest lists of words that are anagrams of each other. | |
const stdout = std.io.getStdOut().writer(); | |
var entries = anagrams.iterator(); | |
while (entries.next()) |entry| { | |
const items = entry.value.items; | |
if (items.len != longest_len) continue; | |
try stdout.writeAll(items[0]); | |
for (items[1..]) |item| { | |
try stdout.print(", {}", .{item}); | |
} | |
try stdout.writeAll("\n"); | |
} | |
} | |
// Output: | |
// angel, angle, galen, glean, lange | |
// caret, carte, cater, crate, trace | |
// alger, glare, lager, large, regal | |
// evil, levi, live, veil, vile | |
// elan, lane, lean, lena, neal | |
// abel, able, bale, bela, elba |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment