Last active
November 4, 2020 12:50
-
-
Save ikskuh/f7905331e30660930bdb4b47b0d127e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
// debug switch to print all found anagrams | |
const print_all_results = false; | |
pub fn main() !void { | |
// arena for fast allocations | |
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | |
defer arena.deinit(); | |
var allocator = &arena.allocator; | |
// the content of the file | |
var dictionary_contents = try std.fs.cwd().readFileAlloc(allocator, "unixdict.txt", 1_000_000); | |
defer allocator.free(dictionary_contents); | |
// a set of words per anagram identifier | |
// we store a anagram identifier as a key (sorted word, so length remains right, but characters are ordered lexically) | |
// the value is a unmanaged array list (saves some memory compared to anormal array list) | |
// of all words for the anagram identifier | |
var words = std.StringHashMap(std.ArrayListUnmanaged([]const u8)).init(allocator); | |
defer { | |
var iter = words.iterator(); | |
while (iter.next()) |kv| { | |
kv.value.deinit(allocator); | |
} | |
words.deinit(); | |
} | |
// Make sure we have enough space for some words already | |
try words.ensureCapacity(100_000); | |
var max_length: usize = 0; | |
// iterate over all lines in dictionary_contents | |
{ | |
// temporary buffer for sorting the word | |
// this is mutable and will be overwritten per word | |
var word_buffer: [256]u8 = undefined; | |
var lines = std.mem.split(dictionary_contents, "\n"); | |
while (lines.next()) |word| { | |
// copy the word to a temporary buffer so we can sort it without | |
// modifying the original source, then store the sorted version | |
std.mem.copy(u8, &word_buffer, word); | |
const sorted_word = word_buffer[0..word.len]; | |
std.sort.sort(u8, sorted_word, {}, struct { | |
fn lessThan(ctx: void, lhs: u8, rhs: u8) bool { | |
return lhs < rhs; | |
} | |
}.lessThan); | |
// Get a existing or create a new list for this anagram identifier | |
var list = if (words.getEntry(sorted_word)) |kv| &kv.value else blk: { | |
var sorted_word_alloc = try allocator.dupe(u8, sorted_word); | |
const kv = try words.getOrPut(sorted_word_alloc); | |
std.debug.assert(kv.found_existing == false); | |
kv.entry.value = std.ArrayListUnmanaged([]const u8){}; | |
break :blk &kv.entry.value; | |
}; | |
// Push the word into the list, then check if the current list got longer than | |
// the previously found longest list. If so: Keep the new length | |
try list.append(allocator, word); | |
if (max_length < list.items.len) { | |
max_length = list.items.len; | |
} | |
} | |
} | |
// Print out all found results, in a unsorted manner | |
{ | |
var stdout = std.io.getStdOut().writer(); | |
var buckets = words.iterator(); | |
while (buckets.next()) |bucket| { | |
if (print_all_results) { | |
if (bucket.value.items.len <= 1) | |
continue; | |
} else { | |
if (bucket.value.items.len != max_length) | |
continue; | |
} | |
for (bucket.value.items) |word, i| { | |
if (i > 0) { | |
try stdout.print(", {}", .{word}); | |
} else { | |
try stdout.print("{}", .{word}); | |
} | |
} | |
try stdout.print("\n", .{}); | |
} | |
} | |
} | |
// Output: | |
// evil, levi, live, veil, vile | |
// caret, carte, cater, crate, trace | |
// abel, able, bale, bela, elba | |
// alger, glare, lager, large, regal | |
// angel, angle, galen, glean, lange | |
// elan, lane, lean, lena, neal |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Performance counter stats for './zig-cache/bin/anagrams': | |
18,91 msec task-clock:u # 0,946 CPUs utilized | |
0 context-switches:u # 0,000 K/sec | |
0 cpu-migrations:u # 0,000 K/sec | |
2.114 page-faults:u # 0,112 M/sec | |
29.748.027 cycles:u # 1,573 GHz | |
1.572.739 stalled-cycles-frontend:u # 5,29% frontend cycles idle | |
12.228.844 stalled-cycles-backend:u # 41,11% backend cycles idle | |
20.443.025 instructions:u # 0,69 insn per cycle | |
# 0,60 stalled cycles per insn | |
4.178.738 branches:u # 220,942 M/sec | |
313.344 branch-misses:u # 7,50% of all branches | |
0,019999471 seconds time elapsed | |
0,006689000 seconds user | |
0,013379000 seconds sys |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment