Created
July 2, 2019 03:57
-
-
Save gsinclair/6a470a7f58e647d80ee10e5712b0c94e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Word chains (ORAC Challenge Problems 2) | |
* | |
* Build a tree with all the letters in it, marking the word depth and (boolean) | |
* terminator status of each letter. Remember where the leaves are (in a set). Then it's | |
* an easy matter to find the leaf with the greatest word depth, build the correct path, | |
* and emit all the words in that path. | |
* | |
* Attempt 1 failed -- all tests had memory limit exceeded. No memory limit was stated in | |
* the problem. | |
* | |
* Attempt 2 addressed the memory by removing two non-essential items from each node: the | |
* id and the depth. Failed: timeout in every test. | |
* | |
* The only excessive algorithmic complexity I can think of is the linear lookup of the | |
* children of each node. | |
* | |
* Attempt 3 introduces a map of children instead of a vector. The result of this is 'out | |
* of memory' :( | |
* | |
* OK, I think the reason I'm getting out of memory is because I don't need so many nodes. | |
* The problem statement says that all the input words combined will not exceed 2,000,000 | |
* letters. I've been pre-allocating that many nodes. But many of those letters will | |
* already be in the tree as it grows, meaning a new node is not needed all the time. | |
* | |
* I can't know in advance the maximum possible number of nodes required, so I'll have to | |
* use a vector, I suppose. | |
* | |
* Attempt 4: use a vector of nodes instead of a pre-allocated array. This means creating | |
* a constructor for nodes so I can use emplace_back instead of push_back. It also means | |
* removing NEXT_NODE_ID. | |
* | |
* --> score 80% with two timeouts | |
* | |
* I don't know why timeouts would occur. Could be a technicality (ORAC isn't as fast as | |
* the competition server) or it could be that using a map for children isn't as fast as | |
* vector, although I doubt it. | |
* | |
* Attempt 5: remove the LEAVES set and just do a full linear search for the deepest node | |
* after the tree is built. This makes sense, actually. Result: exactly as above. | |
* | |
* Attempt 6: keep track of maximum word depth as we build the tree. These are in the | |
* global variables MAX_WORD_DEPTH and MAX_WORD_DEPTH_ID. Result: exactly as above! | |
* | |
* Attempt 7: replace the map of children with a 26-int array. That may be a bit wasteful | |
* of memory, but probably not that much, and it should definitely give faster lookup. | |
* | |
* --> score: 100% !!! | |
* | |
* Everything _was_ much faster with the array lookup. About 3.5x faster. | |
*/ | |
#include <bits/stdc++.h> | |
using namespace std; | |
typedef struct Node { | |
int parent_id; | |
int children[26]; | |
char letter; | |
bool terminator; | |
int word_depth; | |
Node(int pid, char l, bool t, int wd) | |
: parent_id(pid), letter(l), terminator(t), word_depth(wd) { | |
for (int i = 0; i < 26; i++) | |
this->children[i] = -1; | |
}; | |
} node; | |
// The nodes of our tree. | |
vector<node> NODES; | |
// (Unfortunately) global variables to keep track of the node with the greatest word depth. | |
int MAX_WORD_DEPTH = 0; | |
int MAX_WORD_DEPTH_ID = 0; | |
// -------------------------------------------------- Tree building functions | |
void init_tree_root_node() { | |
NODES.emplace_back(-1, '-', false, 0); | |
} | |
void add_child(int parent_id, char letter, int child_id) { | |
int key = letter - 'a'; | |
NODES[parent_id].children[key] = child_id; | |
} | |
// Creates a new node in NODES. | |
// LEAVES is not touched by this; the caller of this function has responsibility for that. | |
// The new node has the word_depth assigned, unless it's a terminator, in which case it | |
// gets one more than that. | |
// Returns the id of the created node. | |
int new_node(int parent_id, char letter, bool terminator, int word_depth) { | |
NODES.emplace_back(parent_id, letter, terminator, word_depth); | |
int new_id = NODES.size() - 1; | |
if (terminator) { | |
NODES[new_id].word_depth++; | |
// We may have a new greatest word depth. | |
if (NODES[new_id].word_depth > MAX_WORD_DEPTH) { | |
MAX_WORD_DEPTH = NODES[new_id].word_depth; | |
MAX_WORD_DEPTH_ID = new_id; | |
} | |
} | |
add_child(parent_id, letter, new_id); | |
return new_id; | |
} | |
// Returns the id of the child node matching the given letter, or -1 if none exists. | |
int find_child(int pid, char letter) { | |
int key = letter - 'a'; | |
return NODES[pid].children[key]; | |
} | |
void insert(char *word) { | |
int id = 0; // Start at root. | |
int pid = -1; // Keep track of parent. | |
int L = strlen(word); // The length of the word - we need to know when we have finshed. | |
int idx = 0; | |
// Find the end of the tree for this word. | |
while (true) { | |
if (word[idx] == 0) break; // This should never occur. We should run out of tree before we run out of word. | |
pid = id; // Update parent. | |
id = find_child(id, word[idx]); | |
if (id == -1) break; // End of the tree. | |
idx++; // Move on to next character. | |
} | |
// We are at the end of the tree, and idx marks the first character that needs to be inserted. | |
// Currently id == -1 and pid is the leaf node. | |
// We add the rest of the characters, but must prepare some data first. | |
id = pid; | |
int word_depth = NODES[id].word_depth; | |
while (idx < L) { | |
bool terminator = (idx == L-1); | |
id = new_node(id, word[idx], terminator, word_depth); | |
idx++; | |
} | |
} | |
// -------------------------------------------------- Problem-solving functions | |
deque<int> path_to_node(int target_id) { | |
deque<int> path; | |
int id = target_id; | |
while (id != 0) { | |
path.push_front(id); | |
id = NODES[id].parent_id; | |
} | |
return path; | |
} | |
char BUFFER[100]; | |
void print_words_in_path(deque<int> path, ofstream &out) { | |
char *c = BUFFER; | |
for (int id : path) { | |
node &n = NODES[id]; | |
*c++ = n.letter; | |
if (n.terminator) | |
out << BUFFER << endl; | |
} | |
} | |
// -------------------------------------------------- Debugging functions | |
void print_tree() { | |
for (int id = 0; id < NODES.size(); id++) { | |
node &n = NODES[id]; | |
if (n.terminator) | |
printf("[%2d] '%c' * %d\n", id, n.letter, n.word_depth); | |
else | |
printf("[%2d] '%c' %d\n", id, n.letter, n.word_depth); | |
} | |
} | |
// -------------------------------------------------- Testing functions | |
// -------------------------------------------------- Main | |
int main() { | |
// Open the I/O files. | |
ifstream fin("chains.in"); | |
ofstream fout("chains.out"); | |
// Initialise the tree. | |
init_tree_root_node(); | |
char word[100]; // Words have length 1..75. | |
// Read the input and build the tree. | |
while (true) { | |
fin >> word; | |
if (strcmp(word, ".") == 0) | |
break; | |
else | |
insert(word); | |
} | |
// Find the path to the desired leaf node. | |
auto path = path_to_node(MAX_WORD_DEPTH_ID); | |
// Print the solution (all words implied by the path). | |
print_words_in_path(path, fout); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment