Skip to content

Instantly share code, notes, and snippets.

Created June 9, 2014 21:17
Show Gist options
  • Save anonymous/cd24f98fad9ad352a889 to your computer and use it in GitHub Desktop.
Save anonymous/cd24f98fad9ad352a889 to your computer and use it in GitHub Desktop.
//
// Trie.swift
// SceneTest
//
// Created by Greg Titus on 6/7/14.
// Copyright (c) 2014 The Omni Group. All rights reserved.
//
// This is essentially the same data structure as OFTrie in OmniFoundation, except that the OmniFoundation version is mutable and ephemeral,
// and this version is immutable and persistent. Every time you add a string to the trie, it replaces all the nodes along the string's path
// to the root, and the only mutable change is reassigning the root. Old copies of the trie are unaffected.
// 100 or so lines of code here, 600 or so in 3 classes (6 .h or .m files) in the Objective-C version.
// Speed improvements at some slight expense of readability:
// (1) Taking successively smaller slices of UnicodeScalarView turns out to be slow, so wrap it and a start index in TrieUnicharBuffer.
// (2) Dictionaries much slower than fixed length arrays, so add a new .LetterBranch variant that has 'a'..'z' subnode slots.
// If you insert a string with non-lowercase-ascii characters, nodes convert themselves to the slower dictionary-based .Branch
// Adding all of /usr/share/dict/words is about 3.1s on my system, compared to about 1.5s in Obj-C, still.
let asciiLowercaseA : Int = 97
extension UnicodeScalar {
var lowercaseLetterIndex : Int {
let index = Int(self.value)
return index - asciiLowercaseA
}
var isLowercaseLetter : Bool {
let index = self.lowercaseLetterIndex
return index >= 0 && index < 26
}
}
struct TrieUnicharBuffer : Equatable {
var index: String.UnicodeScalarView.IndexType
var buffer: String.UnicodeScalarView
init(_ s: String) {
buffer = s.unicodeScalars
index = buffer.startIndex
}
init(index: String.UnicodeScalarView.IndexType, buffer: String.UnicodeScalarView) {
self.index = index
self.buffer = buffer
}
var isEmpty : Bool { return index == buffer.endIndex }
var head : UnicodeScalar { return buffer[index] }
var tail : TrieUnicharBuffer { return TrieUnicharBuffer(index: index.succ(), buffer: buffer) }
var emptySuffix : TrieUnicharBuffer {
return TrieUnicharBuffer(index: buffer.endIndex, buffer: buffer)
}
var slice : String.UnicodeScalarView { return buffer[index .. buffer.endIndex] }
func isPrefix(other : TrieUnicharBuffer) -> Bool {
var a = self
var b = other
while (!a.isEmpty && !b.isEmpty) {
if (a.head != b.head) {
return false
}
a = a.tail
b = b.tail
}
return b.isEmpty
}
}
func ==(lhs: TrieUnicharBuffer, rhs: TrieUnicharBuffer) -> Bool {
return lhs.slice == rhs.slice
}
struct TrieNode {
typealias Node = TrieNode
enum TrieChildren {
// either a single leaf value, with a trailing suffix
case Leaf(TrieUnicharBuffer)
// common case of possible a-z leaves
case LetterBranch(ContiguousArray<Node?>)
// or a full branch, mapping any unichar to child node
case Branch(Dictionary<UnicodeScalar, Node>)
}
var value : AnyObject?
var children : TrieChildren
init(_ val: AnyObject?, prefix: UnicodeScalar, subnode: Node) {
value = val
if (prefix.isLowercaseLetter) {
var array = ContiguousArray<Node?>(count: 26, repeatedValue: nil)
var index = prefix.lowercaseLetterIndex
array[index] = subnode
children = .LetterBranch(array)
} else {
children = .Branch([prefix: subnode])
}
}
init(_ val: AnyObject?, suffix: TrieUnicharBuffer) {
value = val
children = .Leaf(suffix)
}
init(value: AnyObject?, children: TrieChildren) {
self.value = value
self.children = children
}
func nodeByInsertingValue(newValue: AnyObject, forBuffer buffer: TrieUnicharBuffer) -> Node {
switch children {
case .Leaf(let suffix) where suffix == buffer:
return Node(newValue, suffix: suffix)
case .Leaf(let suffix) where buffer.isEmpty:
let oldHead = suffix.head
let oldLeaf = Node(value, suffix: suffix.tail)
return Node(newValue, prefix: oldHead, subnode: oldLeaf)
case .Leaf(let suffix) where suffix.isEmpty:
let newHead = buffer.head
let newLeaf = Node(newValue, suffix: buffer.tail)
return Node(value, prefix: newHead, subnode: newLeaf)
case .Leaf(let suffix):
let oldHead = suffix.head
let newHead = buffer.head
let oldLeaf = Node(value, suffix: suffix.tail)
if (oldHead == newHead) {
let newChild = oldLeaf.nodeByInsertingValue(newValue, forBuffer: buffer.tail)
return Node(nil, prefix: newHead, subnode: newChild)
} else {
let partial = Node(nil, prefix: oldHead, subnode:oldLeaf)
return partial.nodeByInsertingValue(newValue, forBuffer: buffer)
}
case .LetterBranch(_) where buffer.isEmpty:
return Node(value: newValue, children: children)
case .LetterBranch(var array) where buffer.head.isLowercaseLetter:
let letterIndex = buffer.head.lowercaseLetterIndex
if let child = array[letterIndex] {
array[letterIndex] = child.nodeByInsertingValue(newValue, forBuffer: buffer.tail)
} else {
array[letterIndex] = Node(newValue, suffix: buffer.tail)
}
return Node(value: value, children: .LetterBranch(array))
case .LetterBranch(let array):
let newLeaf = Node(newValue, suffix: buffer.tail)
var newSubnodes = [buffer.head: newLeaf]
for i in 0..26 {
if let child = array[i] {
newSubnodes[unicodeScalarFromLetterIndex(i)] = child
}
}
return Node(value: value, children: .Branch(newSubnodes))
case .Branch(_) where buffer.isEmpty:
return Node(value: newValue, children: children)
case .Branch(let subnodes):
var newChild : Node
let charHead = buffer.head
if let child = subnodes[charHead] {
newChild = child.nodeByInsertingValue(newValue, forBuffer: buffer.tail)
} else {
newChild = Node(newValue, suffix: buffer.tail)
}
var newSubnodes = subnodes
newSubnodes[charHead] = newChild
return Node(value: value, children: .Branch(newSubnodes))
}
}
func subnodeForPrefix(buffer : TrieUnicharBuffer, exactMatch: Bool) -> TrieNode? {
switch children {
case .Leaf(let suffix) where exactMatch:
return suffix == buffer ? self : nil
case .Leaf(let suffix):
return suffix.isPrefix(buffer) ? self : nil
case .LetterBranch(let array):
if (buffer.isEmpty) {
return self
}
let scalar = buffer.head
if (scalar.isLowercaseLetter) {
return array[scalar.lowercaseLetterIndex]?.subnodeForPrefix(buffer.tail, exactMatch: exactMatch)
} else {
return nil
}
case .Branch(let subnodes):
if (buffer.isEmpty) {
return self
}
return subnodes[buffer.head]?.subnodeForPrefix(buffer.tail, exactMatch: exactMatch)
}
}
}
class Trie {
var root : TrieNode? = nil
func insert(value: String, forString: String) {
let buffer = TrieUnicharBuffer(forString)
if root {
root = root!.nodeByInsertingValue(value, forBuffer: buffer)
} else {
root = TrieNode(value, suffix: buffer)
}
}
func lookup(str : String) -> AnyObject? {
let buffer = TrieUnicharBuffer(str)
return root?.subnodeForPrefix(buffer, exactMatch: true)?.value
}
}
// Make a TrieNode generate all child nodes, by just making an array of them and returning the array generator.
extension TrieNode : Sequence {
typealias GeneratorType = Array<TrieNode>.GeneratorType
func generate() -> GeneratorType {
var nodes: TrieNode[] = []
switch children {
case .LetterBranch(let array):
for o in array {
if let n = o {
nodes.append(n)
}
}
case .Branch(let subnodes):
nodes.extend(subnodes.values)
default:
break
}
return nodes.generate()
}
}
// Traverse the tree of nodes, by keeping a stack of generators for the children of the current node at each level
struct TrieGenerator : Generator {
typealias Element = AnyObject
var generatorStack : TrieNode.GeneratorType[] = []
var currentNode : TrieNode?
init(_ n : TrieNode?) {
currentNode = n
}
mutating func next() -> Element? {
if let n = currentNode {
generatorStack.append(n.generate())
currentNode = nil
if let v : AnyObject = n.value {
return v
}
}
if (generatorStack.count == 0) {
return nil
}
var g = generatorStack.removeLast()
currentNode = g.next()
if (currentNode) {
generatorStack.append(g)
}
return next()
}
}
extension Trie : Sequence {
typealias GeneratorType = TrieGenerator
func generate() -> GeneratorType {
return TrieGenerator(root)
}
func subsequenceForPrefix(str : String) -> SequenceOf<AnyObject> {
let buffer = TrieUnicharBuffer(str)
let subnode = root?.subnodeForPrefix(buffer, exactMatch: false)
let g = TrieGenerator(subnode)
return SequenceOf<AnyObject>({g})
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment