chriseidhof/graph.swift

## graph.swift
import Foundation

/*

 Talking points

 - It's hard for Node to conform to Codable, because of references
   - This is a problem in both directions. For encoding, we need to keep track of what we've already seen. For decoding, we need two-phase initialization.
   - But we can convert to sparsegraph, and then we get conformance for free
     - to do this, an iterator over all reachable nodes really helps!
   - We can also go back from sparsegraph to node
 - There's no "natural" element (nor order) when we view Node as a sequence. Neither is there for SparseGraph.
 - ObjectIdentifier can be used when you have objects that don't conform to Hashable
 - Writing an iterator for SparseGraph's edges is interesting, because the efficient implementation has a complicated type. But in Swift 5.1 we can use some Iterator!
 - We could talk about information hiding (e.g. Identifier only has fileprivate props, so is an opaque struct).
 - Writing BFS is interesting: it uses AnyIterator, and it has closures that capture `queue` and `discovered` (and writing `discovered` as a Set is more efficient than doing an Array).
 - Maybe interesting: talk about wrapping Node in another type (e.g. NodeGraph) which maintains a list of all nodes that were created. This could allow for graphs with more than one [component](https://en.wikipedia.org/wiki/Component_(graph_theory)), as well as help with cleanup (currently, once you have a cycle in your nodes, they'll never be released).
 - For the future: it'd be nice to come up with some kind of graph protocol and implement common methods on top of it. However, I find it hard to imagine what this protocol would look like...

 */

// Here's a reference-based graph. There's no "Graph" datatype, just `Node`s. A node contains an array of outgoing edges (the other nodes).
final class Node<Value> {
    var value: Value
    var edges: [Node<Value>] = []
    init(value: Value, edges: [Node<Value>] = []) {
        self.value = value
        self.edges = edges
    }

    func addEdge(to other: Node<Value>) {
        edges.append(other)
    }
}

// This compiles but doesn't work unless we have an acyclic graph.
extension Node: Codable where Value: Codable { }

extension Node {
    // This returns all reachable nodes in a BFS order. Each node is returned exactly once.
    var breadFirstSearch: AnyIterator<Node<Value>> {
        var queue = [self]
        var discovered: Set<ObjectIdentifier> = [] // should be set of object identifiers
        return AnyIterator {
            guard let element = queue.popLast() else { return nil }
            discovered.insert(ObjectIdentifier(element))
            for n in element.edges {
                guard !queue.contains{ $0 === n }, !discovered.contains(ObjectIdentifier(n)) else { continue }
                queue.append(n)
            }
            return element
        }
    }
}

// Problem 1: encoding recursive graphs results in an infinite loop.

func sample1() {
    let one = Node(value: 1)
    let two = Node(value: 2, edges: [one])
    one.addEdge(to: two) // cycle
    try! JSONEncoder().encode(one) // infinite loop
}

// Problem 2: encoding shared objects (e.g. a Node has two outgoing edges to the same object).

func sample2() {
    let one = Node(value: 1)
    let two = Node(value: 2, edges: [one, one])
    assert(two.edges[0] === two.edges[1])
    let data = try! JSONEncoder().encode(two)

    let result = try! JSONDecoder().decode(Node<Int>.self, from: data)
    assert(result.edges[0] === result.edges[1]) // assertion failure
}

// We can come up with an alternative representation.


// First: identifiers.
struct Identifier: Hashable, Codable {
    fileprivate var id: Int
    fileprivate init(_ id: Int) {
        self.id = id
    }
}

// A sparse graph contains a list of vertex values, and a list of edges between vertices.
struct SparseGraph<Value> {
    private var freshIdentifier: Int = 0
    var vertices: [Identifier:Value] = [:]
    var edges: [Identifier:[Identifier]] = [:]
}

extension SparseGraph {
    mutating fileprivate func fresh() -> Identifier {
        defer { freshIdentifier += 1 }
        return Identifier(freshIdentifier)

    }
    mutating func insert(_ value: Value, edges: [Identifier] = []) -> Identifier {
        let id = fresh()
        vertices[id] = value
        self.edges[id] = edges
        return id
    }

    mutating func addEdge(from: Identifier, to: Identifier) {
        edges[from, default: []].append(to)
    }
}

extension SparseGraph {
    // This would be nicer in Swift 5.1: some Iterator
    var allEdges: FlattenSequence<LazyMapSequence<[Identifier : [Identifier]], [(Identifier, Identifier)]>>.Iterator {
        return edges.lazy.flatMap { (source, targets) in targets.map { (source, $0) } }.makeIterator()
    }
}


extension SparseGraph: Codable where Value: Codable { }
extension SparseGraph: Equatable where Value: Equatable { }

func sample3() {
    var sample = SparseGraph<Int>()
    let one = sample.insert(1)
    let two = sample.insert(2, edges: [one])
    sample.addEdge(from: one, to: two)
    print(try! JSONEncoder().encode(sample))
}
sample3()

func sample4() {
    var sample = SparseGraph<Int>()
    let one = sample.insert(1)
    let two = sample.insert(2, edges: [one, one])
    sample.addEdge(from: one, to: two)
    let data = try! JSONEncoder().encode(sample)

    let decoded = try! JSONDecoder().decode(SparseGraph<Int>.self, from: data)
    assert(sample == decoded)
}
sample4()

// We can even go from a Node to a SparseGraph

extension SparseGraph {
    // Given breadFirstSearch, we can initialize a graph very easily:
    init(rootNode: Node<Value>) {
        let all = Dictionary(uniqueKeysWithValues: rootNode.breadFirstSearch.map { (ObjectIdentifier($0), (identifier: fresh(), node: $0)) })
        for (identifier: id, node: node) in all.values {
            vertices[id] = node.value
            edges[id] = node.edges.map { all[ObjectIdentifier($0)]!.identifier }
        }
    }
}

extension SparseGraph {
    subscript(identifier: Identifier) -> Value {
        return vertices[identifier]!
    }
}


// And from a SparseGraph back to a Node

extension Node {
    convenience init(graph: SparseGraph<Value>, rootNode: Identifier) {
        self.init(value: graph[rootNode])
        var existing: [Identifier:Node<Value>] = [rootNode: self]
        for (id, value) in graph.vertices {
            guard id != rootNode else { continue }
            existing[id] = Node(value: value)
        }
        for (source, targets) in graph.edges {
            existing[source]!.edges = targets.map { existing[$0]! }
        }
    }
}


func sample5() {
    let one = Node(value: 1)
    let two = Node(value: 2, edges: [one])
    one.addEdge(to: two) // cycle
    let graph = SparseGraph(rootNode: one)
    dump(graph)
    let node = Node(graph: graph, rootNode: Identifier(0))
    dump(node)
    assert(node.edges[0].edges[0] === node)
    print(node.breadFirstSearch.map { $0.value })
}
sample5()
	import Foundation

	/*

	Talking points

	- It's hard for Node to conform to Codable, because of references
	- This is a problem in both directions. For encoding, we need to keep track of what we've already seen. For decoding, we need two-phase initialization.
	- But we can convert to sparsegraph, and then we get conformance for free
	- to do this, an iterator over all reachable nodes really helps!
	- We can also go back from sparsegraph to node
	- There's no "natural" element (nor order) when we view Node as a sequence. Neither is there for SparseGraph.
	- ObjectIdentifier can be used when you have objects that don't conform to Hashable
	- Writing an iterator for SparseGraph's edges is interesting, because the efficient implementation has a complicated type. But in Swift 5.1 we can use some Iterator!
	- We could talk about information hiding (e.g. Identifier only has fileprivate props, so is an opaque struct).
	- Writing BFS is interesting: it uses AnyIterator, and it has closures that capture `queue` and `discovered` (and writing `discovered` as a Set is more efficient than doing an Array).
	- Maybe interesting: talk about wrapping Node in another type (e.g. NodeGraph) which maintains a list of all nodes that were created. This could allow for graphs with more than one [component](https://en.wikipedia.org/wiki/Component_(graph_theory)), as well as help with cleanup (currently, once you have a cycle in your nodes, they'll never be released).
	- For the future: it'd be nice to come up with some kind of graph protocol and implement common methods on top of it. However, I find it hard to imagine what this protocol would look like...

	*/

	// Here's a reference-based graph. There's no "Graph" datatype, just `Node`s. A node contains an array of outgoing edges (the other nodes).
	final class Node<Value> {
	var value: Value
	var edges: [Node<Value>] = []
	init(value: Value, edges: [Node<Value>] = []) {
	self.value = value
	self.edges = edges
	}

	func addEdge(to other: Node<Value>) {
	edges.append(other)
	}
	}

	// This compiles but doesn't work unless we have an acyclic graph.
	extension Node: Codable where Value: Codable { }

	extension Node {
	// This returns all reachable nodes in a BFS order. Each node is returned exactly once.
	var breadFirstSearch: AnyIterator<Node<Value>> {
	var queue = [self]
	var discovered: Set<ObjectIdentifier> = [] // should be set of object identifiers
	return AnyIterator {
	guard let element = queue.popLast() else { return nil }
	discovered.insert(ObjectIdentifier(element))
	for n in element.edges {
	guard !queue.contains{ $0 === n }, !discovered.contains(ObjectIdentifier(n)) else { continue }
	queue.append(n)
	}
	return element
	}
	}
	}

	// Problem 1: encoding recursive graphs results in an infinite loop.

	func sample1() {
	let one = Node(value: 1)
	let two = Node(value: 2, edges: [one])
	one.addEdge(to: two) // cycle
	try! JSONEncoder().encode(one) // infinite loop
	}

	// Problem 2: encoding shared objects (e.g. a Node has two outgoing edges to the same object).

	func sample2() {
	let one = Node(value: 1)
	let two = Node(value: 2, edges: [one, one])
	assert(two.edges[0] === two.edges[1])
	let data = try! JSONEncoder().encode(two)

	let result = try! JSONDecoder().decode(Node<Int>.self, from: data)
	assert(result.edges[0] === result.edges[1]) // assertion failure
	}

	// We can come up with an alternative representation.


	// First: identifiers.
	struct Identifier: Hashable, Codable {
	fileprivate var id: Int
	fileprivate init(_ id: Int) {
	self.id = id
	}
	}

	// A sparse graph contains a list of vertex values, and a list of edges between vertices.
	struct SparseGraph<Value> {
	private var freshIdentifier: Int = 0
	var vertices: [Identifier:Value] = [:]
	var edges: [Identifier:[Identifier]] = [:]
	}

	extension SparseGraph {
	mutating fileprivate func fresh() -> Identifier {
	defer { freshIdentifier += 1 }
	return Identifier(freshIdentifier)

	}
	mutating func insert(_ value: Value, edges: [Identifier] = []) -> Identifier {
	let id = fresh()
	vertices[id] = value
	self.edges[id] = edges
	return id
	}

	mutating func addEdge(from: Identifier, to: Identifier) {
	edges[from, default: []].append(to)
	}
	}

	extension SparseGraph {
	// This would be nicer in Swift 5.1: some Iterator
	var allEdges: FlattenSequence<LazyMapSequence<[Identifier : [Identifier]], [(Identifier, Identifier)]>>.Iterator {
	return edges.lazy.flatMap { (source, targets) in targets.map { (source, $0) } }.makeIterator()
	}
	}


	extension SparseGraph: Codable where Value: Codable { }
	extension SparseGraph: Equatable where Value: Equatable { }

	func sample3() {
	var sample = SparseGraph<Int>()
	let one = sample.insert(1)
	let two = sample.insert(2, edges: [one])
	sample.addEdge(from: one, to: two)
	print(try! JSONEncoder().encode(sample))
	}
	sample3()

	func sample4() {
	var sample = SparseGraph<Int>()
	let one = sample.insert(1)
	let two = sample.insert(2, edges: [one, one])
	sample.addEdge(from: one, to: two)
	let data = try! JSONEncoder().encode(sample)

	let decoded = try! JSONDecoder().decode(SparseGraph<Int>.self, from: data)
	assert(sample == decoded)
	}
	sample4()

	// We can even go from a Node to a SparseGraph

	extension SparseGraph {
	// Given breadFirstSearch, we can initialize a graph very easily:
	init(rootNode: Node<Value>) {
	let all = Dictionary(uniqueKeysWithValues: rootNode.breadFirstSearch.map { (ObjectIdentifier($0), (identifier: fresh(), node: $0)) })
	for (identifier: id, node: node) in all.values {
	vertices[id] = node.value
	edges[id] = node.edges.map { all[ObjectIdentifier($0)]!.identifier }
	}
	}
	}

	extension SparseGraph {
	subscript(identifier: Identifier) -> Value {
	return vertices[identifier]!
	}
	}


	// And from a SparseGraph back to a Node

	extension Node {
	convenience init(graph: SparseGraph<Value>, rootNode: Identifier) {
	self.init(value: graph[rootNode])
	var existing: [Identifier:Node<Value>] = [rootNode: self]
	for (id, value) in graph.vertices {
	guard id != rootNode else { continue }
	existing[id] = Node(value: value)
	}
	for (source, targets) in graph.edges {
	existing[source]!.edges = targets.map { existing[$0]! }
	}
	}
	}


	func sample5() {
	let one = Node(value: 1)
	let two = Node(value: 2, edges: [one])
	one.addEdge(to: two) // cycle
	let graph = SparseGraph(rootNode: one)
	dump(graph)
	let node = Node(graph: graph, rootNode: Identifier(0))
	dump(node)
	assert(node.edges[0].edges[0] === node)
	print(node.breadFirstSearch.map { $0.value })
	}
	sample5()