Skip to content

Instantly share code, notes, and snippets.

@solutus
Created March 21, 2018 19:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save solutus/f42302d5408bdc60cf0f0df7a89f411e to your computer and use it in GitHub Desktop.
Save solutus/f42302d5408bdc60cf0f0df7a89f411e to your computer and use it in GitHub Desktop.
bidirect breadth first search algorithm based on doc -> relations:array -> doc model
require 'set'
# Purpose of this class - to build connected undirected graph
# and effectively find all possible paths between vertices
# relations between docs in graph described below.
# Algorithm to build graph: bidirect breadth first search inspired by https://github.com/jwngr/sdow
#
# Example:
# doc1:
# id: 1
# relations: [:a, :b]
#
# doc2:
# id: 2
# relations: [:b, :c]
#
# doc3:
# id: 3
# relations: [:b, :d]
#
# doc4:
# id: 4
# relations: [:c, :d]
#
# doc4:
# id: 4
# relations: [:e]
#
#
# Usage:
#
# find all paths from doc1 to doc4:
# BidirectBFS.new(source_id, dest_id).paths
# returns following array:
# doc1 -> doc2 -> doc4
# doc1 -> doc3 -> doc4
#
# return graph contained only connected docs
# BidirectBFS.new(source_id, dest_id).graph
# doc1 -> doc2 -> doc4
# \-> doc3 /->
class BidirectBFS
Node = Struct.new(:data, :parents, :children)
Data = Struct.new(:id, :relations)
# stub of repository class
class Repository
def initialize(*args, **kwargs)
raise NotImplementedError
end
def find_by_relations(relations)
raise NotImplementedError
docs = nil
docs.map { |doc| Data.new(doc.id, doc.send(@relations_key)) }
end
def find_by_id(id)
raise NotImplementedError
doc = nil
Data.new(doc.id, doc.send(@relations_key))
end
end
def initialize(repository = BidirectBFS::Repository.new, source_id, dest_id)
@repository = repository
@source = repository.find_by_id(source_id)
@dest = repository.find_by_id(dest_id)
end
# 0. initialization
# set up source_set by node created by source
# set up source_tree source by source var
# set up dest_set dest.id
# set up dest_tree node created from dest
# 1. build source_tree
# get descendants of source node - children
# add children to source_set (doc id only)
# build relations tree - source_tree doc has relations, every relation has doc ids, etc
# if child is in dest_set, exclude it from children, result is child_to_enqueue
# if children are empty - goto 3
# otherwise go to п.2
# 2. build dest_tree
# get descendants of dest node - children
# add children to dest_set (doc id only)
# build relations tree - dest_tree doc has relations, every relation has doc ids, etc
# if child is in source_set, exclude it from children, result is child_to_enqueue
# if children are empty - goto 3
# otherwise go to п.2
# 3. tree union
# create connecting_set = (source_children & dest_children)
# remove relations from source_tree if they are not in connection_set,
# add nodes from dest_set to the rest source nodes
# (skip cleanup of dest_tree)
# now, every path from source to dest is defined
# 4. getting paths
# use dfs with process visited element firstly
# while graph traverse add every element to path array,
# if children is empty (todo: add to paths only if last element is dest)
# it means we are on dest element, add path to paths,
# traverse every child of current node by the same algorithm, remove previous child from path
def graph
# repository item (like @source, @dest and ancestors) has id, relations methods
# 0. initialization
# set up source_set by node created by source
# set up source_tree source by source var
source_node = Node.new(@source, nil, nil)
@source_set = Set.new([@source.id])
@source_tree = source_node
@source_relations_set = Set.new
# set up dest_set dest.id
# set up dest_tree node created from dest
dest_node = Node.new(@dest, nil, nil)
@dest_set = Set.new([@dest.id])
@dest_tree = dest_node
@dest_relations_set = Set.new
source_queue = [source_node]
dest_queue = [dest_node]
loop do
source_queue = build_branch(@source_set, source_queue, @source_relations_set)
if source_queue.empty?
puts "break as source_queue is empty"
break
end
dest_queue = build_branch(@dest_set, dest_queue, @dest_relations_set)
if dest_queue.empty?
puts "break as dest_queue is empty"
break
end
end
# 3. tree union
# create connecting_set = (source_children & dest_children)
# remove relations from source_tree if they are not in connection_set,
# add nodes from dest_set to the rest source nodes
# (skip cleanup of dest_tree)
# now, every path from source to dest is defined
# remove dest chiildren as it related to source through parents
connecting_set = @source_set & @dest_set
dfs_traverse(@source_tree, nil) do |node, _acc|
if connecting_set.include?(node.data.id)
node.children ||= []
dest_node = find_node(@dest_tree, node)
parents = dest_node.parents
node.children = parents if parents
node.children.select! { |child| child.children }
node.children.uniq!
else
node.parents.each { |parent| parent.children -= [node] if parent.children } if node.parents
end
end
# remove dest chiildren as it related to source through parents
@dest_tree.children = nil
puts "source: #{@source.id}, dest: #{@dest.id}"
@source_tree
end
# 4. getting paths
# use dfs with process visited element firstly
# while graph traverse add every element to path array,
# if children is empty (todo: add to paths only if last element is dest)
# it means we are on dest element, add path to paths,
# traverse every child of current node by the same algorithm, remove previous child from path
def paths
paths = []
dfs_traverse_node_first(@source_tree, []) do |node, acc|
# root node
if node.children.nil? || node.children.empty?
paths << (acc + [node.data.id]).dup
acc = []
else
acc << node.data.id
end
end
paths
end
def dfs_traverse(node, acc, &block)
node.children.each { |child| dfs_traverse(child, acc, &block) } if node.children
block.call node, acc
end
def dfs_traverse_node_first(node, acc, &block)
block.call node, acc
return unless node.children
node.children.each do |child|
acc.pop if node.data.id != acc.last # remove last added child
dfs_traverse_node_first(child, acc, &block)
end
end
# build source_tree
# build relations tree - source_tree doc has relations, every relation has doc ids, etc
# if child is in dest_set, exclude it from children, result is child_to_enqueue
# if children are empty - goto 3
# otherwise go to п.2
def build_branch(set, queue, relations_set)
# get descendants of source node - children
# add children to source_set (doc id only)
relations = queue.map { |node| node.data.relations }.flatten.uniq
relations -= relations_set.to_a
new_docs = if relations.empty?
[]
else
@repository.find_by_relations(relations) - queue.map(&:data) # Data objects
end
relations_set.merge relations
new_queue = []
queue.each do |node|
parents = node.parents
parents = [node] if parents.nil?
parents.each do |parent|
children = new_docs.select do |child|
parent.data.relations.find { |rel| child.relations.include? rel }
end
children_nodes = children.map do |child|
node = Node.new(child)
node.parents ||= []
node.parents << parent
node
end
parent.children ||= []
parent.children += children_nodes
parent.children.uniq!
new_queue.concat children_nodes.reject { |child| set.include? child.data.id }
end
end
set.merge new_docs.map(&:id)
new_queue
end
def find_node(tree, node)
dfs_traverse(tree, nil) do |cur_node, _acc|
return cur_node if cur_node.data.id == node.data.id
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment