Created
March 21, 2018 19:24
-
-
Save solutus/f42302d5408bdc60cf0f0df7a89f411e to your computer and use it in GitHub Desktop.
bidirect breadth first search algorithm based on doc -> relations:array -> doc model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'set' | |
# Purpose of this class - to build connected undirected graph | |
# and effectively find all possible paths between vertices | |
# relations between docs in graph described below. | |
# Algorithm to build graph: bidirect breadth first search inspired by https://github.com/jwngr/sdow | |
# | |
# Example: | |
# doc1: | |
# id: 1 | |
# relations: [:a, :b] | |
# | |
# doc2: | |
# id: 2 | |
# relations: [:b, :c] | |
# | |
# doc3: | |
# id: 3 | |
# relations: [:b, :d] | |
# | |
# doc4: | |
# id: 4 | |
# relations: [:c, :d] | |
# | |
# doc4: | |
# id: 4 | |
# relations: [:e] | |
# | |
# | |
# Usage: | |
# | |
# find all paths from doc1 to doc4: | |
# BidirectBFS.new(source_id, dest_id).paths | |
# returns following array: | |
# doc1 -> doc2 -> doc4 | |
# doc1 -> doc3 -> doc4 | |
# | |
# return graph contained only connected docs | |
# BidirectBFS.new(source_id, dest_id).graph | |
# doc1 -> doc2 -> doc4 | |
# \-> doc3 /-> | |
class BidirectBFS | |
Node = Struct.new(:data, :parents, :children) | |
Data = Struct.new(:id, :relations) | |
# stub of repository class | |
class Repository | |
def initialize(*args, **kwargs) | |
raise NotImplementedError | |
end | |
def find_by_relations(relations) | |
raise NotImplementedError | |
docs = nil | |
docs.map { |doc| Data.new(doc.id, doc.send(@relations_key)) } | |
end | |
def find_by_id(id) | |
raise NotImplementedError | |
doc = nil | |
Data.new(doc.id, doc.send(@relations_key)) | |
end | |
end | |
def initialize(repository = BidirectBFS::Repository.new, source_id, dest_id) | |
@repository = repository | |
@source = repository.find_by_id(source_id) | |
@dest = repository.find_by_id(dest_id) | |
end | |
# 0. initialization | |
# set up source_set by node created by source | |
# set up source_tree source by source var | |
# set up dest_set dest.id | |
# set up dest_tree node created from dest | |
# 1. build source_tree | |
# get descendants of source node - children | |
# add children to source_set (doc id only) | |
# build relations tree - source_tree doc has relations, every relation has doc ids, etc | |
# if child is in dest_set, exclude it from children, result is child_to_enqueue | |
# if children are empty - goto 3 | |
# otherwise go to п.2 | |
# 2. build dest_tree | |
# get descendants of dest node - children | |
# add children to dest_set (doc id only) | |
# build relations tree - dest_tree doc has relations, every relation has doc ids, etc | |
# if child is in source_set, exclude it from children, result is child_to_enqueue | |
# if children are empty - goto 3 | |
# otherwise go to п.2 | |
# 3. tree union | |
# create connecting_set = (source_children & dest_children) | |
# remove relations from source_tree if they are not in connection_set, | |
# add nodes from dest_set to the rest source nodes | |
# (skip cleanup of dest_tree) | |
# now, every path from source to dest is defined | |
# 4. getting paths | |
# use dfs with process visited element firstly | |
# while graph traverse add every element to path array, | |
# if children is empty (todo: add to paths only if last element is dest) | |
# it means we are on dest element, add path to paths, | |
# traverse every child of current node by the same algorithm, remove previous child from path | |
def graph | |
# repository item (like @source, @dest and ancestors) has id, relations methods | |
# 0. initialization | |
# set up source_set by node created by source | |
# set up source_tree source by source var | |
source_node = Node.new(@source, nil, nil) | |
@source_set = Set.new([@source.id]) | |
@source_tree = source_node | |
@source_relations_set = Set.new | |
# set up dest_set dest.id | |
# set up dest_tree node created from dest | |
dest_node = Node.new(@dest, nil, nil) | |
@dest_set = Set.new([@dest.id]) | |
@dest_tree = dest_node | |
@dest_relations_set = Set.new | |
source_queue = [source_node] | |
dest_queue = [dest_node] | |
loop do | |
source_queue = build_branch(@source_set, source_queue, @source_relations_set) | |
if source_queue.empty? | |
puts "break as source_queue is empty" | |
break | |
end | |
dest_queue = build_branch(@dest_set, dest_queue, @dest_relations_set) | |
if dest_queue.empty? | |
puts "break as dest_queue is empty" | |
break | |
end | |
end | |
# 3. tree union | |
# create connecting_set = (source_children & dest_children) | |
# remove relations from source_tree if they are not in connection_set, | |
# add nodes from dest_set to the rest source nodes | |
# (skip cleanup of dest_tree) | |
# now, every path from source to dest is defined | |
# remove dest chiildren as it related to source through parents | |
connecting_set = @source_set & @dest_set | |
dfs_traverse(@source_tree, nil) do |node, _acc| | |
if connecting_set.include?(node.data.id) | |
node.children ||= [] | |
dest_node = find_node(@dest_tree, node) | |
parents = dest_node.parents | |
node.children = parents if parents | |
node.children.select! { |child| child.children } | |
node.children.uniq! | |
else | |
node.parents.each { |parent| parent.children -= [node] if parent.children } if node.parents | |
end | |
end | |
# remove dest chiildren as it related to source through parents | |
@dest_tree.children = nil | |
puts "source: #{@source.id}, dest: #{@dest.id}" | |
@source_tree | |
end | |
# 4. getting paths | |
# use dfs with process visited element firstly | |
# while graph traverse add every element to path array, | |
# if children is empty (todo: add to paths only if last element is dest) | |
# it means we are on dest element, add path to paths, | |
# traverse every child of current node by the same algorithm, remove previous child from path | |
def paths | |
paths = [] | |
dfs_traverse_node_first(@source_tree, []) do |node, acc| | |
# root node | |
if node.children.nil? || node.children.empty? | |
paths << (acc + [node.data.id]).dup | |
acc = [] | |
else | |
acc << node.data.id | |
end | |
end | |
paths | |
end | |
def dfs_traverse(node, acc, &block) | |
node.children.each { |child| dfs_traverse(child, acc, &block) } if node.children | |
block.call node, acc | |
end | |
def dfs_traverse_node_first(node, acc, &block) | |
block.call node, acc | |
return unless node.children | |
node.children.each do |child| | |
acc.pop if node.data.id != acc.last # remove last added child | |
dfs_traverse_node_first(child, acc, &block) | |
end | |
end | |
# build source_tree | |
# build relations tree - source_tree doc has relations, every relation has doc ids, etc | |
# if child is in dest_set, exclude it from children, result is child_to_enqueue | |
# if children are empty - goto 3 | |
# otherwise go to п.2 | |
def build_branch(set, queue, relations_set) | |
# get descendants of source node - children | |
# add children to source_set (doc id only) | |
relations = queue.map { |node| node.data.relations }.flatten.uniq | |
relations -= relations_set.to_a | |
new_docs = if relations.empty? | |
[] | |
else | |
@repository.find_by_relations(relations) - queue.map(&:data) # Data objects | |
end | |
relations_set.merge relations | |
new_queue = [] | |
queue.each do |node| | |
parents = node.parents | |
parents = [node] if parents.nil? | |
parents.each do |parent| | |
children = new_docs.select do |child| | |
parent.data.relations.find { |rel| child.relations.include? rel } | |
end | |
children_nodes = children.map do |child| | |
node = Node.new(child) | |
node.parents ||= [] | |
node.parents << parent | |
node | |
end | |
parent.children ||= [] | |
parent.children += children_nodes | |
parent.children.uniq! | |
new_queue.concat children_nodes.reject { |child| set.include? child.data.id } | |
end | |
end | |
set.merge new_docs.map(&:id) | |
new_queue | |
end | |
def find_node(tree, node) | |
dfs_traverse(tree, nil) do |cur_node, _acc| | |
return cur_node if cur_node.data.id == node.data.id | |
end | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment