Skip to content

Instantly share code, notes, and snippets.

@Masa331
Last active September 19, 2020 13:48
Show Gist options
  • Save Masa331/ed5f89bb75d6ee4c0325ce7877c1c64a to your computer and use it in GitHub Desktop.
Save Masa331/ed5f89bb75d6ee4c0325ce7877c1c64a to your computer and use it in GitHub Desktop.
Grouping text entries by their associated tags into a hierarchical structure based on the tags' occurrence across entries
#!/usr/bin/env ruby
# The following code groups entries hierarchically by the composition of their tags. The hierarchy is determined by how each tag occurs in the other entries.
require 'pry'
# Sample input for the grouping below.
# Each entry is [id, text description, array of tags]; entry 13 has no tags at all.
entries = [
[1, 'lorem ipsum', ['#customer_x', '#administration', '#paperwork']],
[2, 'lorem ipsum', ['#customer_x', '#development']],
[3, 'lorem ipsum', ['#customer_x', '#administration']],
[4, 'lorem ipsum', ['#customer_y', '#administration']],
[5, 'lorem ipsum', ['#customer_y', '#administration', '#invoice']],
[6, 'lorem ipsum', ['#bike']],
[7, 'lorem ipsum', ['#project_foo', '#mvp']],
[8, 'lorem ipsum', ['#project_foo', '#docs']],
[9, 'lorem ipsum', ['#project_bar', '#docs']],
[10, 'lorem ipsum', ['#blog', '#article']],
[11, 'lorem ipsum', ['#customer_x', '#maintenance', '#infrastructure']],
[12, 'lorem ipsum', ['#customer_x', '#maintenance', '#infrastructure']],
[13, 'lorem ipsum', []]
]
# Value the script compares against the result of tag_hierarchy(entries).
# Every node is a two-element array [groups, entries]:
#   groups  - hash whose keys are a tag or an array of tags and whose values are nested nodes
#   entries - entries listed directly at this level, each with an empty tag array
expected_result = [
{"#customer_x"=>[{"#administration"=>[{"#paperwork"=>[{}, [[1, "lorem ipsum", []]]]}, [[3, "lorem ipsum", []]]], "#development"=>[{}, [[2, "lorem ipsum", []]]], ["#infrastructure", "#maintenance"]=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []],
"#administration"=>[{"#customer_x"=>[{"#paperwork"=>[{}, [[1, "lorem ipsum", []]]]}, [[3, "lorem ipsum", []]]], "#customer_y"=>[{"#invoice"=>[{}, [[5, "lorem ipsum", []]]]}, [[4, "lorem ipsum", []]]]}, []],
"#customer_y"=>[{"#administration"=>[{"#invoice"=>[{}, [[5, "lorem ipsum", []]]]}, [[4, "lorem ipsum", []]]]}, []],
"#bike"=>[{}, [[6, "lorem ipsum", []]]],
"#project_foo"=>[{"#mvp"=>[{}, [[7, "lorem ipsum", []]]], "#docs"=>[{}, [[8, "lorem ipsum", []]]]}, []],
"#docs"=>[{"#project_foo"=>[{}, [[8, "lorem ipsum", []]]], "#project_bar"=>[{}, [[9, "lorem ipsum", []]]]}, []],
["#article", "#blog"]=>[{}, [[10, "lorem ipsum", []]]]},
[[13, "lorem ipsum", []]]
]
# Groups entries into a nested hierarchy keyed by their tags.
#
# Every tag of an entry first becomes a candidate group that holds the entry
# with that tag removed. The candidate groups are then pruned:
#
# * A tag whose entries all carry one identical, non-empty list of remaining
#   tags is removed as a group of its own.
# * If every remaining tag seen on a group's entries currently maps to exactly
#   the same entry ids as the group itself, all of those tags are removed and
#   replaced by a single group keyed by an Array of tags (the remaining tags
#   first, then the group's own tag), whose entries have no tags left.
#
# Each surviving group is then processed recursively.
#
# @param entries [Array<Array>] entries shaped as [id, description, tags]
# @return [Array] a two-element array [groups, untagged]: `groups` is a Hash
#   mapping a tag (or an Array of fused tags) to a nested result of the same
#   shape; `untagged` holds the entries whose tag list is empty
def tag_hierarchy(entries)
  untagged = []

  grouped = entries.each_with_object({}) do |entry, memo|
    id, desc, tags = entry
    # An entry that repeats a tag must still be listed only once under it.
    tags = tags.uniq
    untagged << entry if tags.empty?
    tags.each do |tag|
      (memo[tag] ||= []) << [id, desc, tags - [tag]]
    end
  end

  merged = {}
  # Walk a snapshot of the keys: groups are removed from `grouped` as we go,
  # and a tag removed before its turn must not be visited.
  grouped.keys.each do |key|
    value = grouped[key]
    next unless value

    other_tag_combinations = value.map { |item| item[2] }.uniq
    other_tags = other_tag_combinations.flatten.uniq

    # Entries don't have any other tags; keep the group untouched.
    next if other_tags.empty?

    # All entries share the same remaining tags, so this tag is not kept as a
    # group of its own.
    grouped.delete(key) if other_tag_combinations.size == 1

    entry_ids = value.map(&:first)

    # Tags already removed from `grouped` fall back to [] and never match.
    always_together = other_tags.all? do |tag|
      grouped.fetch(tag, []).map(&:first) == entry_ids
    end
    next unless always_together

    # Every involved tag covers exactly the same entries: fuse them into one key.
    new_key = other_tags << key
    new_key.each { |tag| grouped.delete(tag) }
    merged[new_key] = value.map { |item| [item[0], item[1], []] }
  end

  combined = grouped.merge(merged)
  [combined.transform_values { |subentries| tag_hierarchy(subentries) }, untagged]
end
# Run the grouping on the sample data and report, in colour, whether the
# outcome equals the expected fixture.
result = tag_hierarchy(entries)
message =
  if result == expected_result
    "\e[32mCool, result matches expected value\e[0m"
  else
    "\e[31mResult doesn't match the expected value\e[0m"
  end
puts message
#!/usr/bin/env ruby
# The following code groups entries by their tags and then recursively does the same with the subgroups.
require 'pry'
# Sample input for the grouping below.
# Each entry is [id, text description, array of tags]; entry 13 has no tags at all.
entries = [
[1, 'lorem ipsum', ['#customer_x', '#administration', '#paperwork']],
[2, 'lorem ipsum', ['#customer_x', '#development']],
[3, 'lorem ipsum', ['#customer_x', '#administration']],
[4, 'lorem ipsum', ['#customer_y', '#administration']],
[5, 'lorem ipsum', ['#customer_y', '#administration', '#invoice']],
[6, 'lorem ipsum', ['#bike']],
[7, 'lorem ipsum', ['#project_foo', '#mvp']],
[8, 'lorem ipsum', ['#project_foo', '#docs']],
[9, 'lorem ipsum', ['#project_bar', '#docs']],
[10, 'lorem ipsum', ['#blog', '#article']],
[11, 'lorem ipsum', ['#customer_x', '#maintenance', '#infrastructure']],
[12, 'lorem ipsum', ['#customer_x', '#maintenance', '#infrastructure']],
[13, 'lorem ipsum', []]
]
# Value the script compares against the result of tag_pyramid(entries).
# Every node is a two-element array [groups, entries]:
#   groups  - hash mapping a tag to the nested node built from the entries carrying that tag
#   entries - entries listed directly at this level, each with an empty tag array
# Here every tag of an entry appears as a key at every level, so the same entry
# is reachable through each ordering of its tags.
expected_result = [
{"#customer_x"=>
[{"#administration"=>[{"#paperwork"=>[{}, [[1, "lorem ipsum", []]]]}, [[3, "lorem ipsum", []]]],
"#paperwork"=>[{"#administration"=>[{}, [[1, "lorem ipsum", []]]]}, []],
"#development"=>[{}, [[2, "lorem ipsum", []]]],
"#maintenance"=>[{"#infrastructure"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []],
"#infrastructure"=>[{"#maintenance"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []]},
[]],
"#administration"=>
[{"#customer_x"=>[{"#paperwork"=>[{}, [[1, "lorem ipsum", []]]]}, [[3, "lorem ipsum", []]]],
"#paperwork"=>[{"#customer_x"=>[{}, [[1, "lorem ipsum", []]]]}, []],
"#customer_y"=>[{"#invoice"=>[{}, [[5, "lorem ipsum", []]]]}, [[4, "lorem ipsum", []]]],
"#invoice"=>[{"#customer_y"=>[{}, [[5, "lorem ipsum", []]]]}, []]},
[]],
"#paperwork"=>[{"#customer_x"=>[{"#administration"=>[{}, [[1, "lorem ipsum", []]]]}, []], "#administration"=>[{"#customer_x"=>[{}, [[1, "lorem ipsum", []]]]}, []]}, []],
"#development"=>[{"#customer_x"=>[{}, [[2, "lorem ipsum", []]]]}, []],
"#customer_y"=>[{"#administration"=>[{"#invoice"=>[{}, [[5, "lorem ipsum", []]]]}, [[4, "lorem ipsum", []]]], "#invoice"=>[{"#administration"=>[{}, [[5, "lorem ipsum", []]]]}, []]}, []],
"#invoice"=>[{"#customer_y"=>[{"#administration"=>[{}, [[5, "lorem ipsum", []]]]}, []], "#administration"=>[{"#customer_y"=>[{}, [[5, "lorem ipsum", []]]]}, []]}, []],
"#bike"=>[{}, [[6, "lorem ipsum", []]]],
"#project_foo"=>[{"#mvp"=>[{}, [[7, "lorem ipsum", []]]], "#docs"=>[{}, [[8, "lorem ipsum", []]]]}, []],
"#mvp"=>[{"#project_foo"=>[{}, [[7, "lorem ipsum", []]]]}, []],
"#docs"=>[{"#project_foo"=>[{}, [[8, "lorem ipsum", []]]], "#project_bar"=>[{}, [[9, "lorem ipsum", []]]]}, []],
"#project_bar"=>[{"#docs"=>[{}, [[9, "lorem ipsum", []]]]}, []],
"#blog"=>[{"#article"=>[{}, [[10, "lorem ipsum", []]]]}, []],
"#article"=>[{"#blog"=>[{}, [[10, "lorem ipsum", []]]]}, []],
"#maintenance"=>[{"#customer_x"=>[{"#infrastructure"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []], "#infrastructure"=>[{"#customer_x"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []]}, []],
"#infrastructure"=>[{"#customer_x"=>[{"#maintenance"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []], "#maintenance"=>[{"#customer_x"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []]}, []]},
[[13, "lorem ipsum", []]]
]
# Groups entries by each of their tags and recursively repeats the grouping
# inside every group using the tags that are left.
#
# An entry with several tags is therefore listed under each of its tags, and
# within each of those groups again under each remaining tag, until no tags
# are left.
#
# @param entries [Array<Array>] entries shaped as [id, description, tags]
# @return [Array] a two-element array [groups, untagged]: `groups` is a Hash
#   mapping each tag to a nested result of the same shape, built from the
#   entries carrying that tag (with the tag removed); `untagged` holds the
#   entries whose tag list is empty
def tag_pyramid(entries)
  untagged = []

  grouped = entries.each_with_object({}) do |entry, memo|
    id, desc, tags = entry
    # An entry that repeats a tag must still be listed only once under it.
    tags = tags.uniq
    untagged << entry if tags.empty?
    tags.each do |tag|
      (memo[tag] ||= []) << [id, desc, tags - [tag]]
    end
  end

  [grouped.transform_values { |subentries| tag_pyramid(subentries) }, untagged]
end
# Run the grouping on the sample data and report, in colour, whether the
# outcome equals the expected fixture.
result = tag_pyramid(entries)
message =
  if result == expected_result
    "\e[32mCool, result matches expected value\e[0m"
  else
    "\e[31mResult doesn't match the expected value\e[0m"
  end
puts message
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment