Skip to content

Instantly share code, notes, and snippets.

@RStankov
Last active January 30, 2020 15:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RStankov/f71623b2506d85aa912cfb8860546d43 to your computer and use it in GitHub Desktop.
Save RStankov/f71623b2506d85aa912cfb8860546d43 to your computer and use it in GitHub Desktop.
# frozen_string_literal: true
require 'csv'
require 'ostruct'
require 'prettyprint'
# Friendship graph as an adjacency list: user id => array of friend ids.
# Read from data.csv, which needs `id` and `friend_ids` columns; friend_ids is
# a comma-separated list that may be wrapped in braces, e.g. "{1,2,3}".
users = {}
CSV.foreach('data.csv', headers: true) do |row|
  # CSV yields nil for a blank cell, so coerce to a string before stripping
  # the braces; a user with a blank cell simply gets an empty friend list.
  users[row['id'].to_i] = row['friend_ids'].to_s.delete('{}').split(',').map(&:to_i)
end

# Global lookup table consulted by make_cluster.
USERS = users
# Smallest cluster size (in members) that make_cluster keeps.
MIN = 3
# Builds the candidate clusters reachable from one user's friend list.
#
# For every friend in the list this emits the cluster
# [owner, friend, *mutual], where mutual holds the other listed friends that
# also appear in that friend's own entry in USERS. It then recurses with
# [friend, mutual] to find the tighter groups nested inside.
#
# @param pair [Array] two elements: [user_id, friend_ids]
# @return [Array<Array>] distinct clusters with at least MIN members; clusters
#   are compared as ordered arrays, so [1, 2, 3] and [1, 3, 2] are both kept
def make_cluster(pair)
  owner, friend_ids = pair

  candidates = friend_ids.flat_map do |id|
    known = USERS[id]
    # A friend missing from USERS contributes no mutual friends. The friend's
    # own id is excluded as well: if a user lists themselves, keeping it would
    # stop the list from shrinking and the recursion would never terminate.
    mutual = known ? friend_ids.select { |other| other != id && known.include?(other) } : []

    [[owner, id, *mutual], *make_cluster([id, mutual])]
  end

  candidates.select { |cluster| cluster.size >= MIN }.uniq
end
# Gather the clusters found from every user's friend list, drop duplicates and
# order them largest first.
clusters = users.flat_map { |pair| make_cluster(pair) }.uniq.sort_by(&:size).reverse

pp clusters
pp clusters.size

CSV.open("clusters.csv", "wb") do |csv|
  # The header is as wide as the largest cluster, which sorts first. With no
  # clusters there is nothing to size it from, so the file is left empty
  # instead of failing on a nil first element.
  unless clusters.empty?
    csv << Array.new(clusters.first.size) { |i| "Profile #{i+1}" }
    clusters.each { |cluster| csv << cluster }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment