Skip to content

Instantly share code, notes, and snippets.

@makaroni4
Created July 1, 2013 07:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save makaroni4/5898950 to your computer and use it in GitHub Desktop.
Save makaroni4/5898950 to your computer and use it in GitHub Desktop.
A cohort analysis code described in <http://gistflow.com/posts/837-cohort-analysis-revealed>
require 'active_record'
require 'csv'
ActiveRecord::Base.establish_connection(
adapter: "mysql2",
host: "localhost",
database: "36on_development",
user: "username",
password: "password",
pool: 5,
timeout: 5000
)
class Reply < ActiveRecord::Base
belongs_to :user
end
class User < ActiveRecord::Base
has_many :replies
end
replies = Reply.joins(:user).
joins("JOIN
(SELECT user_id,
Min(created_at) AS cohortDate
FROM replies
GROUP BY user_id) AS cohorts ON replies.user_id = cohorts.user_id
AND replies.user_id NOT IN
(SELECT user_id
FROM site_admins)").
select("replies.user_id,
replies.created_at,
DATE_FORMAT(cohorts.cohortDate, '%Y') AS cohort_date_symbol,
TIMESTAMPDIFF(YEAR, cohorts.cohortDate, replies.created_at) AS cohort_period")
stats = replies.inject({}) do |stats, reply|
stats[reply.cohort_period] ||= {}
stats[reply.cohort_period][reply.cohort_date_symbol] ||= 0
stats[reply.cohort_period][reply.cohort_date_symbol] += 1
stats
end
# output
file = CSV.open('cohort_data.csv', 'w')
min_year = Reply.minimum(:created_at).year
max_year = Reply.maximum(:created_at).year
cohort_dates = (min_year..max_year).map(&:to_s)
title_row = ['cohortPeriod'] + cohort_dates
file << title_row
stats.each_pair do |cohortPeriod, dates|
arr = [cohortPeriod]
cohort_dates.each do |date|
arr << dates[date].to_i
end
file << arr
end
file.close
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment