Skip to content

Instantly share code, notes, and snippets.

@whitequark
Last active December 12, 2015 03:18
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save whitequark/4705506 to your computer and use it in GitHub Desktop.
Save whitequark/4705506 to your computer and use it in GitHub Desktop.
NoSQL is for those who can't learn SQL.
# Convert raw stats into hourly stats.
def self.squash!(before=Time.now)
transaction do
range = where(%{
date_trunc('hour', collected_at) < date_trunc('hour', timestamp ?)
}, before)
from_id, to_id = range.minimum(:id), range.maximum('id')
return nil if [from_id, to_id].none?
# Requires PostgreSQL >= 8.2
connection.execute(%Q{
INSERT INTO hourly_stats (date, hour,
resource_type, resource_id, site_id,
kind,
utm_source_id,
delta,
count)
SELECT date_trunc('day', collected_at) AS date,
EXTRACT(HOUR FROM collected_at) AS hour,
resource_type, resource_id, site_id,
kind,
utm_source_id,
-- Compute delta.
COUNT(*) AS delta,
-- Compute a cumulative total for count.
SUM(COUNT(*)) OVER w +
-- Fetch a preceding cumulative total.
COALESCE(
(
SELECT count FROM hourly_stats hs
WHERE hs.resource_type = rs.resource_type AND
hs.resource_id = rs.resource_id AND
hs.site_id = rs.site_id AND
hs.kind = rs.kind AND
hs.utm_source_id
IS NOT DISTINCT FROM
rs.utm_source_id
ORDER BY date DESC, hour DESC
LIMIT 1
), 0
) AS count
FROM raw_stats rs
WHERE id >= #{from_id} AND id <= #{to_id}
GROUP BY hour, date,
resource_type, resource_id, site_id,
kind,
utm_source_id
WINDOW w AS (PARTITION BY
resource_type, resource_id, site_id,
kind,
utm_source_id
-- This should've been ORDER BY date, hour...
-- but postgres is too stupid.
ORDER BY date_trunc('day', collected_at) ASC,
EXTRACT(HOUR FROM collected_at) ASC
ROWS BETWEEN UNBOUNDED PRECEDING
AND CURRENT ROW)
})
where('id >= :from AND id <= :to', from: from_id, to: to_id).
delete_all
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment