Skip to content

Instantly share code, notes, and snippets.

@papaver
Last active December 15, 2015 20:19
Show Gist options
  • Save papaver/5318141 to your computer and use it in GitHub Desktop.
Save papaver/5318141 to your computer and use it in GitHub Desktop.
SSC Group Engineering Challenge: A Content Releaser
interval: 10
source 'http://rubygems.org'
gem 'activerecord'
gem 'fastercsv'
gem 'sqlite3'
#!/usr/bin/env ruby -w
#
# SSC Group Programming Challenge: A Content Releaser
# by Moiz Merchant
#
#------------------------------------------------------------------------------
# requires
#------------------------------------------------------------------------------
# first task is figuring out the best way to ingest the csv content, looking
# around on google quickly reveals a gem we can take advantage of
require 'csv'
# second task is storing the parsed csv data into a database, active record
# should help with abstracting away the data layer
require 'active_record'
# sqlite should be sufficient for this exercise
require 'sqlite3'
# quick and easy configs
require 'yaml'
#------------------------------------------------------------------------------
# globals
#------------------------------------------------------------------------------
# location of the csv file to ingest. other possible setups: the file could be
# uploaded from a web page straight to the server managing the content
# delivery, or the path could be passed as an argument to a command line script
@kCSVDataPath = [Dir.home, 'Documents', 'content.csv'].join('/')
# path of the yaml file holding the settings for dynamic script control
@kConfigPath = 'config.yaml'
#------------------------------------------------------------------------------
# active_record setup
#------------------------------------------------------------------------------
# open the connection to the content db. a persistent database would normally
# be used; a temporary in-memory sqlite db is used for this example so the
# script can be run repeatedly without extra code to reset the database
ActiveRecord::Base.establish_connection(adapter: 'sqlite3', database: ':memory:')
#------------------------------------------------------------------------------
# scheme
#------------------------------------------------------------------------------
puts "Initializing database..."
# create the contents table; its columns mirror the columns of the csv data
ActiveRecord::Schema.define do
  create_table :contents do |t|
    t.string   :title
    t.string   :site
    t.datetime :released_at
  end
end
#------------------------------------------------------------------------------
# records
#------------------------------------------------------------------------------
# record class for rows of the contents table (title, site, released_at)
class Content < ActiveRecord::Base

  #- scopes -----------------------------------------------------------------

  # content whose release time is still in the future
  scope :active, lambda {
    where('released_at > :time', :time => DateTime.current)
  }

  # content whose release time has already passed
  scope :released, lambda {
    where('released_at <= :time', :time => DateTime.current)
  }

  # content associated with a certain site. a hash condition is used instead
  # of a hand written 'site == ...' fragment: '==' is not standard sql, and
  # the hash form also matches rows correctly when site is nil
  scope :of_site, lambda { |site|
    where(:site => site)
  }

  #- methods ----------------------------------------------------------------

  #
  # site_count - return the number of unique sites we have content for
  #
  def self.site_count
    # the :select finder option of count only exists in old activerecord
    # releases; counting over a select issues COUNT(distinct site) instead
    select('distinct site').count
  end

  #
  # sites - list of the unique sites found in the current scope
  #
  def self.sites
    select('distinct site').map(&:site)
  end

  #
  # recommend_release_content - pick the site to release content for next so
  #   releases stay evenly distributed
  #
  #   sites        - candidate sites (those with releasable content)
  #   days_history - how many of the most recent releases to look back over
  #
  #   returns one of the candidate sites with the fewest releases inside the
  #   history window, chosen at random among ties, or nil when sites is empty
  #
  def self.recommend_release_content(sites, days_history)
    return nil if sites.empty?

    # every candidate starts at zero so sites without recent releases win
    tally = Hash[sites.map { |s| [s, 0] }]

    # walk the latest releases first, loading only the site column, and
    # count the releases that belong to a candidate site
    select(:site)
      .released
      .order(:released_at).reverse_order
      .limit(days_history)
      .each { |c| tally[c.site] += 1 if tally.key?(c.site) }

    # keep the sites tied for the fewest releases and pick one at random
    fewest = tally.values.min
    tally.select { |_, n| n == fewest }.keys.sample
  end

  #--------------------------------------------------------------------------

  #
  # to_s - "title, site, released_at", helpful for debugging
  #
  def to_s
    "#{title}, #{site}, #{released_at}"
  end
end
#------------------------------------------------------------------------------
# methods
#------------------------------------------------------------------------------
#
# parseCSVData - parse the contents of the given csv file and create one
#   Content record per row. it is assumed that the file contains header info
#   naming the title, site and released_at columns
#
#   released_at values must look like YYYY-MM-DD:HH:MM:SS. they are parsed
#   with DateTime rather than Date so the time of day is kept; Date would
#   silently drop it and turn every release time into midnight
#
#   raises when the file can't be read, when a released_at value is missing
#   or doesn't match the format, or when create! fails
#
def parseCSVData(csv_path)
  CSV.parse(File.read(csv_path), :headers => true) do |row|
    Content.create!(
      :title       => row['title'],
      :site        => row['site'],
      :released_at => DateTime.strptime(row['released_at'], '%Y-%m-%d:%H:%M:%S')
    )
  end
end
#
# release_content - release one piece of unreleased content
#
#   returns true when a piece of content was released, false when there was
#   nothing left to release
#
def release_content
  # the number of known sites decides how far back the history is examined
  history_window = Content.site_count * 2

  # sites which still have unreleased content
  candidate_sites = Content.active.sites
  if candidate_sites.empty?
    puts "Warning ->> xxx <<-- no releasable content found"
    return false
  end

  # ask for the site whose recent release count is lowest
  chosen_site = Content.recommend_release_content(candidate_sites, history_window)

  # pick random content for that site. the content itself could be
  # distributed too, by doing a similar history check and intersecting it
  # with the available content for the picked site
  item = Content.active.of_site(chosen_site).sample
  if item.nil?
    puts "Warning ->> #{chosen_site} <<-- no releasable content found"
    return false
  end

  # 'release' the content: stamp it, print it out, then persist the stamp
  item.released_at = DateTime.now
  puts "New Release ->> #{item.site} <<-- #{item.title} released at #{item.released_at}"
  item.save
  true
end
#------------------------------------------------------------------------------
# main
#------------------------------------------------------------------------------
# load the config file holding the interval time. the interval could be set in
# code or provided through a config for the user; it really depends on how
# often the value is envisioned to change. a config is used here as an
# exercise and is assumed to exist; a make_a_default_config step could always
# be added to auto generate one when none exists
config = YAML.load(File.read(@kConfigPath))
# validate the csv file exists at the path and stop right here when it does
# not, instead of failing later while reading it. File.exist? is used because
# File.exists? is no longer available in current rubies
unless File.exist?(@kCSVDataPath)
  puts "Error: specified CSV path is invalid: #{@kCSVDataPath}"
  exit 1
end
# load the csv data into the database
puts "Loading csv data..."
parseCSVData @kCSVDataPath
# we emulate a quickened release process to show the results of the script.
# minutes become seconds. in a real environment a cron job could be used
# to run a script which releases content, or this script could simply sleep
# for the configured intervals
while release_content
  sleep config['interval']
end
#
# this script will stop running once it runs out of content
#
# the order of the releases could also be presorted once, written out, and
# only updated when new release content is added. this would be much more
# efficient in terms of processing.
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment