papaver/Gemfile

## config.yaml
# seconds to sleep between successive content releases; releaser.rb reads
#  this as config['interval'] and passes it to sleep
interval: 10

## Gemfile
# fetch gems over https; plain http leaves gem downloads open to tampering
source 'https://rubygems.org'

gem 'activerecord'
# NOTE(review): releaser.rb requires the standard 'csv' library rather than
#  fastercsv, so this entry looks unused - confirm before removing it
gem 'fastercsv'
gem 'sqlite3'

## releaser.rb
#!/usr/bin/env ruby -w

#
# SSC Group Programming Challenge: A Content Releaser
#   by Moiz Merchant
#

#------------------------------------------------------------------------------
# requires
#------------------------------------------------------------------------------

# first task is figuring out the best way to ingest the csv content, looking
#  around on google quickly reveals a gem we can take advantage of
require 'csv'

# second task is storing the parsed csv data into a database, active record
#  should help with abstracting away the data layer
require 'active_record'

# sqlite should be sufficient for this exercise
require 'sqlite3'

# quick and easy configs
require 'yaml'

#------------------------------------------------------------------------------
# globals
#------------------------------------------------------------------------------

# location of the csv file holding the content to ingest.  other possible
#  setups - the file could be uploaded from a webpage straight to the server
#  managing the content delivery, or it could be passed as an argument to a
#  cmd line script
@kCSVDataPath = Dir.home + '/Documents/content.csv'

# path of the yaml file holding the settings for dynamic script control
@kConfigPath = 'config.yaml'

#------------------------------------------------------------------------------
# active_record setup
#------------------------------------------------------------------------------

# open the connection to the content db.  a persistent db would normally be
#  used; a temporary in-memory sqlite db is used for this example so the code
#  can be run multiple times without extra code to constantly reset the
#  database
ActiveRecord::Base.establish_connection(adapter: 'sqlite3', database: ':memory:')

#------------------------------------------------------------------------------
# scheme
#------------------------------------------------------------------------------

puts "Initializing database..."

# create the contents table, one column per field of the csv data
ActiveRecord::Schema.define do
    create_table :contents do |t|
        t.string   :title
        t.string   :site
        t.datetime :released_at
    end
end

#------------------------------------------------------------------------------
# records
#------------------------------------------------------------------------------

# active record model for a row of the contents table: a piece of content
#  with a title, the site it belongs to and the time it is released at
class Content < ActiveRecord::Base

    #- scopes -----------------------------------------------------------------

    # content which hasn't been released yet, i.e. whose release time is
    #  still ahead of the current time
    scope :active, lambda {
        where('released_at > :time', :time => DateTime.current)
    }

    # content which has already been released
    scope :released, lambda {
        where('released_at <= :time', :time => DateTime.current)
    }

    # content associated with a certain site.  a hash condition is used
    #  instead of an sql fragment built around '==', which is not standard
    #  sql
    scope :of_site, lambda { |site|
        where(:site => site)
    }

    #- methods ----------------------------------------------------------------

    #
    # site_count - return the number of unique sites we have content for
    #
    def self.site_count
        # the options-hash form, count(:select => ...), is no longer accepted
        #  by newer active record releases
        distinct.count(:site)
    end

    #
    # sites - list of the unique sites in the current scope
    #
    def self.sites
        select('distinct site').map(&:site)
    end

    #
    # recommend_release_content - find a site to release content for, making
    #  sure the distribution is averaged out over the recent history
    #
    #  sites        - array of candidate sites (those with releasable content)
    #  days_history - how many of the most recent releases to look back over
    #
    #  returns one of the given sites, or nil when sites is empty
    #
    def self.recommend_release_content(sites, days_history)
        # nothing to recommend without candidates
        return nil if sites.empty?

        # start every candidate site off with zero releases
        counts = Hash[sites.map { |site| [site, 0] }]

        # tally the latest releases per candidate site; only the site column
        #  is loaded and releases of non-candidate sites are ignored
        select(:site)
            .released
            .order(:released_at).reverse_order
            .limit(days_history)
            .each { |row| counts[row.site] += 1 if counts.key?(row.site) }

        # pick a random site amongst those with the fewest recent releases
        fewest = counts.values.min
        counts.select { |_, count| count == fewest }.keys.sample
    end

    #--------------------------------------------------------------------------

    #
    # to_s - helpful for debugging
    #
    def to_s
        "#{title}, #{site}, #{released_at}"
    end

end

#------------------------------------------------------------------------------
# methods
#------------------------------------------------------------------------------

#
# parseCSVData - load every row of the given csv file into the contents
#   table.  the file must start with a header row naming the title, site and
#   released_at columns
#
#   csv_path - path of the csv file to read
#
#   errors from reading the file, parsing a timestamp or creating a record
#   are not rescued here
#
def parseCSVData(csv_path)
    # stream the file row by row rather than reading it all into memory
    CSV.foreach(csv_path, :headers => true) do |row|
        # DateTime keeps the time of day; Date.strptime accepted the time
        #  fields of the format but returned a plain Date without them.
        # NOTE(review): the format carries no zone so the parsed value has a
        #  +00:00 offset - confirm the csv times are meant to be utc
        released_at = DateTime.strptime(row['released_at'], '%Y-%m-%d:%H:%M:%S')

        Content.create!(
            :title       => row['title'],
            :site        => row['site'],
            :released_at => released_at
        )
    end
end

#
# release_content - release one piece of unreleased content, picking the
#   site through Content.recommend_release_content
#
#   returns true when content was released and false when there was nothing
#   to release; raises when the released record cannot be saved
#
def release_content

    # get a list of all the sites which still have unreleased content
    sites = Content.active.sites

    # no sites found with active content
    if sites.empty?
        puts "Warning ->> xxx <<-- no releasable content found"
        return false
    end

    # find a fairly distributed site to release content for, looking back
    #  over two releases per known site
    site = Content.recommend_release_content sites, Content.site_count * 2

    # find random content for the site.  we could distribute how the content
    #  was pushed out as well by doing a similar sort of history check and
    #  then finding the union of the available content for the picked site
    content = Content.active.of_site(site).sample

    if content.nil?
        puts "Warning ->> #{site} <<-- no releasable content found"
        return false
    end

    # 'rls' the content.  the release time is saved before the release is
    #  announced, and save! raises on failure, so a failed write is not
    #  reported as a release and then picked up again on the next pass
    content.released_at = DateTime.now
    content.save!
    puts "New Release ->> #{content.site} <<-- " \
            "#{content.title} released at #{content.released_at}"

    true
end

#------------------------------------------------------------------------------
# main
#------------------------------------------------------------------------------

# load the config file holding the interval between releases.  the interval
#  could be set in code instead; a config is used as an exercise.  the file
#  is assumed to exist - a make_a_default_config step could be added to
#  generate one when it is missing
config = YAML.load_file(@kConfigPath)

# raise a KeyError up front when the setting is missing instead of handing
#  nil to sleep later on
interval = config.fetch('interval')

# stop when the csv file is missing rather than carrying on into the parser.
#  File.exist? is used because File.exists? was removed in ruby 3.2
unless File.exist? @kCSVDataPath
    abort "Error: specified CSV path is invalid: #{@kCSVDataPath}"
end

# load the csv data into the database
puts "Loading csv data..."
parseCSVData @kCSVDataPath

# we emulate a quickened release process to show the results of the script.
#  minutes become seconds.  in a real environment a cron job could be used
#  to run a script which releases content, or this script could just sleep
#  for the configured intervals
while release_content
    sleep interval
end

#
# this script will stop running once it runs out of content
#
# the order of the releases could also be pre-sorted once and written out and
#  only updated when new release content is added.  this would be much more
#  efficient in terms of processing.
#
	# fetch gems over https; plain http leaves gem downloads open to tampering
	source 'https://rubygems.org'

	gem 'activerecord'
	# NOTE(review): releaser.rb requires the standard 'csv' library rather than
	#  fastercsv, so this entry looks unused - confirm before removing it
	gem 'fastercsv'
	gem 'sqlite3'
	#!/usr/bin/env ruby -w

	#
	# SSC Group Programming Challenge: A Content Releaser
	# by Moiz Merchant
	#

	#------------------------------------------------------------------------------
	# requires
	#------------------------------------------------------------------------------

	# first task is figuring out the best way to ingest the csv content, looking
	# around on google quickly reveals a gem we can take advantage of
	require 'csv'

	# second task is storing the parsed csv data into a database, active record
	# should help with abstracting away the data layer
	require 'active_record'

	# sqlite should be sufficient for this exercise
	require 'sqlite3'

	# quick and easy configs
	require 'yaml'

	#------------------------------------------------------------------------------
	# globals
	#------------------------------------------------------------------------------

	# location of the csv file holding the content to ingest.  other possible
	#  setups - the file could be uploaded from a webpage straight to the server
	#  managing the content delivery, or it could be passed as an argument to a
	#  cmd line script
	@kCSVDataPath = Dir.home + '/Documents/content.csv'

	# path of the yaml file holding the settings for dynamic script control
	@kConfigPath = 'config.yaml'

	#------------------------------------------------------------------------------
	# active_record setup
	#------------------------------------------------------------------------------

	# open the connection to the content db.  a persistent db would normally be
	#  used; a temporary in-memory sqlite db is used for this example so the code
	#  can be run multiple times without extra code to constantly reset the
	#  database
	ActiveRecord::Base.establish_connection(adapter: 'sqlite3', database: ':memory:')

	#------------------------------------------------------------------------------
	# scheme
	#------------------------------------------------------------------------------

	puts "Initializing database..."

	# create the contents table, one column per field of the csv data.  the
	#  block parameter is written with plain pipes; the backslash-escaped form
	#  does not parse as ruby
	ActiveRecord::Schema.define do
	    create_table :contents do |t|
	        t.string   :title
	        t.string   :site
	        t.datetime :released_at
	    end
	end

	#------------------------------------------------------------------------------
	# records
	#------------------------------------------------------------------------------

	# active record model for a row of the contents table: a piece of content
	#  with a title, the site it belongs to and the time it is released at
	class Content < ActiveRecord::Base

	    #- scopes -----------------------------------------------------------------

	    # content which hasn't been released yet, i.e. whose release time is
	    #  still ahead of the current time
	    scope :active, lambda {
	        where('released_at > :time', :time => DateTime.current)
	    }

	    # content which has already been released
	    scope :released, lambda {
	        where('released_at <= :time', :time => DateTime.current)
	    }

	    # content associated with a certain site.  a hash condition is used
	    #  instead of an sql fragment built around '==', which is not standard
	    #  sql
	    scope :of_site, lambda { |site|
	        where(:site => site)
	    }

	    #- methods ----------------------------------------------------------------

	    #
	    # site_count - return the number of unique sites we have content for
	    #
	    def self.site_count
	        # the options-hash form, count(:select => ...), is no longer accepted
	        #  by newer active record releases
	        distinct.count(:site)
	    end

	    #
	    # sites - list of the unique sites in the current scope
	    #
	    def self.sites
	        select('distinct site').map(&:site)
	    end

	    #
	    # recommend_release_content - find a site to release content for, making
	    #  sure the distribution is averaged out over the recent history
	    #
	    #  sites        - array of candidate sites (those with releasable content)
	    #  days_history - how many of the most recent releases to look back over
	    #
	    #  returns one of the given sites, or nil when sites is empty
	    #
	    def self.recommend_release_content(sites, days_history)
	        # nothing to recommend without candidates
	        return nil if sites.empty?

	        # start every candidate site off with zero releases
	        counts = Hash[sites.map { |site| [site, 0] }]

	        # tally the latest releases per candidate site; only the site column
	        #  is loaded and releases of non-candidate sites are ignored
	        select(:site)
	            .released
	            .order(:released_at).reverse_order
	            .limit(days_history)
	            .each { |row| counts[row.site] += 1 if counts.key?(row.site) }

	        # pick a random site amongst those with the fewest recent releases
	        fewest = counts.values.min
	        counts.select { |_, count| count == fewest }.keys.sample
	    end

	    #--------------------------------------------------------------------------

	    #
	    # to_s - helpful for debugging
	    #
	    def to_s
	        "#{title}, #{site}, #{released_at}"
	    end

	end

	#------------------------------------------------------------------------------
	# methods
	#------------------------------------------------------------------------------

	#
	# parseCSVData - load every row of the given csv file into the contents
	#   table.  the file must start with a header row naming the title, site and
	#   released_at columns
	#
	#   csv_path - path of the csv file to read
	#
	#   errors from reading the file, parsing a timestamp or creating a record
	#   are not rescued here
	#
	def parseCSVData(csv_path)
	    # stream the file row by row rather than reading it all into memory
	    CSV.foreach(csv_path, :headers => true) do |row|
	        # DateTime keeps the time of day; Date.strptime accepted the time
	        #  fields of the format but returned a plain Date without them.
	        # NOTE(review): the format carries no zone so the parsed value has a
	        #  +00:00 offset - confirm the csv times are meant to be utc
	        released_at = DateTime.strptime(row['released_at'], '%Y-%m-%d:%H:%M:%S')

	        Content.create!(
	            :title       => row['title'],
	            :site        => row['site'],
	            :released_at => released_at
	        )
	    end
	end

	#
	# release_content - release one piece of unreleased content, picking the
	#   site through Content.recommend_release_content
	#
	#   returns true when content was released and false when there was nothing
	#   to release; raises when the released record cannot be saved
	#
	def release_content

	    # get a list of all the sites which still have unreleased content
	    sites = Content.active.sites

	    # no sites found with active content
	    if sites.empty?
	        puts "Warning ->> xxx <<-- no releasable content found"
	        return false
	    end

	    # find a fairly distributed site to release content for, looking back
	    #  over two releases per known site
	    site = Content.recommend_release_content sites, Content.site_count * 2

	    # find random content for the site.  we could distribute how the content
	    #  was pushed out as well by doing a similar sort of history check and
	    #  then finding the union of the available content for the picked site
	    content = Content.active.of_site(site).sample

	    if content.nil?
	        puts "Warning ->> #{site} <<-- no releasable content found"
	        return false
	    end

	    # 'rls' the content.  the release time is saved before the release is
	    #  announced, and save! raises on failure, so a failed write is not
	    #  reported as a release and then picked up again on the next pass
	    content.released_at = DateTime.now
	    content.save!
	    puts "New Release ->> #{content.site} <<-- " \
	            "#{content.title} released at #{content.released_at}"

	    true
	end

	#------------------------------------------------------------------------------
	# main
	#------------------------------------------------------------------------------

	# load the config file holding the interval between releases.  the interval
	#  could be set in code instead; a config is used as an exercise.  the file
	#  is assumed to exist - a make_a_default_config step could be added to
	#  generate one when it is missing
	config = YAML.load_file(@kConfigPath)

	# raise a KeyError up front when the setting is missing instead of handing
	#  nil to sleep later on
	interval = config.fetch('interval')

	# stop when the csv file is missing rather than carrying on into the parser.
	#  File.exist? is used because File.exists? was removed in ruby 3.2
	unless File.exist? @kCSVDataPath
	    abort "Error: specified CSV path is invalid: #{@kCSVDataPath}"
	end

	# load the csv data into the database
	puts "Loading csv data..."
	parseCSVData @kCSVDataPath

	# we emulate a quickened release process to show the results of the script.
	#  minutes become seconds.  in a real environment a cron job could be used
	#  to run a script which releases content, or this script could just sleep
	#  for the configured intervals
	while release_content
	    sleep interval
	end

	#
	# this script will stop running once it runs out of content
	#
	# the order of the releases could also be pre-sorted once and written out and
	# only updated when new release content is added. this would be much more
	# efficient in terms of processing.
	#