Skip to content

Instantly share code, notes, and snippets.

@ebot
Created February 23, 2010 14:56
Show Gist options
  • Save ebot/312255 to your computer and use it in GitHub Desktop.
Save ebot/312255 to your computer and use it in GitHub Desktop.
Reads the Apache server log to gather statistics on the web site.
#!/usr/bin/env ruby
require 'rubygems'
require 'RedCloth'
require 'net/ssh'
require 'net/scp'
# Connection settings for the remote host.
# NOTE(review): these are placeholder values; nothing in this script uses
# username/password yet because the SSH download step is still a TODO.
host_name = 'host_name'
username = 'user_name'
password = 'password'
# The archive is named after the host and the current month,
# e.g. "host_name-Feb-2010.gz".
download_file = "#{host_name}-#{Time.now.strftime('%b-%Y')}.gz"
download_path = "./logs/"
# Build the full local path with File.join so download_path itself is left
# untouched (String#<< would append the file name to download_path in place).
download_me = File.join(download_path, download_file)
# TODO: Insert the SSH download code here
# Decompress the .gz archive at reduced CPU priority (nice -n 5).
# The command is passed to system as an argument list so the file name is
# never interpreted by a shell.
# NOTE(review): this operates on download_file in the current directory, not
# on the "./logs/" path built above - confirm once the download step exists.
command = ['nice', '-n', '5', 'gzip', '-d', download_file]
success = system(*command)
# system returns true only when the command ran and exited with status 0
# (false for a non-zero status, nil if it could not be executed).
# Exit with a failing status so a caller can tell no reports were built.
exit 1 unless success
# Now we can build the reports.
#
# Scan the decompressed log and tally every line whose status field is '200',
# whose method field is '"GET' and whose path contains '/audio/dle'.
# Lines are split on whitespace; the fields used are 0 (client IP),
# 5 (quoted method), 6 (request path) and 8 (status).
# NOTE(review): presumably the Apache combined log format - confirm.
#
# Resulting structures:
#   episodes     - episode => { :downloads => Integer,
#                               :ip_addresses => { ip => '' } }
#   ip_addresses - ip => { :count, :audio_count, :video_count,
#                          :log => [request paths] }
ip_addresses = {}
episodes = {}
# The block form of File.open closes the log even if parsing raises.
File.open(File.basename(download_file, '.gz'), 'r') do |log_file|
  log_file.each_line do |line|
    fields = line.split(" ")
    next unless fields[8] == '200' && fields[5] == '"GET'

    ip = fields[0]
    path = fields[6]
    next unless path.index('/audio/dle')

    # The episode name is the third '/'-separated component of the path.
    episode = path.split('/')[2]

    # Count the episode downloads.
    episodes[episode] ||= { :downloads => 0, :ip_addresses => {} }
    episodes[episode][:downloads] += 1

    # Record each IP once per episode; the hash size is the unique count.
    episodes[episode][:ip_addresses][ip] ||= ''

    # Gather information about this unique IP address.
    ip_addresses[ip] ||= { :count => 0,
                           :audio_count => 0,
                           :video_count => 0,
                           :log => [] }
    ip_addresses[ip][:count] += 1
    ip_addresses[ip][:audio_count] += 1 if path.index('mp3')
    ip_addresses[ip][:video_count] += 1 if path.index('m4v')
    ip_addresses[ip][:log] << path
  end
end
# Now that we have parsed the data, let's build the Textile report.
#
# Inline stylesheet placed at the top of the generated pages.
css = <<EOF
<style type="text/css">
html {
font-family: Verdana;
font-size: .9em
}
h1 {
font-size: 175%;
}
h2 {
font-size: 150%;
}
h3 {
font-size: 125%;
}
h4 {
font-size: 115%;
}
</style>
EOF
# Sample the clock once so the file name and the heading always agree.
report_time = Time.now
# The block form of File.open flushes and closes the report even if a write
# raises part-way through.
File.open("#{report_time.strftime('%Y_%m')}_podcast_stats.textile", 'w') do |output|
  output << css
  output << "\nh1. DLE Podcast Statistics for #{report_time.strftime('%b %Y')}\n\n"
  output << "\"Home\":index.html\n\n"

  # Listener information: unique IPs per episode.
  output << "h2. Listener Statistics (how many unique IPs downloaded each episode)\n\n"
  episodes.sort.each do |episode, stats|
    output << "* Episode #{episode} had #{stats[:ip_addresses].size} listeners or viewers.\n"
  end

  # Episode information: total downloads per episode.
  output << "\nh2. Episode Statistics (how many times each episode was downloaded)\n\n"
  episodes.sort.each do |episode, stats|
    output << "* Episode #{episode} was downloaded #{stats[:downloads]} times.\n"
  end

  # Per-IP information: hit counts followed by the sorted list of paths.
  output << "\nh2. User Statistics\n\n"
  output << "The server had #{ip_addresses.size} unique visitors.\n\n"
  ip_addresses.sort.each do |ip, stats|
    output << "* *#{ip} hit the server #{stats[:count]} time(s), video => #{stats[:video_count]}, mp3 => #{stats[:audio_count]}.*\n"
    stats[:log].sort.each { |element| output << "** #{element}\n" }
  end

  output << "\n\"Home\":index.html"
end
# Generate the HTML version of the report: read the Textile source back in,
# convert it with RedCloth and write the result next to the .textile file.
# File.read and the block form of File.open close their handles, so neither
# file is left open.
report_base = "#{Time.now.strftime('%Y_%m')}_podcast_stats"
rc = RedCloth.new File.read("#{report_base}.textile")
File.open("#{report_base}.html", 'w') { |html| html << rc.to_html }
# Build index.html: a links page with one Textile bullet per generated
# "*stats.html" report in the current directory.
#
# Work on a copy so the css string itself is not modified.
index = css.dup
# Leading blank line separates the heading from the </style> block, the same
# way the monthly report page does.
# NOTE(review): Textile is expected to need this to start a new block - confirm.
index << "\nh1. Podcast Statistics\n\n"
# Dir.glob does not guarantee an order on every Ruby version, so sort
# explicitly. Report names start with YYYY_MM, so reverse order lists the
# newest report first.
links = Dir.glob('*stats.html').sort.map { |page| "* \"#{page}\":#{page}\n" }
links.reverse_each { |link| index << link }
# The block form of File.open closes the page even if rendering raises.
File.open('index.html', 'w') do |links_page|
  links_page << RedCloth.new(index).to_html
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment