Created
June 29, 2022 18:41
-
-
Save karagenit/859f2d048215b2d572267b0546c5ab19 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Summarises the access log 'custom.log' into a Markdown stats page, 'stats.md'.

# URI is used to pull the hostname out of referrer fields.
require 'uri'
# Helper to get hostnames from referrer fields.
#
# @param url [String, nil] raw referrer value taken from a log line
# @return [String] the host component of +url+; an empty string when the
#   value has no host (e.g. "-") or cannot be parsed as a URI at all
def get_host(url)
  # URI#host is nil for host-less values, hence the to_s.
  URI.parse(url).host.to_s
rescue StandardError
  # Deliberately best-effort: an unparseable referrer is reported as "no host".
  ''
end
# Read the log file into an array of hashes, one per logged request.
#
# Every line is split on whitespace. The first two fields together form the
# timestamp, the next four are ip, url, status and referrer, and whatever is
# left over is re-joined with single spaces as the user agent.
data = File.readlines('custom.log').each_with_object([]) do |line, records|
  date, time, ip, url, status, referrer, *agent = line.split(' ')

  # Blank or truncated lines have no url field. They cannot be attributed to a
  # page and would otherwise raise further down, so skip them.
  next if url.nil?

  records << {
    datetime: "#{date} #{time}",
    ip: ip,
    url: url,
    status: status,
    referrer: referrer,
    agent: agent.join(' ')
  }
end
# Writing to the output file. Mode 'w' overwrites the old file!
# The block form of File.open closes the handle even when an error is raised
# part-way through the report.
File.open('stats.md', 'w') do |file|
  # Write the liquid header and the start of a markdown table
  file.write <<~END
    ---
    layout: default
    title: 'Site Stats'
    ---
    |Page|Views|Visitors|
    |----|-----|--------|
  END

  # Only care about successful requests to .html pages
  page_hit = ->(d) { d[:status] == '200' && d[:url].to_s.end_with?('.html') }

  # Pages in order of their first successful request
  urls = data.select(&page_hit).map { |d| d[:url] }.uniq

  # Index the log by url once instead of rescanning it for every page
  requests_by_url = data.group_by { |d| d[:url] }

  # Count number of views and unique viewers for each page, and write them as
  # a row in the file.
  # NOTE(review): views/visitors include every logged request for the page,
  # whatever its status, not only the 200s used to pick the pages - confirm
  # this is intended.
  # NOTE(review): urls come straight from the access log and are written
  # unescaped; a '|' or markup in a requested url ends up in the page.
  urls.each do |url|
    requests = requests_by_url[url]
    # A visitor is identified by the (ip, user agent) pair
    visitors = requests.map { |d| [d[:ip], d[:agent]] }.uniq.size
    file.write "|#{url}|#{requests.size}|#{visitors}|\n"
  end

  # Setup the next markdown table. The leading blank line keeps it from being
  # read as further rows of the page table above.
  file.write <<~END

    |Referrer|Total|
    |--------|-----|
  END

  # Count the successful .html requests that came from each referrer host.
  # Hosts that never produced such a request never enter the hash, so only
  # non-zero referrers are listed.
  referrer_totals = Hash.new(0)
  data.each do |d|
    referrer_totals[get_host(d[:referrer])] += 1 if page_hit.call(d)
  end

  # Write the referrers as rows in the table, busiest first; ties are ordered
  # by host name so the output is stable between runs.
  referrer_totals.sort_by { |host, total| [-total, host] }.each do |host, total|
    file.write "|#{host}|#{total}|\n"
  end

  # Finish up with a timestamp on the page (blank line first so it is its own
  # paragraph rather than part of the table)
  file.write <<~END

    *Last updated: #{Time.now.strftime("%m/%d/%Y %H:%M")}*
  END
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment