Skip to content

Instantly share code, notes, and snippets.

@daveadams
Last active August 29, 2015 13:55
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save daveadams/8730747 to your computer and use it in GitHub Desktop.
Save daveadams/8730747 to your computer and use it in GitHub Desktop.
Sakai disk usage reports by binary properties
#!/usr/bin/env ruby
#
# binary-resource-report.rb
# Reports disk usage broken down by properties in the binary_entity field.
#
# WARNING: takes a long time to run
# (~5 hours for 10,000,000 records)
#
# Written by David Adams (daveadams@gmail.com)
#
# This software is licensed to the Public Domain; No Rights Reserved
#
# Requirements
# You must install the sakai-info gem as well as a database driver.
# Tested drivers are:
# For Oracle: ruby-oci8
# For MySQL: mysql2
#
# MySQL access may also work with the mysqlplus or mysql drivers.
#
######################################################################
#
# Variables
#
# connect to database using Sequel connection strings
# (http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html)
#
# The connection string is taken from the SAKAI_DB_URL environment variable
# when it is set, so real credentials need not be written into this file.
# Otherwise the placeholder below is used and must be edited by hand.
CONNECTION_STRING = ENV.fetch("SAKAI_DB_URL", "oracle://username:password@dbsid").freeze
#
# Report filenames
#
# Frozen so the configured names cannot be modified in place later on.
CREATORS_REPORT = "creators.csv".freeze
CREATEDATES_REPORT = "creation-dates.csv".freeze
MIMETYPES_REPORT = "mimetypes.csv".freeze
EXTENSIONS_REPORT = "file-extensions.csv".freeze
#
######################################################################
# Flush stdout on every write so the progress output emitted with `print`
# further down shows up immediately instead of sitting in a buffer.
STDOUT.sync = true
# Load the sakai-info gem, which supplies the SakaiInfo module used below.
require 'sakai-info'
# Hand the connection string to sakai-info under the "db" key; this must run
# after the require above and before the first SakaiInfo::DB.connect call.
SakaiInfo::DB.configure({ "db" => CONNECTION_STRING })
# One tally hash per report, each starting out empty. Array.new with a block
# builds four separate Hash objects, so the tallies never share state.
creators, createdates, mimetypes, extensions = Array.new(4) { {} }

# Progress counter plus the counts of records lacking a given property.
# Chained assignment is safe here because Integers are immutable.
counter = creator_missing = created_errors = mimetype_errors = no_display_name = 0
# Adds one file of +bytes+ bytes to the totals kept under +key+ in +totals+,
# creating the { :count, :size } entry the first time the key is seen.
def add_usage(totals, key, bytes)
  entry = (totals[key] ||= { :count => 0, :size => 0 })
  entry[:count] += 1
  entry[:size] += bytes
end

# Returns the text after the last "." in +display_name+. Returns "" when the
# name is nil, has no ".", or ends with "." -- so "report." has no extension
# and an empty name never produces a nil key. A leading-dot name such as
# ".bashrc" still yields "bashrc".
def file_extension(display_name)
  _stem, dot, suffix = display_name.to_s.rpartition(".")
  dot.empty? ? "" : suffix
end

print "Collecting data..."
# Use Dataset#each instead of #all.each so rows are handled as the driver
# yields them rather than first being collected into one huge Array.
# NOTE(review): assumes ContentBinaryEntity.new runs no query of its own on
# this connection while the result set is still open -- confirm.
SakaiInfo::DB.connect[:content_resource].select(:resource_id, :binary_entity).each do |dbrow|
  begin
    meta = SakaiInfo::ContentBinaryEntity.new(dbrow[:binary_entity])

    # Read every property before touching any tally, so a record that raises
    # while being decoded is skipped as a whole instead of being half-counted.
    # to_i counts a missing content_length as 0 bytes instead of storing nil
    # and breaking every later addition to the same bucket.
    bytes = meta["content_length"].to_i
    creator = meta["CHEF:creator"]
    created = meta["DAV:creationdate"]
    # Group creation timestamps by their first eight characters.
    created_day = created.nil? ? nil : created.slice(0, 8)
    mimetype = meta["content_type"]
    display_name = meta["DAV:displayname"]
    extension = file_extension(display_name)

    # creators
    if creator.nil?
      creator_missing += 1
    else
      add_usage(creators, creator, bytes)
    end

    # created dates
    if created_day.nil?
      created_errors += 1
    else
      add_usage(createdates, created_day, bytes)
    end

    # mimetypes
    if mimetype.nil?
      mimetype_errors += 1
    else
      add_usage(mimetypes, mimetype, bytes)
    end

    # file extensions: files without a display name are tallied under ""
    no_display_name += 1 if display_name.nil?
    add_usage(extensions, extension, bytes)

    # UI: one progress dot per 500 successfully processed records
    counter += 1
    if counter >= 500
      print "."
      counter = 0
    end
  rescue => e
    # Report the failing record and carry on with the next one.
    STDERR.puts "ERROR: #{dbrow[:resource_id]}"
    STDERR.puts " #{e}"
  end
end
# Close off the "Collecting data..." progress line.
puts " OK"
puts

# Number of distinct keys gathered in each tally.
puts "Quick summary:",
     " Unique file creators: #{creators.size}",
     " Unique creation dates: #{createdates.size}",
     " Unique mime types: #{mimetypes.size}",
     " Unique file extensions: #{extensions.size}"
puts

# Number of records that lacked each property.
puts " Files with no creator: #{creator_missing}",
     " Created date errors: #{created_errors}",
     " Mime type errors: #{mimetype_errors}",
     " Missing display name: #{no_display_name}"
puts
require 'csv'

# Writes one usage report to +path+ as CSV, announcing progress on stdout.
#
# path       - file to create or overwrite
# key_header - title of the first column
# totals     - Hash mapping a key to { :count => Integer, :size => Integer }
# keys       - keys to write, in output order (defaults to insertion order)
#
# Rows go through the CSV library so that keys containing commas, quotes or
# line breaks are quoted instead of corrupting the row.
def write_usage_report(path, key_header, totals, keys = totals.keys)
  print "Writing out #{path}... "
  CSV.open(path, "w") do |csv|
    csv << [key_header, "file count", "total size"]
    keys.each do |key|
      label = key.to_s
      # CSV writes "" as a quoted empty field but nil as a bare empty one;
      # use nil so a blank key keeps producing an unquoted empty column.
      label = nil if label.empty?
      csv << [label, totals[key][:count], totals[key][:size]]
    end
  end
  puts "OK"
end

write_usage_report(CREATORS_REPORT, "user_id", creators)
# Only the creation-date report is sorted by key.
write_usage_report(CREATEDATES_REPORT, "creation date", createdates, createdates.keys.sort)
write_usage_report(MIMETYPES_REPORT, "mimetype", mimetypes)
write_usage_report(EXTENSIONS_REPORT, "extension", extensions)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment