Skip to content

Instantly share code, notes, and snippets.

@mweppler
Last active September 28, 2015 03:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mweppler/1375184 to your computer and use it in GitHub Desktop.
Save mweppler/1375184 to your computer and use it in GitHub Desktop.
Compares file hashes
#!/usr/bin/env ruby
require 'digest/md5'
require 'optparse'
# One inventoried file: an identifying hash, the hash of its contents, its
# modification time, its name and the repository path it was found under.
#
# The class-level helpers persist a collection of these records as one
# comma-separated line per file and read them back.
class RepositoryFile
  attr_accessor :obj_hash, :file_hash, :file_mtime, :file_name, :file_path

  def initialize(obj_hash, file_hash, file_mtime, file_name, file_path)
    @obj_hash = obj_hash
    @file_hash = file_hash
    @file_mtime = file_mtime
    @file_name = file_name
    @file_path = file_path
  end

  # Loads records from a file in the format produced by .to_file.
  #
  # Lines that do not split into exactly five comma-separated fields are
  # skipped. All loaded attributes are Strings.
  #
  # @param file_name [String] path of the recorded file
  # @return [Hash{String => RepositoryFile}] records keyed by obj_hash
  def self.from_file(file_name)
    records = {}
    File.foreach(file_name) do |line|
      # Drop the line terminator first; otherwise it ends up inside the last
      # field (file_path). The -1 limit keeps a trailing empty field so a
      # record with an empty file_path still has five fields.
      fields = line.chomp.split(',', -1)
      next unless fields.size == 5

      records[fields[0]] = new(*fields)
    end
    records
  end

  # Writes one comma-separated line per record to file_name, replacing any
  # existing content.
  #
  # @param repository_files [Hash{String => RepositoryFile}] records to write
  # @param file_name [String] path of the file to write
  # @return [nil]
  def self.to_file(repository_files, file_name)
    # Block form closes the handle even if writing raises.
    File.open(file_name, 'w') do |file|
      repository_files.each do |_key, value|
        file.write "#{value.obj_hash},#{value.file_hash},#{value.file_mtime},#{value.file_name},#{value.file_path}\n"
      end
    end
    nil
  end

  def to_s
    "obj_hash:#{@obj_hash}, file_hash:#{@file_hash}, file_mtime:#{@file_mtime}, file_name:#{@file_name}, file_path:#{@file_path}"
  end

  # Collects the file_name of every record, in iteration order.
  #
  # @param repository_files [Hash{String => RepositoryFile}]
  # @return [Array] the file names
  def self.to_file_names(repository_files)
    repository_files.map { |_key, value| value.file_name }
  end
end
# Maximum number of bytes requested per readpartial call in calculate_md5_for.
$BUFFER_SIZE = 1024
# Command-line settings; filled in by parse_options and read by the
# comparison and main routines.
$options = {}
# Computes the MD5 hex digest of a file's contents.
#
# The file is read in pieces of at most $BUFFER_SIZE bytes so it is never
# loaded into memory in one go.
#
# @param file_path [String] path of the file to hash
# @return [String] hexadecimal MD5 digest
def calculate_md5_for(file_path)
  digest = Digest::MD5.new
  # Binary mode: hash the bytes exactly as stored, with no newline
  # translation on platforms that distinguish text from binary files.
  File.open(file_path, 'rb') do |io|
    digest.update(io.readpartial($BUFFER_SIZE)) until io.eof?
  end
  digest.hexdigest
end
# Computes the MD5 hex digest of a file's contents with tab, line feed,
# carriage return and space bytes (9, 10, 13, 32) left out, so files that
# differ only in that whitespace produce the same digest.
#
# @param file_path [String] path of the file to hash
# @return [String] hexadecimal MD5 digest of the non-whitespace bytes
def calculate_md5_for_no_white(file_path)
  digest = Digest::MD5.new
  chunk_size = 64 * 1024
  # Binary mode so bytes are hashed as stored; chunked reads avoid one
  # digest update per byte.
  File.open(file_path, 'rb') do |io|
    # IO#read with a positive length returns nil at end of file.
    while (chunk = io.read(chunk_size))
      digest.update(chunk.delete("\t\n\r "))
    end
  end
  digest.hexdigest
end
# Compares a recorded inventory (base) against a delta inventory.
# Delegates directly to compare_repositories and returns its result.
def compare_hashes_from_recorded(base, delta)
  compare_repositories base, delta
end
# Prints up to three report sections comparing two inventories: file names
# present only in delta (added), file names present only in base (deleted),
# and files whose content hashes differ (changed).
#
# A section is left out when $options[:skipcompare] includes its flag
# ('a', 'd' or 'c').
#
# @param base [Hash] base inventory
# @param delta [Hash] delta inventory
# @return [nil]
def compare_repositories(base, delta)
  names_in_base = RepositoryFile.to_file_names(base)
  names_in_delta = RepositoryFile.to_file_names(delta)

  # Each entry: skip flag, heading, and the code that prints the section body.
  sections = [
    ['a', 'The following files have been added:', -> { puts names_in_delta - names_in_base }],
    ['d', 'The following files have been deleted:', -> { puts names_in_base - names_in_delta }],
    ['c', 'The following files have changed:', -> { compare_repositories_md5sums(base, delta) }]
  ]

  sections.each do |flag, heading, print_body|
    next if $options[:skipcompare].include? flag

    puts heading
    print_body.call
    puts ''
  end
  nil
end
# For every key present in both inventories, prints the base and delta
# entries (hash, mtime, joined path) when their file hashes differ.
# Keys that exist only in base are skipped.
#
# @param base [Hash] base inventory
# @param delta [Hash] delta inventory
def compare_repositories_md5sums(base, delta)
  base.each do |key, base_file|
    next unless delta.include?(key)

    delta_file = delta.fetch(key)
    next if delta_file.file_hash === base_file.file_hash

    base_location = File.join(base_file.file_path, base_file.file_name)
    delta_location = File.join(delta_file.file_path, delta_file.file_name)
    puts "(base) #{base_file.file_hash} - #{base_file.file_mtime} - #{base_location}\n"
    puts "(delta) #{delta_file.file_hash} - #{delta_file.file_mtime} - #{delta_location}\n\n"
  end
end
# Prints the option parser held in @optparse (the usage text once
# parse_options has run) to standard output and terminates the script.
def invalid_arguments
  $stdout.puts(@optparse)
  exit
end
# Loads ignore patterns into the global $ignore_pattern array, one per line
# of ignore_pattern_file.
#
# All whitespace is removed from each line. Lines that are empty afterwards
# are skipped: an empty pattern would match every file name in
# should_be_ignored?, because String#include?('') is always true.
#
# @param ignore_pattern_file [String] path of the plain-text pattern file
# @return [nil]
def inventory_ignore_pattern(ignore_pattern_file)
  $ignore_pattern = []
  # File.foreach closes the file itself, even if the block raises.
  File.foreach(ignore_pattern_file) do |line|
    pattern = line.gsub(/\s+/, '')
    $ignore_pattern << pattern unless pattern.empty?
  end
end
# Builds an inventory of the files under repository_directory.
#
# For a directory, every non-directory entry matched by the '**/*' glob is
# included, minus entries rejected by should_be_ignored? when
# $ignore_pattern has been loaded. For a single file, the inventory holds
# that one file, recorded by its base name under its parent directory.
#
# If the path does not exist, a message is printed and invalid_arguments is
# called.
#
# @param repository_directory [String] directory (or single file) to inventory
# @return [Hash{String => RepositoryFile}] records keyed by the MD5 of the
#   file name relative to the repository
def inventory_repository(repository_directory)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  unless File.exist?(repository_directory)
    puts "Repository #{repository_directory} does not exist."
    return invalid_arguments
  end

  if File.directory?(repository_directory)
    root = repository_directory
    # Block form restores the working directory afterwards, so relative
    # repository paths (including a later delta repository) still resolve.
    relative_names = Dir.chdir(root) do
      Dir.glob('**/*').reject { |entry| File.directory?(entry) }
    end
    unless $ignore_pattern.nil?
      relative_names = relative_names.reject { |name| should_be_ignored?(name) }
    end
  else
    # Single file: split into directory and name so the join below yields
    # the original path instead of the path joined onto itself.
    root = File.dirname(repository_directory)
    relative_names = [File.basename(repository_directory)]
  end

  relative_names.sort.each_with_object({}) do |name, inventory|
    full_path = File.join(root, name)
    key = md5sum(name)
    inventory[key] = RepositoryFile.new(key, calculate_md5_for_no_white(full_path), File.mtime(full_path), name, root)
  end
end
# Returns the hexadecimal MD5 digest of the given string. Used on file
# names to derive inventory keys.
def md5sum(file_name)
  Digest::MD5.new.update(file_name).hexdigest
end
# Parses the command line (ARGV) into the global $options hash and keeps the
# parser in @optparse so the usage text can be printed later.
#
# Every option key is reset to its default (nil, or [] for :skipcompare)
# before parsing. --skipcompare is coerced to an Array of flags so it has the
# same type as its default. On an unparsable command line the error and the
# usage text are printed and the script exits with status 1.
#
# @return [Array<String>] the arguments left in ARGV after option parsing
def parse_options
  @optparse = OptionParser.new do |opts|
    # Define the options, and what they do
    opts.banner = "Usage: compare_hash.rb [options]"

    $options[:baserepo] = nil
    opts.on('-b', '--baserepo FILE', 'The base repository to create file hashes from') do |file|
      $options[:baserepo] = file
    end

    $options[:deltarepo] = nil
    opts.on('-d', '--deltarepo FILE', 'The delta repository to test against base repository file hashes') do |file|
      $options[:deltarepo] = file
    end

    # $options[:hashtype] = nil
    # opts.on( '-t', '--hashtype [OPT]', 'Hash type to use: MD5, SHA1' ) do |type|
    # $options[:hashtype] = type || 'MD5'
    # end

    opts.on('-h', '--help', 'Display this screen') do
      puts opts
      exit
    end

    $options[:ignorefile] = nil
    opts.on('-i', '--ignorefile FILE', 'Plain text file containing list of files or file types to ignore.') do |file|
      $options[:ignorefile] = file
    end

    $options[:logfile] = nil
    opts.on('-l', '--logfile FILE', 'Write log to FILE') do |file|
      $options[:logfile] = file
    end

    $options[:recordedfile] = nil
    opts.on('-r', '--recordedfile FILE', 'A copy of a previous base repositories files (Mandatory if no base repository is passed)') do |file|
      $options[:recordedfile] = file
    end

    # $options[:verbose] = false
    # opts.on( '-v', '--verbose', 'Output more information' ) do
    # $options[:verbose] = true
    # end

    $options[:skipcompare] = []
    # Array makes OptionParser split "a,d,c" into ["a", "d", "c"].
    opts.on('-s', '--skipcompare a,d,c', Array, 'Skip comparing files that have been: (a=Added, d=Deleted, c=Changed)') do |list|
      $options[:skipcompare] = list
    end
  end

  # Parse the Options...
  begin
    @optparse.parse!
  rescue OptionParser::ParseError => e
    # Report bad or incomplete options with the usage text, not a stack trace.
    warn e.message
    puts @optparse
    exit 1
  end
end
# Persists an inventory to output_file by delegating to
# RepositoryFile.to_file, and returns that call's result.
def record_hashes_to_file(repository_files, output_file)
  RepositoryFile.to_file repository_files, output_file
end
# Tells whether file_name contains any of the substrings held in the global
# $ignore_pattern list.
#
# @param file_name [String] file name to check
# @return [Boolean] true when at least one pattern occurs in file_name
def should_be_ignored?(file_name)
  $ignore_pattern.any? { |pattern| file_name.include?(pattern) }
end
# Script entry point. Parses the options, loads ignore patterns if an
# existing ignore file was given, inventories whichever repositories were
# named, then:
#
# * base and delta given: prints files whose hashes differ;
# * only base given: records the base inventory to the log file;
# * only delta given: compares the delta against a recorded inventory;
# * neither given: prints a message and the usage text.
#
# The log file defaults to './recorded.txt'. The script exits when done.
def main
  parse_options

  # ignore
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  ignore_file = $options[:ignorefile]
  inventory_ignore_pattern(ignore_file) if ignore_file && File.exist?(ignore_file)

  # base repository
  base_repository_files = inventory_repository($options[:baserepo]) unless $options[:baserepo].nil?
  # delta repository
  delta_repository_files = inventory_repository($options[:deltarepo]) unless $options[:deltarepo].nil?

  logfile = $options[:logfile].to_s.empty? ? './recorded.txt' : $options[:logfile]

  if base_repository_files && delta_repository_files
    # NOTE(review): this path reports changed files only and does not consult
    # --skipcompare; confirm whether compare_repositories was intended here.
    compare_repositories_md5sums(base_repository_files, delta_repository_files)
  elsif base_repository_files
    record_hashes_to_file(base_repository_files, logfile)
  elsif delta_repository_files
    # Choose the recorded file up front: use --recordedfile when given,
    # otherwise fall back to the log file.
    recorded_file = $options[:recordedfile].to_s.empty? ? logfile : $options[:recordedfile]
    unless File.file?(recorded_file)
      puts "Recorded file #{recorded_file} does not exist."
      return invalid_arguments
    end
    compare_hashes_from_recorded(RepositoryFile.from_file(recorded_file), delta_repository_files)
  else
    puts 'No base or delta repository to hash/compare!'
    return invalid_arguments
  end

  puts 'Finished!'
  exit
end
# Run main only when this file is executed directly, not when it is loaded
# by another script.
main if __FILE__ == $PROGRAM_NAME
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment