Skip to content

Instantly share code, notes, and snippets.

@mseri
Created May 28, 2015 10:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mseri/6c08836b95fe5ba7c535 to your computer and use it in GitHub Desktop.
Save mseri/6c08836b95fe5ba7c535 to your computer and use it in GitHub Desktop.
Trivial backup system
#!/usr/bin/env ruby
require 'find'
require 'digest/sha1'
require 'fileutils'
#
# Computes the SHA1 digest of a file's contents.
#
# input: file = "/full/path/to/file"
# output: sha1 hash hexdigest of the file
# e.g 7d99a42960fa0f1e67ca7d00f99a279f1c8bfd07
#
# note (original author): it is faster than: Digest::SHA1.file(file).hexdigest
#
def getFileSha1(file)
  sha1 = Digest::SHA1.new
  # Block form guarantees the handle is closed, even if reading raises.
  # Binary mode hashes the raw bytes without any newline/encoding translation.
  File.open(file, 'rb') do |io|
    buffer = String.new
    # Read the file 65536 bytes at a time
    # This is kind of arbitrary: simply the faster setting for my laptop
    # IO#read(length, buffer) returns nil at end of file, which ends the loop
    # (an empty file therefore yields the digest of the empty string).
    sha1.update(buffer) while io.read(65536, buffer)
  end
  sha1.hexdigest
end
#
# Builds the relative storage path of a file from its SHA1 hexdigest:
# the first two hex characters, then the next two, then "SHA1-" + full hash.
#
# input: file = "/full/path/to/file"
# output: hash[0..1]/hash[2..3]/SHA1-hash
# e.g 7d/99/SHA1-7d99a42960fa0f1e67ca7d00f99a279f1c8bfd07
#
def getPathHashname(file)
  digest = getFileSha1(file)
  first_level = digest[0, 2]
  second_level = digest[2, 2]
  "#{first_level}/#{second_level}/SHA1-#{digest}"
end
#
# Creates the directory dirName (including missing parents) unless
# something already exists at that path.
#
# input: dirName = "/full/path/to/folder"
#        options = FileUtils option hash (e.g. :verbose, :noop)
# raises: RuntimeError if the folder cannot be created; the underlying
#         exception stays available through the raised error's #cause
#
def createPathIfNotExists(dirName, options={})
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  # The options are splatted because FileUtils takes them as keyword
  # arguments; a positional hash raises ArgumentError on Ruby 3+.
  FileUtils.mkdir_p(dirName, **options) unless File.exist?(dirName)
rescue StandardError
  raise "ERROR! The folder " + dirName + " cannot be created!"
end
#
# Create a new snapshot with name
# YEAR-MONTH-DAY_HOURMINUTE
# in the Backup Folder
#
# input: sourcePath      = folder to back up
#        destinationPath = backup folder
#        options         = FileUtils option hash (e.g. :verbose, :noop),
#                          applied to every directory creation, copy and link
#
# Every regular file is stored once under destinationPath/.dataset, at the
# path returned by getPathHashname, and hard-linked into the snapshot folder
# at the same relative location it has under sourcePath.
#
def commitSnapshot(sourcePath, destinationPath, options={})
  # Define the snapshot folder path as
  # backup path + Year-Month-Day_HourMinute
  # e.g. /path/to/backup/2013-12-01_0905
  # All fields are zero-padded: without padding 1:23 and 12:03 would both
  # produce "123" and two snapshots could collide.
  # And the dataset folder path as
  # backup path + .dataset
  datasetPath = destinationPath + '/.dataset'
  snapshotPath = destinationPath + '/' + Time.now.strftime('%Y-%m-%d_%H%M')
  # Create those folders if they do not exist
  # (options are forwarded so that a :noop run really creates nothing)
  createPathIfNotExists(datasetPath, options)
  createPathIfNotExists(snapshotPath, options)
  Find.find(sourcePath) do |file|
    # Generate the Path for the file/folder in the snapshot.
    # Only the leading sourcePath is swapped for snapshotPath; a global
    # substitution would also rewrite later occurrences of the same text
    # inside the path.
    relativePart = file[sourcePath.length..-1]
    destName = relativePart.empty? ? snapshotPath : File.join(snapshotPath, relativePart)
    # Process Files
    if File.directory?(file)
      createPathIfNotExists(destName, options)
    else
      # Generate the path for the datasource file
      dataFile = datasetPath + "/" + getPathHashname(file)
      # Check if dataset subpath exists otherwise create it
      createPathIfNotExists(File.dirname(dataFile), options)
      # copy file into dataFile unless it exists
      # (File.exist?: File.exists? was removed in Ruby 3.2; FileUtils
      # options must be passed as keywords on Ruby 3+)
      FileUtils.cp(file, dataFile, **options) unless File.exist?(dataFile)
      # create a new hardlink destName pointing to dataFile
      FileUtils.ln(dataFile, destName, **options)
    end
  end
end
#
# Backup dataset cleanup
#
# input: backupPath = backup folder (the one containing .dataset)
#        options    = FileUtils option hash (e.g. :verbose, :noop)
#
# Removes every dataset file that has no hard link left besides the dataset
# entry itself, then removes the directories that are empty afterwards.
# With :noop nothing is deleted, so directories that would only become empty
# after the file removals are not reported.
#
def cleanupDataset(backupPath, options={})
  # Define the dataset folder path as
  # backup path + .dataset
  datasetPath = backupPath + '/.dataset'
  directories = []
  Find.find(datasetPath) do |file|
    if File.directory?(file)
      # Emptiness is decided later, once the orphan files are gone
      directories << file
    elsif File.stat(file).nlink == 1
      # Delete the file if it is not linked in
      # any of the snapshots
      FileUtils.rm(file, **options)
    end
  end
  # Find yields a directory before its contents, so walking the list
  # backwards handles children before their parents.
  directories.reverse_each do |dir|
    # Delete folder if it contains only . and ..
    # (checked by listing entries: a directory's link count does not
    # reflect the regular files it holds)
    FileUtils.rmdir(dir, **options) if (Dir.entries(dir) - ['.', '..']).empty?
  end
end
#
# Usage Message for users
#
# Prints the help text (commands, options, disclaimer) to standard output.
# The text starts with an empty line.
#
def showUsage()
  usage_text = <<~USAGE

    fyomh - For Your Own Mental Health (version 0.1)
    USAGE fyomh command [options]
    COMMANDS
    * commit backupPath [sourcePath]
    Creates a new snapshot of sourcePath in a subfolder of 'backupPath'
    named 'Year-Month-Day_HourMinute'.
    'sourcePath' is an optional parameter to indicate the folder to be backed up
    [the default value is . ]
    * cleanup backupPath
    Cleans the unused files in backupPath/.dataset after one or more snapshots
    have been deleted.
    OPTIONS
    * -s, --simulate
    Simulates the backup or the cleanup. It just shows what would happen without
    doing anything for real.
    * -v, --verbose
    Writes down every action the software is doing.
    !!!!! THIS SOFTWARE COMES WITH NO WARRANTY - PLEASE USE AT YOUR OWN RISK !!!!!
  USAGE
  puts usage_text
end
#
# MAIN
# APPLICATION
#
# Command-line entry point: strips the option flags from ARGV, validates the
# remaining "command backupPath [sourcePath]" arguments and dispatches to
# commitSnapshot or cleanupDataset. Anything else prints the usage message.
#
if __FILE__ == $PROGRAM_NAME
  # Before doing anything else check if there are option flags
  # and set the system accordingly
  simulate = ARGV.include?('-s') || ARGV.include?('--simulate')
  verbose = ARGV.include?('-v') || ARGV.include?('--verbose')
  options = if simulate
              # Equivalent as doing FileUtils::DryRun
              { :verbose => true, :noop => true }
            else
              { :verbose => verbose }
            end
  ['-s', '--simulate', '-v', '--verbose'].each { |flag| ARGV.delete(flag) }
  # Now that ARGV has no more known option flag we can proceed
  nArgs = ARGV.length
  isCommit = ARGV[0] == 'commit'
  isCleanup = ARGV[0] == 'cleanup'
  # Show the usage when the argument count is wrong OR the command is
  # unknown; requiring both let "fyomh commit" through to a nil path and
  # silently ignored unknown commands.
  if nArgs < 2 || nArgs > 3 || !(isCommit || isCleanup)
    showUsage
  else
    # Get backup path
    backupPath = File.absolute_path(ARGV[1])
    if isCommit
      # sourcePath is optional and defaults to the current folder
      sourcePath = File.absolute_path(ARGV[2] || '.')
      # Plain string comparison on a whole path component: no regex
      # metacharacter surprises, and /a/documents is not taken to be
      # inside /a/doc. File.join(path, '') appends the separator.
      insideSource = backupPath == sourcePath ||
                     backupPath.start_with?(File.join(sourcePath, ''))
      raise "The Backup folder cannot be in the folder that you are going to backup!" if insideSource
      # options are forwarded so --simulate does not create the folder
      createPathIfNotExists(backupPath, options)
      puts "commit " + sourcePath + " to " + backupPath + "\n please be patient!"
      commitSnapshot(sourcePath, backupPath, options)
    else
      puts "cleaning " + backupPath + "\n please be patient!"
      cleanupDataset(backupPath, options)
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment