Created
October 30, 2008 13:49
-
-
Save matthewford/21006 to your computer and use it in GitHub Desktop.
Detect Duplicates with progressbar
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Ruby/ProgressBar - a text progress bar library | |
# | |
# Copyright (C) 2001-2005 Satoru Takabayashi <satoru@namazu.org> | |
# All rights reserved. | |
# This is free software with ABSOLUTELY NO WARRANTY. | |
# | |
# You can redistribute it and/or modify it under the terms | |
# of Ruby's license. | |
# | |
# Text progress bar that repeatedly redraws a single line on an output stream.
#
# The line is built from +@format+ (a sprintf template) and
# +@format_arguments+ (a list of field names; each name +x+ is rendered by the
# private method +fmt_x+). The width of the bar field is adjusted so that the
# whole line is one column narrower than the terminal.
#
# The bar draws itself once on construction, then again from #inc / #set
# whenever the integer percentage changes, at least one second has passed
# since the last draw, or the bar is finished.
class ProgressBar
  VERSION = "0.9"

  attr_reader :title
  attr_reader :current
  attr_reader :total
  attr_accessor :start_time

  # Writers for the sprintf template and for the list of field names fed to it.
  attr_writer :format, :format_arguments

  # title:: label shown at the start of the line (cut to 13 characters plus ":")
  # total:: value of #current that corresponds to 100%
  # out::   stream to draw on; must respond to +print+ and +flush+
  def initialize(title, total, out = STDERR)
    @title = title
    @total = total
    @out = out
    @terminal_width = 80 # current width of the bar field, not of the terminal
    @bar_mark = "o"
    @current = 0
    @previous = 0
    @finished_p = false
    @start_time = Time.now
    @previous_time = @start_time
    @title_width = 14
    @format = "%-#{@title_width}s %3d%% %s %s"
    @format_arguments = [:title, :percentage, :bar, :stat]
    clear
    show
  end

  # Blanks the current line and returns the cursor to its first column.
  def clear
    @out.print "\r"
    @out.print(" " * (get_width - 1))
    @out.print "\r"
  end

  # Jumps to 100%, marks the bar as finished and draws the final line
  # (terminated by a newline instead of a carriage return).
  def finish
    @current = @total
    @finished_p = true
    show
  end

  def finished?
    @finished_p
  end

  # Switches the statistics field to "bytes, transfer rate, ETA/elapsed".
  def file_transfer_mode
    @format_arguments = [:title, :percentage, :bar, :stat_for_file_transfer]
  end

  # Marks the bar as finished at its current position and draws it.
  def halt
    @finished_p = true
    show
  end

  # Advances the counter by +step+ (capped at the total) and redraws if needed.
  def inc(step = 1)
    @current += step
    @current = @total if @current > @total
    show_if_needed
    @previous = @current
  end

  # Sets the counter to +count+ and redraws if needed.
  # Raises RuntimeError when +count+ lies outside 0..total.
  def set(count)
    if count < 0 || count > @total
      raise "invalid count: #{count} (total: #{@total})"
    end
    @current = count
    show_if_needed
    @previous = @current
  end

  def inspect
    "#<ProgressBar:#{@current}/#{@total}>"
  end

  private

  # The fmt_* methods below are looked up by name from @format_arguments.

  def fmt_bar
    filled = do_percentage * @terminal_width / 100
    sprintf("|%s%s|",
            @bar_mark * filled,
            " " * (@terminal_width - filled))
  end

  def fmt_percentage
    do_percentage
  end

  def fmt_stat
    @finished_p ? elapsed : eta
  end

  def fmt_stat_for_file_transfer
    sprintf("%s %s %s", bytes, transfer_rate, @finished_p ? elapsed : eta)
  end

  def fmt_title
    @title[0, @title_width - 1] + ":"
  end

  # Formats a byte count with a B/KB/MB/GB suffix. The unit switches at
  # 1000 of the smaller unit (1000 KB, 1000 MB) rather than at 1024.
  def convert_bytes(bytes)
    if bytes < 1024
      sprintf("%6dB", bytes)
    elsif bytes < 1024 * 1000 # 1000kb
      sprintf("%5.1fKB", bytes.to_f / 1024)
    elsif bytes < 1024 * 1024 * 1000 # 1000mb
      sprintf("%5.1fMB", bytes.to_f / 1024 / 1024)
    else
      sprintf("%5.1fGB", bytes.to_f / 1024 / 1024 / 1024)
    end
  end

  def transfer_rate
    bytes_per_second = @current.to_f / (Time.now - @start_time)
    sprintf("%s/s", convert_bytes(bytes_per_second))
  end

  def bytes
    convert_bytes(@current)
  end

  # Renders a duration in seconds as HH:MM:SS.
  def format_time(t)
    seconds = t.to_i
    sprintf("%02d:%02d:%02d", seconds / 3600, (seconds / 60) % 60, seconds % 60)
  end

  # ETA stands for Estimated Time of Arrival.
  # Extrapolates linearly from the time spent so far.
  def eta
    return "ETA: --:--:--" if @current == 0

    spent = Time.now - @start_time
    remaining = spent * @total / @current - spent
    sprintf("ETA: %s", format_time(remaining))
  end

  def elapsed
    sprintf("Time: %s", format_time(Time.now - @start_time))
  end

  # Line terminator: stay on the same line until the bar is finished.
  def eol
    @finished_p ? "\n" : "\r"
  end

  # Integer percentage; an empty job (total of zero) counts as complete.
  def do_percentage
    @total.zero? ? 100 : @current * 100 / @total
  end

  # Returns the terminal width in columns, or 80 when it cannot be determined.
  #
  # FIXME: not portable. 0x5413 is the TIOCGWINSZ request number on Linux;
  # other platforms use different numbers, in which case the call is expected
  # to fail and the default is used.
  def get_width
    default_width = 80
    tiocgwinsz = 0x5413
    data = [0, 0, 0, 0].pack("SSSS")
    return default_width unless @out.ioctl(tiocgwinsz, data) >= 0

    # The buffer holds four unsigned shorts: rows, cols, xpixels, ypixels.
    cols = data.unpack("SSSS")[1]
    # A reported width of 0 columns is unusable (it would make #clear build a
    # string of negative length), so fall back to the default as well.
    cols > 0 ? cols : default_width
  rescue StandardError, NotImplementedError
    # Streams without a working ioctl (StringIO, pipes, unsupported
    # platforms). Deliberately not "rescue Exception": Interrupt and
    # SystemExit must keep propagating.
    default_width
  end

  # Builds the output line from the format template and its fields.
  def render_line
    values = @format_arguments.map { |name| send(sprintf("fmt_%s", name)) }
    sprintf(@format, *values)
  end

  # Draws the line, first resizing the bar field so that the line is exactly
  # one column narrower than the terminal.
  #
  # The resize is applied once and the line re-rendered once. If the format
  # contains no bar field the line length cannot change; the line is then
  # printed as it is instead of retrying without end.
  def show
    line = render_line
    target = get_width - 1
    if line.length != target
      @terminal_width = [@terminal_width + target - line.length, 0].max
      line = render_line
    end
    @out.print(line + eol)
    @out.flush
    @previous_time = Time.now
  end

  # Redraws only when the visible state changed or the display is stale.
  def show_if_needed
    if @total.zero?
      cur_percentage = 100
      prev_percentage = 0
    else
      cur_percentage = (@current * 100 / @total).to_i
      prev_percentage = (@previous * 100 / @total).to_i
    end

    # Use "!=" instead of ">" to support negative changes
    percentage_changed = cur_percentage != prev_percentage
    show if percentage_changed || Time.now - @previous_time >= 1 || @finished_p
  end
end
# Detect Duplicates | |
# Copyright (C) 2008 Matthew Ford <matt@new-bamboo.co.uk> | |
# All rights reserved. | |
# This is free software with ABSOLUTELY NO WARRANTY. | |
# | |
# You can redistribute it and/or modify it under the terms | |
# of Ruby's license. | |
# For the case where id is unique and its value does not matter to you (i.e. it is not used in permalinks).
# detect_dup is used to detect duplicate objects; it returns nil if no object is duplicated.
# Arguments: array of objects, the object you're checking, and keys to be deleted from the attributes hash.
# Pass {:default => false} in the opts hash to not use the default keys.
# Usage: delete_dups_for(Comment, :post_id)
# Finds the first object in +keep_array+ whose attributes equal those of
# +obj+ once the ignored keys have been removed from both sides.
#
# keep_array:: objects to compare against; each must respond to +attributes+
# obj::        the object being checked; must respond to +attributes+
# opts::       :keys    => extra attribute keys to ignore (default: none)
#              :default => standard keys to ignore; defaults to
#                          id/created_at/updated_at, pass false to ignore none
#
# Either option may be given on its own; the missing one keeps its default.
# Returns the matching object, or nil when there is no match or when
# +keep_array+ or +obj+ is nil.
def detect_dup(keep_array, obj, opts = {:keys => [], :default => ['id', 'created_at', 'updated_at']})
  return unless keep_array && obj

  standard_keys = ['id', 'created_at', 'updated_at']
  opts ||= {}

  # Read each option separately so that a partial hash such as
  # {:default => false} or {:keys => [...]} does not wipe out the other one.
  ignored = Array(opts[:keys])
  fallback = opts.fetch(:default, standard_keys)
  fallback = standard_keys if fallback == true
  ignored += Array(fallback) if fallback

  # Build a filtered copy rather than deleting in place, so the hash
  # returned by +attributes+ is never modified.
  comparable = lambda do |record|
    record.attributes.reject { |key, _value| ignored.include?(key) }
  end

  wanted = comparable.call(obj)
  keep_array.detect { |candidate| comparable.call(candidate) == wanted }
end
# Deletes duplicate records of +model+, showing a progress bar while scanning.
#
# model::      class responding to +name+, +all+ and +delete+
#              (intended for ActiveRecord models)
# collect_by:: name of a method whose value partitions the records; a record
#              is only compared with records sharing the same value
#
# Records are visited in reverse order of +model.all+, so within each group
# the record visited first is kept and later matches (as judged by
# detect_dup with its default options) are deleted.
#
# Deletion goes through +model.delete+, so it runs on the model's own
# connection and against its own primary key column.
# NOTE(review): like the raw DELETE statement this replaces, +delete+ is
# expected to skip callbacks -- confirm for the Rails version in use.
def delete_dups_for(model, collect_by)
  kept_by_group = Hash.new { |hash, key| hash[key] = [] }
  duplicate_ids = []
  records = model.all.reverse # so we add newest first, sort of
  pbar = ProgressBar.new(model.name.pluralize, records.count)

  records.each do |record|
    group = kept_by_group[record.send(collect_by)]
    if detect_dup(group, record).nil?
      group << record
    else
      duplicate_ids << record.id
    end
    pbar.inc
  end

  model.delete(duplicate_ids) unless duplicate_ids.empty?
  pbar.finish
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment