Created
December 6, 2013 01:26
-
-
Save gabehollombe/7817161 to your computer and use it in GitHub Desktop.
A simple Ruby script to calculate the percentage of similar lines between files.
Useful for seeing duplicated lines in rails view files, for example.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Show the percentage of similar lines between files.
# Requires: diffy gem
# Usage: Edit the Dir.glob pattern near the bottom of this script to match the files you're interested in, then run this script.
require 'rubygems' | |
require 'diffy' | |
# Estimates how similar two files are, as the percentage of lines in the
# larger file that the diff does not report as changed.
#
# The file with more lines is the reference (file1 wins a tie). When file1
# is the reference, diff lines starting with "-" are counted as its changed
# lines; when file2 is the reference, diff lines starting with "+" are
# counted instead.
#
# @param file1 [String] path to the first file
# @param file2 [String] path to the second file
# @return [Float] percentage in the range the diff allows (100.0 when the
#   diff reports no changed lines for the reference file); 0.0 when both
#   files contain no lines at all
def get_dup_lines_percentage(file1, file2)
  num_lines_in_1 = File.foreach(file1).count
  num_lines_in_2 = File.foreach(file2).count

  # Pick the reference file and the diff marker that flags its changed lines.
  total_lines, marker =
    if num_lines_in_1 >= num_lines_in_2
      [num_lines_in_1, '-']
    else
      [num_lines_in_2, '+']
    end

  # Both files are empty: dividing by zero would yield NaN, so report
  # "nothing shared" explicitly instead.
  return 0.0 if total_lines.zero?

  diff = Diffy::Diff.new(file1, file2, source: 'files', context: 0)
  changed_lines = diff.count { |line| line.start_with?(marker) }

  (1 - changed_lines.to_f / total_lines) * 100
end
# Compare every ordered pair of distinct HAML files found below the current
# directory and print the pairs whose similarity score exceeds the threshold.
threshold = 50
all_files = Dir.glob('**/*.haml*')

all_files.product(all_files).each do |file_a, file_b|
  # A file is never compared against itself.
  next if file_a == file_b

  score = get_dup_lines_percentage(file_a, file_b)
  next unless score > threshold

  puts "#{file_a} - #{file_b} - #{score}"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment