Last active
December 15, 2015 17:58
-
-
Save wilkie/5299853 to your computer and use it in GitHub Desktop.
Retrieves a nice word-based diff of a file in Git from within Ruby.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example: print some_file with the words that changed since the previous
# commit marked up -- removed text inside <del>, added text inside <ins>,
# and unchanged text printed as-is.
require_relative 'git_diff_word_chunks'

git_diff_word_chunks('some_file', 'HEAD^1').each do |chunk|
  if chunk[:mode] == :changed
    print "<del>#{chunk[:delete]}</del><ins>#{chunk[:insert]}</ins>"
  else
    print chunk[:content]
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Gems are fetched from the public RubyGems index.
source 'https://rubygems.org'

# Supplies the Git module that git_diff_word_chunks uses to open the
# repository and obtain the patch.
gem 'git', '~> 1.2.5'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'bundler' | |
Bundler.require | |
# Computes a word-level diff of +file+ between two commits.
#
# The patch for +file+ is taken from the Git repository in the current
# directory; the lines that lie outside the patch hunks are filled in from the
# copy of +file+ on disk, so the result covers the whole file.
# NOTE(review): the hunk line numbers are applied to the on-disk file, which
# assumes that file matches +commit_new+ -- confirm for callers that diff
# against something other than the checked-out revision.
#
# @param file [String] path of the file to diff
# @param commit_old [String] revision to diff from
# @param commit_new [String] revision to diff to
# @return [Array<Hash>] chunks in file order, each either
#   { :mode => :unchanged, :content => "some string..." } or
#   { :mode => :changed, :insert => "new text", :delete => "old text" }.
#   Adjacent chunks never share a mode; :content / :insert may be empty.
def git_diff_word_chunks(file, commit_old = 'HEAD^1', commit_new = 'HEAD')
  patch = Git.open('.').diff(commit_old, commit_new).path(file).to_s

  # File.read closes the handle and, unlike Kernel#open, never interprets a
  # leading "|" in the path as a command to run.
  current_lines = File.read(file).lines

  chunks = []
  git_diff_line_groups(patch, current_lines).each do |group|
    if group[:mode] == :unchanged
      git_diff_append_chunk(chunks, { :mode => :unchanged, :content => group[:content].join })
    else
      pieces = git_diff_word_split(group[:insertions].join, group[:deletions].join)
      pieces.each { |piece| git_diff_append_chunk(chunks, piece) }
    end
  end
  chunks
end

# Splits the file into alternating line-level groups: runs of unchanged lines
# ({ :mode => :unchanged, :content => [...] }) and runs of changed lines
# ({ :mode => :changed, :insertions => [...], :deletions => [...] }).
def git_diff_line_groups(patch, current_lines)
  groups = []
  consumed = 0     # lines of the new file already accounted for
  last_line = nil  # most recent patch line, for "\ No newline" handling

  add_unchanged = lambda do |lines|
    if groups.last && groups.last[:mode] == :unchanged
      groups.last[:content].concat(lines)
    else
      groups << { :mode => :unchanged, :content => lines }
    end
  end

  add_changed = lambda do |key, text|
    unless groups.last && groups.last[:mode] == :changed
      groups << { :mode => :changed, :insertions => [], :deletions => [] }
    end
    groups.last[key] << text
  end

  # Skip the file header however long it is ("new file mode" etc. add lines).
  hunk_lines = patch.lines.drop_while { |l| !l.start_with?('@@') }

  hunk_lines.each do |line|
    if line.start_with?('@@')
      # Either count may be omitted, in which case it is 1.
      header = line.match(/\A@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/)
      next unless header

      start = header[1].to_i
      count = header[2] ? header[2].to_i : 1
      # A hunk normally begins AT line `start` (index start - 1); an empty
      # new side (count 0) sits AFTER line `start`.
      first = count.zero? ? start : start - 1

      # Everything between the previous hunk and this one is unchanged.
      add_unchanged.call(current_lines[consumed...first] || [])
      consumed = first + count
    elsif line.start_with?(' ')
      last_line = line[1..-1]
      add_unchanged.call([last_line])
    elsif line.start_with?('+')
      last_line = line[1..-1]
      add_changed.call(:insertions, last_line)
    elsif line.start_with?('-')
      last_line = line[1..-1]
      add_changed.call(:deletions, last_line)
    elsif line.start_with?('\\')
      # "\ No newline at end of file": the preceding line had no terminator.
      last_line.chomp! if last_line
    end
  end

  # Whatever follows the last hunk is unchanged.
  add_unchanged.call(current_lines.drop(consumed))
  groups
end

# Compares two texts token by token (alternating whitespace runs and words,
# matched by position -- no realignment after an added or removed word) and
# returns :unchanged / :changed pieces in the same shape as the final chunks.
#
# Inside a changed piece, matching tokens are held back: if a mismatch follows
# they are folded into the change, but once more than +tolerance+ words in a
# row match, they are emitted as their own unchanged piece.
def git_diff_word_split(inserts, deletes, tolerance = 3)
  pieces = [{ :mode => :unchanged, :content => String.new }]
  pending = String.new  # matching text held back since the last mismatch
  streak = 0            # matching words held back in +pending+
  whitespace = true

  # Run until BOTH sides are used up so trailing deletions are not lost.
  until inserts.empty? && deletes.empty?
    pattern = whitespace ? /\A\s*/ : /\A\S*/
    ins = inserts[pattern]
    del = deletes[pattern]
    inserts = inserts[ins.length..-1]
    deletes = deletes[del.length..-1]
    in_change = pieces.last[:mode] == :changed

    if ins != del
      if in_change
        pieces.last[:insert] << pending << ins
        pieces.last[:delete] << pending << del
      else
        pieces << { :mode => :changed, :insert => ins, :delete => del }
      end
      pending = String.new
      streak = 0
    elsif !in_change
      pieces.last[:content] << ins
    elsif whitespace || streak < tolerance
      pending << ins
      streak += 1 unless whitespace
    else
      # Enough identical words in a row: close the change here.
      pieces << { :mode => :unchanged, :content => pending << ins }
      pending = String.new
      streak = 0
    end

    whitespace = !whitespace
  end

  # Matching text held back at the very end is unchanged.
  pieces << { :mode => :unchanged, :content => pending } unless pending.empty?
  pieces
end

# Appends +piece+ to +chunks+, merging it into the last chunk when both have
# the same mode so that modes always alternate.
def git_diff_append_chunk(chunks, piece)
  last = chunks.last
  if last.nil? || last[:mode] != piece[:mode]
    chunks << piece
  elsif piece[:mode] == :unchanged
    last[:content] << piece[:content]
  else
    last[:insert] << piece[:insert]
    last[:delete] << piece[:delete]
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment