Skip to content

Instantly share code, notes, and snippets.

@wilkie
Last active December 15, 2015 17:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wilkie/5299853 to your computer and use it in GitHub Desktop.
Save wilkie/5299853 to your computer and use it in GitHub Desktop.
Retrieves a nice word-based diff of a file tracked in git, from within Ruby.
require_relative 'git_diff_word_chunks'
# Example: print the word-level diff of 'some_file' against the previous
# commit, wrapping replaced text in <del>/<ins> tags and passing unchanged
# text through untouched.
git_diff_word_chunks('some_file', 'HEAD^1').each do |chunk|
  case chunk[:mode]
  when :changed
    print "<del>#{chunk[:delete]}</del><ins>#{chunk[:insert]}</ins>"
  else
    print chunk[:content]
  end
end
# Gemfile: resolve gems from the public RubyGems index.
source 'https://rubygems.org'
# The `git` gem provides the Git.open / diff API used by git_diff_word_chunks.
# `~> 1.2.5` allows 1.2.x releases from 1.2.5 upwards, but not 1.3.
gem 'git', '~> 1.2.5'
require 'bundler'
Bundler.require
# Internal helpers for #git_diff_word_chunks. They operate on plain strings
# and arrays only; nothing in here talks to git or to the file system.
module GitDiffWordChunks
  # Number of consecutive identical words inside a change after which the
  # change is closed and those words are reported as unchanged again.
  TOLERANCE = 3

  # Unified-diff hunk header. Captures the start line and the (optional)
  # line count of the new-file side; a missing count means one line.
  HUNK_HEADER = /\A@@\s-\d+(?:,\d+)?\s\+(\d+)(?:,(\d+))?\s/

  module_function

  # Builds the final chunk list from the patch text and the lines of the
  # new version of the file.
  def build(patch, current_lines)
    merge(line_chunks(patch, current_lines))
  end

  # Splits the file into alternating line-level chunks:
  #   { mode: :unchanged, content: [lines] }
  #   { mode: :changed, insertions: [lines], deletions: [lines] }
  # Lines inside hunks come from the patch; lines between and around hunks
  # are copied from +current_lines+.
  def line_chunks(patch, current_lines)
    remaining = current_lines # lines of the file not yet accounted for
    next_line = 1             # 1-based number of the first line in +remaining+
    output = []

    # Skip the per-file header, however many lines it has.
    body = patch.lines.drop_while { |line| !line.start_with?('@@') }
    body.each do |line|
      next if line.start_with?('diff')

      if line.start_with?('@@')
        header = HUNK_HEADER.match(line)
        next unless header

        start = header[1].to_i
        count = header[2] ? header[2].to_i : 1
        # An empty new-side range is numbered by the line *before* the hunk.
        start += 1 if count.zero?

        # Everything between the previous hunk and this one is unchanged.
        preceding = [start - next_line, 0].max
        append_unchanged_lines(output, remaining.take(preceding))
        # The hunk's own lines are emitted from the patch, so skip them here.
        remaining = remaining.drop(preceding + count)
        next_line = start + count
      elsif line.start_with?(' ')
        append_unchanged_lines(output, [line[1..-1]])
      elsif line.start_with?('+')
        append_changed_line(output, :insertions, line[1..-1])
      elsif line.start_with?('-')
        append_changed_line(output, :deletions, line[1..-1])
      end
    end

    # Whatever follows the last hunk is unchanged.
    append_unchanged_lines(output, remaining)
    output
  end

  # Appends +lines+ to the trailing unchanged chunk, starting one if needed.
  def append_unchanged_lines(output, lines)
    last = output.last
    if last && last[:mode] == :unchanged
      last[:content].concat(lines)
    else
      output << { mode: :unchanged, content: lines.dup }
    end
  end

  # Appends +line+ to the +kind+ list (:insertions or :deletions) of the
  # trailing changed chunk, starting one if needed.
  def append_changed_line(output, kind, line)
    last = output.last
    unless last && last[:mode] == :changed
      last = { mode: :changed, insertions: [], deletions: [] }
      output << last
    end
    last[kind] << line
  end

  # Compares inserted and deleted text token by token, alternating between
  # runs of whitespace and runs of non-whitespace, position by position
  # (there is no re-alignment after an added or removed word).
  #
  # Returns word-level chunks: { mode: :unchanged, content: String } and
  # { mode: :changed, insert: String, delete: String, ... }.
  def word_chunks(inserts, deletes, tolerance = TOLERANCE)
    chunks = [{ mode: :unchanged, content: ''.dup }]
    whitespace = true

    until inserts.empty?
      pattern = whitespace ? /\A\s*/ : /\A\S*/
      inserted = inserts[pattern]
      deleted = deletes[pattern]
      last = chunks.last

      if inserted != deleted
        record_change(chunks, inserted, deleted)
      elsif last[:mode] == :unchanged
        last[:content] << inserted
      else
        # Matching token inside a change: hold it back until we know whether
        # the change continues or enough words matched to end it.
        last[:streak_content] << inserted
        unless whitespace
          last[:streak] += 1
          close_change(chunks) if last[:streak] >= tolerance
        end
      end

      inserts = inserts[inserted.length..-1]
      deletes = deletes[deleted.length..-1]
      whitespace = !whitespace
    end

    # Old text left over once the new text is used up was deleted.
    record_change(chunks, ''.dup, deletes) unless deletes.empty?
    close_change(chunks) if chunks.last[:mode] == :changed
    chunks
  end

  # Records a mismatching token pair in the trailing changed chunk, opening
  # one if the last chunk is unchanged.
  def record_change(chunks, inserted, deleted)
    last = chunks.last
    if last[:mode] == :changed
      # Matching tokens seen since the previous mismatch were too few to
      # split the change, so they become part of both sides.
      last[:insert] << last[:streak_content] << inserted
      last[:delete] << last[:streak_content] << deleted
      last[:streak] = 0
      last[:streak_content] = ''.dup
    else
      chunks << { mode: :changed, insert: inserted, delete: deleted,
                  streak: 0, streak_content: ''.dup }
    end
  end

  # Ends the trailing changed chunk by publishing its held-back matching
  # tokens as a new unchanged chunk. Does nothing if none are held back.
  def close_change(chunks)
    last = chunks.last
    return if last[:streak_content].empty?

    chunks << { mode: :unchanged, content: last[:streak_content] }
    last[:streak] = 0
    last[:streak_content] = ''.dup
  end

  # Flattens line-level chunks into the final list, running the word-level
  # comparison on every changed chunk and fusing neighbours of equal mode.
  def merge(line_level)
    line_level.each_with_object([]) do |chunk, merged|
      if chunk[:mode] == :unchanged
        push_unchanged(merged, chunk[:content].join)
      else
        pieces = word_chunks(chunk[:insertions].join, chunk[:deletions].join)
        pieces.each do |piece|
          if piece[:mode] == :unchanged
            push_unchanged(merged, piece[:content])
          else
            push_changed(merged, piece[:insert], piece[:delete])
          end
        end
      end
    end
  end

  # Adds unchanged text, extending the previous chunk if it is unchanged too.
  def push_unchanged(merged, text)
    last = merged.last
    if last && last[:mode] == :unchanged
      last[:content] << text
    else
      merged << { mode: :unchanged, content: text.dup }
    end
  end

  # Adds a replacement, extending the previous chunk if it is a change too.
  def push_changed(merged, inserted, deleted)
    last = merged.last
    if last && last[:mode] == :changed
      last[:insert] << inserted
      last[:delete] << deleted
    else
      merged << { mode: :changed, insert: inserted.dup, delete: deleted.dup }
    end
  end
end

# Computes a word-level diff of +file+ between two commits of the git
# repository opened at the current directory.
#
# The patch between +commit_old+ and +commit_new+ is obtained through the
# `git` gem; text outside the patch hunks is filled in from the copy of
# +file+ currently on disk.
# NOTE(review): this assumes the on-disk copy matches +commit_new+; if the
# working tree differs, hunk line numbers will not line up -- confirm.
#
# Words are compared position by position. Inside a change, a run of
# GitDiffWordChunks::TOLERANCE identical words ends the change; shorter
# runs are absorbed into it.
#
# @param file [String] path of the file to diff
# @param commit_old [String] revision to diff from
# @param commit_new [String] revision to diff to
# @return [Array<Hash>] chunks in file order, each of the form
#   { :mode => :unchanged, :content => "some string..." } or
#   { :mode => :changed, :insert => "new text", :delete => "old text" }.
#   Neighbouring chunks never share a mode; unchanged content may be empty.
def git_diff_word_chunks(file, commit_old = 'HEAD^1', commit_new = 'HEAD')
  patch = Git.open('.').diff(commit_old, commit_new).path(file).to_s

  # File.read closes the handle itself and, unlike Kernel#open, never treats
  # a path beginning with "|" as a command to run.
  current_lines = File.read(file).lines

  GitDiffWordChunks.build(patch, current_lines)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment