wilkie/Gemfile

## example.rb
require_relative 'git_diff_word_chunks'

# Print some_file as it differs from the previous commit: text that changed is
# wrapped in <del>/<ins> tags, everything else is printed verbatim.
git_diff_word_chunks('some_file', 'HEAD^1').each do |chunk|
  case chunk[:mode]
  when :changed
    print "<del>#{chunk[:delete]}</del><ins>#{chunk[:insert]}</ins>"
  else
    print chunk[:content]
  end
end

## Gemfile
source 'https://rubygems.org'

gem 'git', '~> 1.2.5'

## git_diff_word_chunks.rb
require 'bundler'
Bundler.require

# Builds a word-level diff of +file+ between two commits.
#
# The patch is obtained from the Git object opened on the current directory;
# the text surrounding the hunks is read from +file+ on disk, so the working
# copy is assumed to match +commit_new+.
#
# file       - path of the file to diff (also passed to the diff's #path).
# commit_old - revision holding the old text (default 'HEAD^1').
# commit_new - revision holding the new text (default 'HEAD').
#
# Returns an Array of Hashes of the form:
#   { :mode => :unchanged, :content => "some string..." }
# and some of the form:
#   { :mode => :changed, :insert => "new text", :delete => "old text" }
def git_diff_word_chunks(file, commit_old = 'HEAD^1', commit_new = 'HEAD')
  patch = Git.open('.').diff(commit_old, commit_new).path(file).to_s

  # File.read closes the handle for us; Kernel#open would additionally run a
  # command if the name started with "|".
  current_lines = File.read(file).lines.to_a

  chunks = []
  git_diff_line_chunks(patch, current_lines).each do |chunk|
    if chunk[:mode] == :unchanged
      git_diff_append_chunk(chunks, {:mode => :unchanged, :content => chunk[:content].join})
    else
      pieces = git_diff_word_pieces(chunk[:insertions].join, chunk[:deletions].join)
      pieces.each { |piece| git_diff_append_chunk(chunks, piece) }
    end
  end
  chunks
end

# Splits +patch+ (unified diff text) into line-level chunks, filling the gaps
# between hunks from +current_lines+ (the lines of the new file).
#
# Returns an Array of Hashes of the form:
#   { :mode => :unchanged, :content => [line, ...] }
#   { :mode => :changed, :insertions => [line, ...], :deletions => [line, ...] }
def git_diff_line_chunks(patch, current_lines)
  output    = []
  remaining = current_lines
  next_line = 1     # 1-based number of the first line still held in +remaining+
  in_hunk   = false # everything before the first "@@" is file header
  last_text = nil   # most recent hunk line, kept for the "no newline" marker

  # Append lines to the trailing unchanged chunk, opening one if needed.
  keep = lambda do |lines|
    if output.last && output.last[:mode] == :unchanged
      output.last[:content].concat(lines)
    else
      output << {:mode => :unchanged, :content => lines.dup}
    end
  end

  # Append an inserted/deleted line to the trailing changed chunk.
  change = lambda do |key, text|
    unless output.last && output.last[:mode] == :changed
      output << {:mode => :changed, :insertions => [], :deletions => []}
    end
    output.last[key] << text
  end

  patch.lines.each do |line|
    if line.start_with?('@@')
      # A count may be omitted from the header, in which case it is 1.
      header = line.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/)
      next unless header

      in_hunk = true
      start = header[1].to_i
      count = header[2] ? header[2].to_i : 1
      # An empty new range (pure deletion) names the line *before* the hunk.
      start += 1 if count.zero?

      # Lines between the previous hunk and this one are unchanged; the lines
      # covered by the hunk itself are emitted from the patch instead.
      gap = [start - next_line, 0].max
      keep.call(remaining.take(gap))
      remaining = remaining.drop(gap + count)
      next_line = start + count
    elsif line.start_with?('diff ')
      in_hunk = false
    elsif !in_hunk
      next
    elsif line.start_with?('\\')
      # "\ No newline at end of file" refers to the line just before it.
      last_text.chomp! if last_text
    elsif line.start_with?(' ', '+', '-')
      last_text = line[1..-1]
      # The final patch line loses its newline if the patch text was chomped.
      last_text << "\n" unless last_text.end_with?("\n")
      case line[0, 1]
      when ' ' then keep.call([last_text])
      when '+' then change.call(:insertions, last_text)
      else          change.call(:deletions, last_text)
      end
    end
  end

  # Whatever follows the last hunk is unchanged.
  keep.call(remaining)
  output
end

# Compares +inserts+ (new text) with +deletes+ (old text) token by token,
# alternating between whitespace runs and words, and groups the result.
#
# Identical tokens that follow a mismatch are held back: another mismatch
# folds them into the changed piece, while +tolerance+ identical words in a
# row (or the end of the text) release them as an unchanged piece.
#
# Returns an Array whose elements are { :mode => :unchanged, :content => str }
# or { :mode => :changed, :insert => str, :delete => str, ... }.
def git_diff_word_pieces(inserts, deletes, tolerance = 3)
  pieces = [{:mode => :unchanged, :content => String.new}]
  whitespace = true

  # Run until BOTH sides are consumed so that trailing deletions are kept.
  until inserts.empty? && deletes.empty?
    pattern = whitespace ? /\A\s*/ : /\A\S*/
    insert = inserts[pattern]
    delete = deletes[pattern]
    last = pieces.last

    if insert == delete
      if last[:mode] == :unchanged
        last[:content] << insert
      else
        last[:streak_content] << insert
        unless whitespace
          last[:streak] += 1
          if last[:streak] >= tolerance
            # Enough identical words in a row: close the changed piece.
            pieces << {:mode => :unchanged, :content => last[:streak_content]}
            last[:streak_content] = String.new
          end
        end
      end
    elsif last[:mode] == :changed
      # The held-back identical tokens become part of the change after all.
      last[:insert] << last[:streak_content] << insert
      last[:delete] << last[:streak_content] << delete
      last[:streak] = 0
      last[:streak_content] = String.new
    else
      pieces << {:mode => :changed,
                 :insert => insert,
                 :delete => delete,
                 :streak => 0,
                 :streak_content => String.new}
    end

    inserts = inserts[insert.length..-1]
    deletes = deletes[delete.length..-1]
    whitespace = !whitespace
  end

  # Publish identical tokens still held back at the end as unchanged.
  last = pieces.last
  if last[:mode] == :changed && !last[:streak_content].empty?
    pieces << {:mode => :unchanged, :content => last[:streak_content]}
  end
  pieces
end

# Adds +piece+ to +chunks+, merging it into the last chunk when both have the
# same mode. Strings are copied so later merges never touch +piece+.
def git_diff_append_chunk(chunks, piece)
  last = chunks.last
  if last && last[:mode] == piece[:mode]
    if piece[:mode] == :unchanged
      last[:content] << piece[:content]
    else
      last[:insert] << piece[:insert]
      last[:delete] << piece[:delete]
    end
  elsif piece[:mode] == :unchanged
    chunks << {:mode => :unchanged, :content => piece[:content].dup}
  else
    chunks << {:mode => :changed,
               :insert => piece[:insert].dup,
               :delete => piece[:delete].dup}
  end
end
	require_relative 'git_diff_word_chunks'

	# Print some_file as it differs from the previous commit: text that changed
	# is wrapped in <del>/<ins> tags, everything else is printed verbatim.
	git_diff_word_chunks('some_file', 'HEAD^1').each do |chunk|
	  if chunk[:mode] == :changed
	    print "<del>#{chunk[:delete]}</del><ins>#{chunk[:insert]}</ins>"
	  else
	    print chunk[:content]
	  end
	end
	require 'bundler'
	Bundler.require

	# Builds a word-level diff of +file+ between two commits.
	#
	# The patch is obtained from the Git object opened on the current directory;
	# the text surrounding the hunks is read from +file+ on disk, so the working
	# copy is assumed to match +commit_new+.
	#
	# file       - path of the file to diff (also passed to the diff's #path).
	# commit_old - revision holding the old text (default 'HEAD^1').
	# commit_new - revision holding the new text (default 'HEAD').
	#
	# Returns an Array of Hashes of the form:
	#   { :mode => :unchanged, :content => "some string..." }
	# and some of the form:
	#   { :mode => :changed, :insert => "new text", :delete => "old text" }
	def git_diff_word_chunks(file, commit_old = 'HEAD^1', commit_new = 'HEAD')
	  patch = Git.open('.').diff(commit_old, commit_new).path(file).to_s

	  # File.read closes the handle for us; Kernel#open would additionally run a
	  # command if the name started with "|".
	  current_lines = File.read(file).lines.to_a

	  chunks = []
	  git_diff_line_chunks(patch, current_lines).each do |chunk|
	    if chunk[:mode] == :unchanged
	      git_diff_append_chunk(chunks, {:mode => :unchanged, :content => chunk[:content].join})
	    else
	      pieces = git_diff_word_pieces(chunk[:insertions].join, chunk[:deletions].join)
	      pieces.each { |piece| git_diff_append_chunk(chunks, piece) }
	    end
	  end
	  chunks
	end

	# Splits +patch+ (unified diff text) into line-level chunks, filling the gaps
	# between hunks from +current_lines+ (the lines of the new file).
	#
	# Returns an Array of Hashes of the form:
	#   { :mode => :unchanged, :content => [line, ...] }
	#   { :mode => :changed, :insertions => [line, ...], :deletions => [line, ...] }
	def git_diff_line_chunks(patch, current_lines)
	  output    = []
	  remaining = current_lines
	  next_line = 1     # 1-based number of the first line still held in +remaining+
	  in_hunk   = false # everything before the first "@@" is file header
	  last_text = nil   # most recent hunk line, kept for the "no newline" marker

	  # Append lines to the trailing unchanged chunk, opening one if needed.
	  keep = lambda do |lines|
	    if output.last && output.last[:mode] == :unchanged
	      output.last[:content].concat(lines)
	    else
	      output << {:mode => :unchanged, :content => lines.dup}
	    end
	  end

	  # Append an inserted/deleted line to the trailing changed chunk.
	  change = lambda do |key, text|
	    unless output.last && output.last[:mode] == :changed
	      output << {:mode => :changed, :insertions => [], :deletions => []}
	    end
	    output.last[key] << text
	  end

	  patch.lines.each do |line|
	    if line.start_with?('@@')
	      # A count may be omitted from the header, in which case it is 1.
	      header = line.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/)
	      next unless header

	      in_hunk = true
	      start = header[1].to_i
	      count = header[2] ? header[2].to_i : 1
	      # An empty new range (pure deletion) names the line *before* the hunk.
	      start += 1 if count.zero?

	      # Lines between the previous hunk and this one are unchanged; the lines
	      # covered by the hunk itself are emitted from the patch instead.
	      gap = [start - next_line, 0].max
	      keep.call(remaining.take(gap))
	      remaining = remaining.drop(gap + count)
	      next_line = start + count
	    elsif line.start_with?('diff ')
	      in_hunk = false
	    elsif !in_hunk
	      next
	    elsif line.start_with?('\\')
	      # "\ No newline at end of file" refers to the line just before it.
	      last_text.chomp! if last_text
	    elsif line.start_with?(' ', '+', '-')
	      last_text = line[1..-1]
	      # The final patch line loses its newline if the patch text was chomped.
	      last_text << "\n" unless last_text.end_with?("\n")
	      case line[0, 1]
	      when ' ' then keep.call([last_text])
	      when '+' then change.call(:insertions, last_text)
	      else          change.call(:deletions, last_text)
	      end
	    end
	  end

	  # Whatever follows the last hunk is unchanged.
	  keep.call(remaining)
	  output
	end

	# Compares +inserts+ (new text) with +deletes+ (old text) token by token,
	# alternating between whitespace runs and words, and groups the result.
	#
	# Identical tokens that follow a mismatch are held back: another mismatch
	# folds them into the changed piece, while +tolerance+ identical words in a
	# row (or the end of the text) release them as an unchanged piece.
	#
	# Returns an Array whose elements are { :mode => :unchanged, :content => str }
	# or { :mode => :changed, :insert => str, :delete => str, ... }.
	def git_diff_word_pieces(inserts, deletes, tolerance = 3)
	  pieces = [{:mode => :unchanged, :content => String.new}]
	  whitespace = true

	  # Run until BOTH sides are consumed so that trailing deletions are kept.
	  until inserts.empty? && deletes.empty?
	    pattern = whitespace ? /\A\s*/ : /\A\S*/
	    insert = inserts[pattern]
	    delete = deletes[pattern]
	    last = pieces.last

	    if insert == delete
	      if last[:mode] == :unchanged
	        last[:content] << insert
	      else
	        last[:streak_content] << insert
	        unless whitespace
	          last[:streak] += 1
	          if last[:streak] >= tolerance
	            # Enough identical words in a row: close the changed piece.
	            pieces << {:mode => :unchanged, :content => last[:streak_content]}
	            last[:streak_content] = String.new
	          end
	        end
	      end
	    elsif last[:mode] == :changed
	      # The held-back identical tokens become part of the change after all.
	      last[:insert] << last[:streak_content] << insert
	      last[:delete] << last[:streak_content] << delete
	      last[:streak] = 0
	      last[:streak_content] = String.new
	    else
	      pieces << {:mode => :changed,
	                 :insert => insert,
	                 :delete => delete,
	                 :streak => 0,
	                 :streak_content => String.new}
	    end

	    inserts = inserts[insert.length..-1]
	    deletes = deletes[delete.length..-1]
	    whitespace = !whitespace
	  end

	  # Publish identical tokens still held back at the end as unchanged.
	  last = pieces.last
	  if last[:mode] == :changed && !last[:streak_content].empty?
	    pieces << {:mode => :unchanged, :content => last[:streak_content]}
	  end
	  pieces
	end

	# Adds +piece+ to +chunks+, merging it into the last chunk when both have the
	# same mode. Strings are copied so later merges never touch +piece+.
	def git_diff_append_chunk(chunks, piece)
	  last = chunks.last
	  if last && last[:mode] == piece[:mode]
	    if piece[:mode] == :unchanged
	      last[:content] << piece[:content]
	    else
	      last[:insert] << piece[:insert]
	      last[:delete] << piece[:delete]
	    end
	  elsif piece[:mode] == :unchanged
	    chunks << {:mode => :unchanged, :content => piece[:content].dup}
	  else
	    chunks << {:mode => :changed,
	               :insert => piece[:insert].dup,
	               :delete => piece[:delete].dup}
	  end
	end