sriranggd/git_index_file_reader.rb

## git_index_file_reader.rb
#!/usr/bin/env ruby

# ************************************************************************
# A simple ruby script to read the index file of any git repository
# and dump its contents to a file in a readable format.
#
# This is totally a study tool, written when I was trying to understand
# the git internals.
#
# The file structure specification considered is this :
# http://git.rsbx.net/Documents/Git_Data_Formats.txt
#
# Note that this doesn't care about the INDEX_ENTRY_EXTENSIONS part.
#
# Author : Srirang G Doddihal (Brahmana)
#
# Feel free to use this and modify it, with no restrictions what-so-ever.
#
# ************************************************************************

require 'pp'

# A timestamp from an index entry: whole seconds (converted to a Time) plus
# the separately stored nanosecond fraction.
EntryTime = Struct.new(:time, :nsec) do
  # arr - two-element array: [seconds since the epoch, nanoseconds]
  def initialize(arr)
    super(Time.at(arr[0]), arr[1])
  end
end

# The stat data of an index entry, built from the ten 32-bit integers that
# start each entry.
EntryStatInfo = Struct.new(:ctime, :mtime, :device, :inode, :mode, :uid, :gid, :size) do
  # arr - ten integers: ctime (sec, nsec), mtime (sec, nsec), then
  #       device, inode, mode, uid, gid and size.
  def initialize(arr)
    self.ctime = EntryTime.new(arr.slice(0, 2))
    self.mtime = EntryTime.new(arr.slice(2, 2))
    # The remaining members (device .. size) map one-to-one onto arr[4..9].
    members.drop(2).each_with_index do |field, offset|
      self[field] = arr[offset + 4]
    end
  end
end

# The decoded 16-bit flags word of an index entry.
#
# name_length - the low 12 bits of the flags word
# stage       - the 2-bit merge stage (0..3) stored in bits 12-13
EntryFlags = Struct.new(:name_length, :stage) do
  # num - the flags word as an Integer
  def initialize(num)
    self.name_length = num & 0x0fff
    # Shift after masking so the result is the stage number itself (0..3)
    # rather than the raw bit pattern (0, 4096, 8192 or 12288).
    self.stage = (num & 0x3000) >> 12
  end
end

# One index entry: stat data, object id, decoded flags and the path name.
IndexEntry = Struct.new(:stat_info, :id, :flags, :name) do
  # arr - twelve values: ten stat integers, the object id, and the flags word.
  #       The name is not part of arr; it stays nil until the caller sets it.
  def initialize(arr)
    super(EntryStatInfo.new(arr.slice(0, 10)), arr[10], EntryFlags.new(arr[11]))
  end
end

# Reads the git index file at +path+ and writes a readable dump of its header
# and of every index entry to "<path>_deciphered_for_study".
#
# path - String path of the index file (usually ".git/index").
#
# Returns nil. Any StandardError (missing file, truncated data, ...) is
# reported on stdout and terminates the script with exit status 1.
#
# Only the entry layout of index versions 2 and 3 is decoded. Extensions
# after the entries are ignored.
# NOTE(review): version 4 (prefix-compressed names, no padding) and names of
# 0xFFF bytes or longer are not handled - confirm before using on such files.
def index_file_reader(path)
  separator = "=========================================================================="

  # Block form of File.open closes both files even when parsing raises.
  File.open(path, 'rb') do |index_io|
    File.open(path + '_deciphered_for_study', 'w') do |dump_io|
      result = {}
      result[:header] = index_io.read(4) # Reads the string "DIRC"
      result[:version] = index_io.read(4).unpack("N")[0]
      index_entry_count = index_io.read(4).unpack("N")[0]
      result[:index_entry_count] = index_entry_count

      dump_io.puts("Header :")
      dump_io.puts(result.pretty_inspect)
      dump_io.puts(separator)

      index_entry_count.times do |i|
        # An equivalent Struct-based decode would be:
        #   entry = IndexEntry.new(index_io.read(62).unpack("N10H40n"))
        #   entry.name = index_io.read(entry.flags.name_length)
        # It is more elegant, but the pretty_inspect output of those Structs
        # isn't really pretty, so plain Hashes are used instead.

        ############ STAT_INFO #############
        # Ten big-endian 32-bit integers: ctime, mtime (sec + nsec each),
        # dev, inode, mode, uid, gid, size.
        stat = index_io.read(40).unpack("N10")
        stat_info = {}
        stat_info[:ctime] = { time: Time.at(stat[0]).to_s, nsec: stat[1] }
        stat_info[:mtime] = { time: Time.at(stat[2]).to_s, nsec: stat[3] }
        stat_info[:dev] = stat[4]
        stat_info[:inode] = stat[5]
        stat_info[:mode] = stat[6]
        stat_info[:uid] = stat[7]
        stat_info[:gid] = stat[8]
        stat_info[:size] = stat[9]

        entry = {}
        entry[:stat_info] = stat_info
        entry[:id] = index_io.read(20).unpack("H40")[0]

        flags = index_io.read(2).unpack("n")[0]
        name_length = flags & 0x0fff
        # The stage lives in bits 12-13; shift so it reads as 0..3.
        entry[:flags] = { name_length: name_length, stage: (flags & 0x3000) >> 12 }

        # Fixed-size part of an entry: 40 (stat) + 20 (id) + 2 (flags).
        fixed_size = 62
        if (flags & 0x4000) != 0
          # Extended flag set (version 3): a second 16-bit flags word follows.
          index_io.read(2)
          fixed_size += 2
        end

        entry[:name] = index_io.read(name_length)

        # Skip the NUL padding: 1 to 8 bytes, chosen so that the whole entry
        # (fixed part + name + padding) is a multiple of eight bytes long.
        index_io.read(8 - ((fixed_size + name_length) % 8))

        dump_io.puts("Entry #{i + 1}")
        dump_io.puts(entry.pretty_inspect)
        dump_io.puts(separator)
      end
    end
  end

  nil
rescue StandardError => e
  puts e.message
  # Non-zero status so a failed run is not reported as success.
  exit 1
end

# Command-line entry point: the first argument is the path of the index file.
if ARGV.empty?
  puts "Usage : #{__FILE__ } <path-to-index-file>"
  exit
end

# Extra arguments are tolerated, but the user is told they are ignored.
puts "Warning : Only the first argument is used as the path to the index file to be read. Others are discarded" if ARGV.length > 1

index_path = ARGV[0]
index_file_reader(index_path)

puts "The contents of the index file are saved at #{index_path}_deciphered_for_study"

## sample_output.txt
Header :
{:header=>"DIRC", :version=>2, :index_entry_count=>1}
==========================================================================
Entry 1
{:stat_info=>
  {:ctime=>{:time=>"2011-08-14 01:29:03 +0530", :nsec=>780046898},
   :mtime=>{:time=>"2011-08-14 01:29:03 +0530", :nsec=>780046898},
   :dev=>2049,
   :inode=>937315,
   :mode=>33188,
   :uid=>1000,
   :gid=>1000,
   :size=>120},
 :id=>"d4639d8ddadbc592222efc3b42f705b7e369ef58",
 :flags=>{:name_length=>8, :stage=>0},
 :name=>"file1.rb"}
==========================================================================
	#!/usr/bin/env ruby

	# ************************************************************************
	# A simple ruby script to read the index file of any git repository
	# and dump its contents to a file in a readable format.
	#
	# This is totally a study tool, written when I was trying to understand
	# the git internals.
	#
	# The file structure specification considered is this :
	# http://git.rsbx.net/Documents/Git_Data_Formats.txt
	#
	# Note that this doesn't care about the INDEX_ENTRY_EXTENSIONS part.
	#
	# Author : Srirang G Doddihal (Brahmana)
	#
	# Feel free to use this and modify it, with no restrictions what-so-ever.
	#
	# ************************************************************************

	require 'pp'

	# A timestamp from an index entry: whole seconds (converted to a Time) plus
	# the separately stored nanosecond fraction.
	EntryTime = Struct.new(:time, :nsec) do
	  # arr - two-element array: [seconds since the epoch, nanoseconds]
	  def initialize(arr)
	    super(Time.at(arr[0]), arr[1])
	  end
	end

	# The stat data of an index entry, built from the ten 32-bit integers that
	# start each entry.
	EntryStatInfo = Struct.new(:ctime, :mtime, :device, :inode, :mode, :uid, :gid, :size) do
	  # arr - ten integers: ctime (sec, nsec), mtime (sec, nsec), then
	  #       device, inode, mode, uid, gid and size.
	  def initialize(arr)
	    self.ctime = EntryTime.new(arr.slice(0, 2))
	    self.mtime = EntryTime.new(arr.slice(2, 2))
	    # The remaining members (device .. size) map one-to-one onto arr[4..9].
	    members.drop(2).each_with_index do |field, offset|
	      self[field] = arr[offset + 4]
	    end
	  end
	end

	# The decoded 16-bit flags word of an index entry.
	#
	# name_length - the low 12 bits of the flags word
	# stage       - the 2-bit merge stage (0..3) stored in bits 12-13
	EntryFlags = Struct.new(:name_length, :stage) do
	  # num - the flags word as an Integer
	  def initialize(num)
	    self.name_length = num & 0x0fff
	    # Shift after masking so the result is the stage number itself (0..3)
	    # rather than the raw bit pattern (0, 4096, 8192 or 12288).
	    self.stage = (num & 0x3000) >> 12
	  end
	end

	# One index entry: stat data, object id, decoded flags and the path name.
	IndexEntry = Struct.new(:stat_info, :id, :flags, :name) do
	  # arr - twelve values: ten stat integers, the object id, and the flags word.
	  #       The name is not part of arr; it stays nil until the caller sets it.
	  def initialize(arr)
	    super(EntryStatInfo.new(arr.slice(0, 10)), arr[10], EntryFlags.new(arr[11]))
	  end
	end

	# Reads the git index file at +path+ and writes a readable dump of its header
	# and of every index entry to "<path>_deciphered_for_study".
	#
	# path - String path of the index file (usually ".git/index").
	#
	# Returns nil. Any StandardError (missing file, truncated data, ...) is
	# reported on stdout and terminates the script with exit status 1.
	#
	# Only the entry layout of index versions 2 and 3 is decoded. Extensions
	# after the entries are ignored.
	# NOTE(review): version 4 (prefix-compressed names, no padding) and names of
	# 0xFFF bytes or longer are not handled - confirm before using on such files.
	def index_file_reader(path)
	  separator = "=========================================================================="

	  # Block form of File.open closes both files even when parsing raises.
	  File.open(path, 'rb') do |index_io|
	    File.open(path + '_deciphered_for_study', 'w') do |dump_io|
	      result = {}
	      result[:header] = index_io.read(4) # Reads the string "DIRC"
	      result[:version] = index_io.read(4).unpack("N")[0]
	      index_entry_count = index_io.read(4).unpack("N")[0]
	      result[:index_entry_count] = index_entry_count

	      dump_io.puts("Header :")
	      dump_io.puts(result.pretty_inspect)
	      dump_io.puts(separator)

	      index_entry_count.times do |i|
	        # An equivalent Struct-based decode would be:
	        #   entry = IndexEntry.new(index_io.read(62).unpack("N10H40n"))
	        #   entry.name = index_io.read(entry.flags.name_length)
	        # It is more elegant, but the pretty_inspect output of those Structs
	        # isn't really pretty, so plain Hashes are used instead.

	        ############ STAT_INFO #############
	        # Ten big-endian 32-bit integers: ctime, mtime (sec + nsec each),
	        # dev, inode, mode, uid, gid, size.
	        stat = index_io.read(40).unpack("N10")
	        stat_info = {}
	        stat_info[:ctime] = { time: Time.at(stat[0]).to_s, nsec: stat[1] }
	        stat_info[:mtime] = { time: Time.at(stat[2]).to_s, nsec: stat[3] }
	        stat_info[:dev] = stat[4]
	        stat_info[:inode] = stat[5]
	        stat_info[:mode] = stat[6]
	        stat_info[:uid] = stat[7]
	        stat_info[:gid] = stat[8]
	        stat_info[:size] = stat[9]

	        entry = {}
	        entry[:stat_info] = stat_info
	        entry[:id] = index_io.read(20).unpack("H40")[0]

	        flags = index_io.read(2).unpack("n")[0]
	        name_length = flags & 0x0fff
	        # The stage lives in bits 12-13; shift so it reads as 0..3.
	        entry[:flags] = { name_length: name_length, stage: (flags & 0x3000) >> 12 }

	        # Fixed-size part of an entry: 40 (stat) + 20 (id) + 2 (flags).
	        fixed_size = 62
	        if (flags & 0x4000) != 0
	          # Extended flag set (version 3): a second 16-bit flags word follows.
	          index_io.read(2)
	          fixed_size += 2
	        end

	        entry[:name] = index_io.read(name_length)

	        # Skip the NUL padding: 1 to 8 bytes, chosen so that the whole entry
	        # (fixed part + name + padding) is a multiple of eight bytes long.
	        index_io.read(8 - ((fixed_size + name_length) % 8))

	        dump_io.puts("Entry #{i + 1}")
	        dump_io.puts(entry.pretty_inspect)
	        dump_io.puts(separator)
	      end
	    end
	  end

	  nil
	rescue StandardError => e
	  puts e.message
	  # Non-zero status so a failed run is not reported as success.
	  exit 1
	end

	# Command-line entry point: the first argument is the path of the index file.
	if ARGV.empty?
	  puts "Usage : #{__FILE__ } <path-to-index-file>"
	  exit
	end

	# Extra arguments are tolerated, but the user is told they are ignored.
	puts "Warning : Only the first argument is used as the path to the index file to be read. Others are discarded" if ARGV.length > 1

	index_path = ARGV[0]
	index_file_reader(index_path)

	puts "The contents of the index file are saved at #{index_path}_deciphered_for_study"
	Header :
	{:header=>"DIRC", :version=>2, :index_entry_count=>1}
	==========================================================================
	Entry 1
	{:stat_info=>
	{:ctime=>{:time=>"2011-08-14 01:29:03 +0530", :nsec=>780046898},
	:mtime=>{:time=>"2011-08-14 01:29:03 +0530", :nsec=>780046898},
	:dev=>2049,
	:inode=>937315,
	:mode=>33188,
	:uid=>1000,
	:gid=>1000,
	:size=>120},
	:id=>"d4639d8ddadbc592222efc3b42f705b7e369ef58",
	:flags=>{:name_length=>8, :stage=>0},
	:name=>"file1.rb"}
	==========================================================================