@rooreynolds
Simple Ruby script to parse Pivotal Tracker CSV export and extract useful data. See comments below for usage. [Revision 5: adds output of total points per sprint]

require 'csv'
require 'date'
require 'sqlite3'

# Load the Pivotal Tracker CSV export into a fresh 'stories' table
def setupDB(db, csv_file)
  db.execute("drop table if exists stories")
  db.execute("create table stories(id, labels, iterationend, created, accepted, size, requester, owner, type)")
  CSV.foreach(File.path(csv_file), :headers => true) do |col|
    db.execute("insert into stories(id, labels, iterationend, created, accepted, size, requester, owner, type) " +
      "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
      [col['Id'],
       col['Labels'],
       col['Iteration End'] ? Date.parse(col['Iteration End']).to_s : 0,
       Date.parse(col['Created at']).to_s,
       col['Accepted at'] ? Date.parse(col['Accepted at']).to_s : 0,
       col['Estimate'].to_i, col['Requested By'],
       col['Owned By'], col['Story Type']])
  end
  puts "Parsed #{csv_file} and stored in database"
end

def storySizeAndAgePerSprint(db, output_filename)
  File.open(output_filename, 'w') { |file|
    file.puts "iteration end,count,average size,total points,min age,max age,av age"
    db.execute( "select iterationend, " +
      "count(id) as count, " +
      "avg(size) as avsize, " +
      "sum(size) as points, " +
      "min(julianday(accepted) - julianday(created)) as minage, " +
      "max(julianday(accepted) - julianday(created)) as maxage, " +
      "avg(julianday(accepted) - julianday(created)) as avage " +
      "from stories where accepted != 0 " +
      "group by iterationend" ) do |row|
      file.puts "#{row['iterationend']},#{row['count']},#{row['avsize'].round(3)},#{row['points'].round(3)},#{row['minage']},#{row['maxage']},#{row['avage'].round(3)}"
    end
  }
  puts "Written #{output_filename}"
end

def storySizePerSprint(db, as_percentage = false, output_filename)
  File.open(output_filename, 'w') { |file|
    if as_percentage then
      file.puts "iteration end,% 0s,% 1s,% 2s,% 3s,% 5s,% 8s"
    else
      file.puts "iteration end,0s,1s,2s,3s,5s,8s"
    end
    db.execute( "select iterationend, " +
      "count(case when size = 0 then id else null end) as zeros, " +
      "count(case when size = 1 then id else null end) as ones, " +
      "count(case when size = 2 then id else null end) as twos, " +
      "count(case when size = 3 then id else null end) as threes, " +
      "count(case when size = 5 then id else null end) as fives, " +
      "count(case when size = 8 then id else null end) as eights " +
      "from stories where accepted != 0 " +
      "group by iterationend" ) do |row|
      if as_percentage then
        zeros = row['zeros']
        ones = row['ones']
        twos = row['twos']
        threes = row['threes']
        fives = row['fives']
        eights = row['eights']
        total = zeros + ones + twos + threes + fives + eights
        file.puts "#{row['iterationend']},#{(zeros.to_f/total*100).round(3)},#{(ones.to_f/total*100).round(3)},#{(twos.to_f/total*100).round(3)},#{(threes.to_f/total*100).round(3)},#{(fives.to_f/total*100).round(3)},#{(eights.to_f/total*100).round(3)}"
      else
        file.puts "#{row['iterationend']},#{row['zeros']},#{row['ones']},#{row['twos']},#{row['threes']},#{row['fives']},#{row['eights']}"
      end
    end
  }
  puts "Written #{output_filename}"
end

def typePerSprint(db, as_percentage = false, output_filename)
  File.open(output_filename, 'w') { |file|
    if as_percentage
      file.puts "iteration end,% bugs,% chores,% features"
    else
      file.puts "iteration end,bugs,chores,features"
    end
    db.execute( "select iterationend, " +
      "count(case when type = 'feature' then id else null end) as feature, " +
      "count(case when type = 'chore' then id else null end) as chore, " +
      "count(case when type = 'bug' then id else null end) as bug " +
      "from stories where type != 'release' and accepted != 0 group by iterationend" ) do |row|
      if as_percentage then
        features = row['feature']
        chores = row['chore']
        bugs = row['bug']
        total = features + chores + bugs
        file.puts "#{row['iterationend']},#{(bugs.to_f/total*100).round(3)},#{(chores.to_f/total*100).round(3)},#{(features.to_f/total*100).round(3)}"
      else
        file.puts "#{row['iterationend']},#{row['bug']},#{row['chore']},#{row['feature']}"
      end
    end
  }
  puts "Written #{output_filename}"
end

def allLabels(db, output_filename)
  File.open(output_filename, 'w') { |file|
    file.puts "label,count"
    labels_hash = Hash.new(0)
    db.execute("select labels,count(labels) as count from stories group by labels order by count desc") do |row|
      count = row['count']
      labels = row['labels']
      labels.split(",").each{|label|
        labels_hash[label.strip] += count
      }
    end
    labels_hash.each{|key,count|
      file.puts "#{key},#{count}"
    }
  }
  puts "Written #{output_filename}"
end

def labelsPerSprint(db, as_percentage = false, tags, output_filename)
  File.open(output_filename, 'w') { |file|
    if as_percentage
      file.print "iteration end,"
      tags.each {|key| file.print "% #{key},"}
      file.print "% other\n"
    else
      file.print "iteration end,", tags.join(','), ",other\n"
    end
    # Build one count(...) column per tag (column alias = tag with '-', ' ' and ':' stripped),
    # plus an 'other' column counting stories that match none of the tags
    sql = "select iterationend, "
    tags.each {|key| sql += "count(case when labels like '%#{key}%' then id else null end) as #{key.gsub(/[- :]/, '')},\n"}
    sql += "count(case when "
    tags.each{|key| sql += "labels not like '%#{key}%' and "}
    sql += "1 = 1 then id else null end) as other \n"
    sql += "from stories where accepted != 0 group by iterationend"
    db.execute(sql) do |row|
      if as_percentage
        total = 0
        tags.each {|key| total += row[key.gsub(/[- :]/, '')]}
        total += row['other']
        file.print "#{row['iterationend']},"
        tags.each {|key| file.print "#{(row[key.gsub(/[- :]/, '')].to_f/total*100).round(3)},"}
        file.puts (row['other'].to_f/total*100).round(3)
      else
        file.print "#{row['iterationend']},"
        tags.each {|key| file.print "#{row[key.gsub(/[- :]/, '')]},"}
        file.puts row['other']
      end
    end
  }
  puts "Written #{output_filename}"
end

def averageAgeBySize(db, output_filename)
  File.open(output_filename, 'w') { |file|
    file.puts "size,average age"
    db.execute("select size, avg(julianday(accepted) - julianday(created)) as av_age from stories where accepted != 0 group by size" ) do |row|
      file.puts "#{row['size']},#{row['av_age'].round(3)}"
    end
  }
  puts "Written #{output_filename}"
end

def ages(db, output_filename)
  File.open(output_filename, 'w') { |file|
    file.puts "av age done,av age backlog,av age icebox"
    db.execute("select " +
      "avg(case when accepted != 0 then julianday(accepted) - julianday(created) else null end) as avage_done, " +
      "avg(case when (iterationend != 0 and accepted = 0) then julianday(date('now')) - julianday(created) else null end) as avage_backlog, " +
      "avg(case when iterationend = 0 then julianday(date('now')) - julianday(created) else null end) as avage_icebox " +
      "from stories" ) do |row|
      file.puts "#{row['avage_done'].round(2)},#{row['avage_backlog'].round(2)},#{row['avage_icebox'].round(2)}"
    end
  }
  puts "Written #{output_filename}"
end

def lengths(db, output_filename)
  File.open(output_filename, 'w') { |file|
    file.puts "done,backlog,icebox"
    db.execute("select " +
      "count(case when accepted != 0 then id else null end) as count_done, " +
      "count(case when (iterationend != 0 and accepted = 0) then id else null end) as count_backlog, " +
      "count(case when iterationend = 0 then id else null end) as count_icebox " +
      "from stories" ) do |row|
      file.puts "#{row['count_done']},#{row['count_backlog']},#{row['count_icebox']}"
    end
  }
  puts "Written #{output_filename}"
end

csv_file = "mainstream__development__20131213_1126.csv" # edit to match your exported Pivotal CSV
labels = ['bfsf', 'fco', 'smart-answer', 'travel advice'] # edit to the labels you care about

db_file = "#{csv_file}.db"
db = SQLite3::Database.new(db_file)
puts "Opened #{db_file}"
db.results_as_hash = true

setupDB(db, csv_file)
storySizeAndAgePerSprint(db, "1_sizeandage_#{csv_file}")
storySizePerSprint(db, false, "2a_sizepersprint_#{csv_file}")
storySizePerSprint(db, true, "2b_sizepercentpersprint_#{csv_file}")
typePerSprint(db, false, "3a_typepersprint_#{csv_file}")
typePerSprint(db, true, "3b_typepercentpersprint_#{csv_file}")
allLabels(db, "4_labels_#{csv_file}")
labelsPerSprint(db, false, labels, "5a_labelspersprint_#{csv_file}")
labelsPerSprint(db, true, labels, "5b_labelspercentpersprint_#{csv_file}")
averageAgeBySize(db, "6_agebysize_#{csv_file}")
ages(db, "7_ages_#{csv_file}")
lengths(db, "8_lengths_#{csv_file}")
db.close
@rooreynolds

  1. Export the CSV file from Pivotal, with all the checkboxes ticked
    [screenshot: Exporting CSV from Pivotal Tracker]
  2. Grab the pivotal_parse.rb script from https://gist.github.com/rooreynolds/7943047 and put it in the same directory as your exported CSV file
  3. Edit the csv_file line near the bottom of the script to match the name of your exported Pivotal CSV file
  4. Edit the labels line just below it to list any Pivotal labels you care about (if you're not sure which ones you need at first, don't worry about it for now; one of the outputs from the script is a list of the most popular labels)
  5. If you don't already have the sqlite3 gem, gem install sqlite3
  6. At the command line, with your exported CSV and the Ruby script in the current directory, run ruby pivotal_parse.rb (see the command sketch after this list)
  7. It might take a few seconds to run, and should create a .db file (an SQLite database which you can use or ignore) and a load of new .csv files (with names like 1_sizeandage_....csv, 2a_sizepersprint_....csv, and so on) which you can open in your favourite spreadsheet. From those, you can trivially graph the various things, like this:

[charts: Size of stories per sprint; Type of work as % of sprint; Average age of completed stories by size; Labels used per sprint; Average age of stories (in days) by status; Average age of stories over time]
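
For reference, the command-line part of the steps above boils down to something like this (a rough sketch; it assumes the script was saved as pivotal_parse.rb as in step 2, and the directory path is just an example):

$ gem install sqlite3        # one-off, if you don't already have the sqlite3 gem
$ cd path/to/your/export     # hypothetical directory holding both the script and the exported CSV
$ ruby pivotal_parse.rb      # writes the .db file and the numbered output CSVs (1_... to 8_...)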

@rooreynolds

PS: if you want to play with the database directly but are new to SQLite, MesaSQLite is a handy GUI tool.

Alternatively, sqlite3 should just work from the command line...

$ sqlite3 -csv -header yourfile.db
SQLite version 3.7.7 2011-06-25 16:35:41
Enter ".help" for instructions
Enter SQL statements terminated with a ";"
sqlite> select * from stories limit 1;
id,labels,iterationend,created,accepted,size,requester,owner,type
18524765,"",2011-09-27,2011-09-20,2011-09-21,2,"Person One","Person Two",feature

[ctrl+D to quit]

Or even

$ sqlite3 -csv -header yourfile.db "select type,count(type) as count from stories group by type;"
type,count
bug,1196
chore,189
feature,1819
release,23

which then means you can do

$ sqlite3 -csv -header yourfile.db "select type,count(type) as count from stories group by type;" > newfile.csv

This is how computers are meant to be used.

@andypiper

+1 for "This is how computers are meant to be used." :-)
