Skip to content

Instantly share code, notes, and snippets.

@vrthra
Created August 21, 2016 00:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vrthra/17643e92a812dfd3835e8e4208385dcb to your computer and use it in GitHub Desktop.
Save vrthra/17643e92a812dfd3835e8e4208385dcb to your computer and use it in GitHub Desktop.
import csv
import sys
import scipy
import scipy.stats
def _recent_not_after_initial(recent, initial):
    """Return True when the `commit.recent` field is <= `commit.initial`.

    csv.DictReader yields every field as a string, so the two values are
    converted to integers before comparing.  If either field is not an
    integer literal the plain string comparison is used instead, so that
    non-numeric data keeps being handled without raising.
    NOTE(review): presumably both columns hold integers -- confirm against
    data/subjects.csv.
    """
    try:
        return int(recent) <= int(initial)
    except (TypeError, ValueError):
        return recent <= initial


# Names of the projects that are kept for the rest of the analysis: a
# project is dropped when it has no mutants, or when its `commit.recent`
# value is not greater than its `commit.initial` value.
okProjects = []
with open("data/subjects.csv") as csvfile:
    pdata = csv.DictReader(csvfile)
    for row in pdata:
        if int(row['total.mutants']) <= 0:
            continue
        # The previous form, int(a <= b), compared the two strings
        # lexicographically and only converted the resulting bool.
        if _recent_not_after_initial(row['commit.recent'],
                                     row['commit.initial']):
            continue
        okProjects.append(row['project'])
# Read the mutant records of the accepted projects.
#   ot_others: fullname -> 1 for every method that has an 'OT' row.
#   my_result: one {'fullname', 'mid', 'commitid'} dict per non-'OT' row.
ot_others = {}
my_result = []
with open('data/results.mutants.uniq.csv') as csvfile:
    for record in csv.DictReader(csvfile):
        project = record['project']
        if project not in okProjects:
            continue
        qualified = record['class'] + "." + record['method']
        # Mutant id: project, then file/method/line/mutant/index joined by ":".
        mid = project + "," + ":".join([
            record["file"],
            qualified,
            record['line'],
            record['mutant'],
            record['index'],
        ])
        fullname = project + ":" + qualified
        if record['isbug'] == 'OT':
            ot_others[fullname] = 1
        else:
            my_result.append({
                'fullname': fullname,
                'mid': mid,
                'commitid': record['commitid'],
            })
# checked: build/my.result.csv
#for i in my_result:
# print i['fullname'], i['mid'], i['commitid']
# Goal: for each method (identified by its fullname), compute the number
# of bug fixes and the total number of mutants.
# Group the mutant records per method:
#   my_methods[fullname] = {'mutants': set of mutant ids,
#                           'bugfix':  set of commit ids}
my_methods = {}
for entry in my_result:
    bucket = my_methods.setdefault(
        entry['fullname'], {'mutants': set(), 'bugfix': set()})
    bucket['mutants'].add(entry['mid'])
    bucket['bugfix'].add(entry['commitid'])
# Single-argument print(...) gives the same output under Python 2 and 3.
print("data/results.mutants.uniq.csv" + " " + str(len(my_methods)))
# checked: build/method.res.summary.csv
# Dump the method names seen in the mutant records, one per line.
with open("fullname:results.mutants.uniq.csv", 'w') as f:
    f.writelines(name + "\n" for name in my_methods)
#print f, key, len(my_methods[key]['mutants']), len(my_methods[key]['bugfix'])
# remember, some of the methods are overloaded. strip them out.
# Read the per-method size records of the accepted projects.  Rows with an
# empty or zero `CountLineCode` are ignored.  Each kept row becomes
# {'methodloc': <int>, 'fullname': '<projectname>:<Name>'}.
method = []
with open('processed/method.uniq.csv') as csvfile:
    for record in csv.DictReader(csvfile):
        if record['projectname'] not in okProjects:
            continue
        raw_loc = record['CountLineCode']
        loc = 0 if raw_loc == '' else int(raw_loc)
        if loc == 0:
            continue
        method.append({
            'methodloc': loc,
            'fullname': record['projectname'] + ":" + record['Name'],
        })
# Index the size records by fullname -> lines of code.  When a fullname
# occurs more than once, the last record wins.
methods = {entry['fullname']: entry['methodloc'] for entry in method}
# Single-argument print(...) gives the same output under Python 2 and 3.
print("processed/method.uniq.csv:" + " " + str(len(methods)))
# checked: build/method.csv
# Dump the method names seen in the size records, one per line.
with open("fullname:method.uniq.csv", 'w') as f:
    f.writelines(name + "\n" for name in methods)
# for i in methods.keys():
# print i, methods[i]
# What do we want? We have my_methods with fullname as key, and we have
# my_result with fullname. Given that it is not the exact same
# data set, we can either (1) pick processed/method.uniq.csv as the
# ground truth and fill in the bug fix count from
# data/results.mutants.uniq.csv, using `0` for those methods that
# do not have an entry in `data/results.mutants.uniq.csv` (because
# it is the exact same project, and `understand` is run at epoch),
# or (2) look only at the intersection of method names
# between `method.uniq.csv` and `results.mutants.uniq.csv`. The
# second may be preferable because some of the methods do not
# produce a mutant, and hence may show no commits even when there
# are commits, if the commits did not overlap with any mutants (because
# `results.mutants.uniq.csv` is primarily a mutant record db).
# On the other hand, the first is plausible, and it does not bias the
# results by leaving out the methods with zero commits.
# Choice 1. Include zero commit lines.
# - But we do not know if any commits are omitted.
# Choice 1: keep every method from `methods`; a method without an entry in
# `my_methods` gets zero bug fixes and zero mutants.
joined_method = {}
for name, loc in methods.items():
    seen = my_methods.get(name)
    if seen is None:
        fixes, mutant_total = 0, 0
    else:
        fixes, mutant_total = len(seen['bugfix']), len(seen['mutants'])
    joined_method[name] = {
        'bugfix': fixes,
        'mutants': mutant_total,
        'loc': loc,
    }
# Choice 2. Discard zero commits.
# - But we discard the zero commits.
# Choice 2: keep only the methods that also appear in `my_methods`.
# Methods found in neither `my_methods` nor `ot_others` are collected in
# `err`.
# NOTE(review): the original indentation was lost; every method missing
# from `my_methods` is taken to be skipped, since looking it up would
# otherwise raise KeyError.
err = []
joined_method2 = {}
for name in methods:
    if name in my_methods:
        seen = my_methods[name]
        joined_method2[name] = {
            'bugfix': len(seen['bugfix']),
            'mutants': len(seen['mutants']),
            'loc': methods[name],
        }
    elif name not in ot_others:
        # does not have a mutant on a commit.
        err.append(name)
# Single-argument print(...) gives the same output under Python 2 and 3.
print("matching" + " " + str(len(joined_method2)))
# Record the methods collected in `err`, one per line.
with open("py.data.err", 'w') as f:
    f.writelines(name + "\n" for name in err)
# checked:
# for i in joined_method.keys():
# print i, joined_method[i]['loc'], joined_method[i]['bugfix'], joined_method[i]['mutants']
# Report the correlations for Choice 1 and save the table they were
# computed from.  Single-argument print(...) gives the same output under
# Python 2 and 3.
print("Choice 1")
rows = list(joined_method.items())
allMutantCounts = [stats['mutants'] for _, stats in rows]
allBugs = [stats['bugfix'] for _, stats in rows]
allLoc = [stats['loc'] for _, stats in rows]
print("Mutant count correlations:")
tau_mut = scipy.stats.kendalltau(allBugs, allMutantCounts)
print("B~Mut Tau:" + " " + str(tau_mut))
tau_loc = scipy.stats.kendalltau(allBugs, allLoc)
print("B~Loc Tau:" + " " + str(tau_loc))
# NOTE: pearsonr returns the correlation coefficient r and a p-value; the
# printed value is r itself, not r squared, despite the label.
r_mut = scipy.stats.pearsonr(allBugs, allMutantCounts)
print("B~Mut r^2:" + " " + str(r_mut))
r_loc = scipy.stats.pearsonr(allBugs, allLoc)
print("B~Loc r^2:" + " " + str(r_loc))
print("")
# check the R^ and Tau values using this.
with open("py.data.csv", 'w') as f:
    f.write("key,mutants,bugfix,loc\n")
    for name, stats in rows:
        fields = [name, str(stats['mutants']), str(stats['bugfix']),
                  str(stats['loc'])]
        f.write(",".join(fields) + "\n")
# Report the correlations for Choice 2 and save the table they were
# computed from.  From here on `joined_method` refers to the Choice 2
# table.  Single-argument print(...) gives the same output under Python 2
# and 3.
joined_method = joined_method2
print("Choice 2")
rows = list(joined_method.items())
allMutantCounts = [stats['mutants'] for _, stats in rows]
allBugs = [stats['bugfix'] for _, stats in rows]
allLoc = [stats['loc'] for _, stats in rows]
print("Mutant count correlations:")
tau_mut = scipy.stats.kendalltau(allBugs, allMutantCounts)
print("B~Mut Tau:" + " " + str(tau_mut))
tau_loc = scipy.stats.kendalltau(allBugs, allLoc)
print("B~Loc Tau:" + " " + str(tau_loc))
# NOTE: pearsonr returns the correlation coefficient r and a p-value; the
# printed value is r itself, not r squared, despite the label.
r_mut = scipy.stats.pearsonr(allBugs, allMutantCounts)
print("B~Mut r^2:" + " " + str(r_mut))
r_loc = scipy.stats.pearsonr(allBugs, allLoc)
print("B~Loc r^2:" + " " + str(r_loc))
# check the R^ and Tau values using this.
with open("py.data2.csv", 'w') as f:
    f.write("key,mutants,bugfix,loc\n")
    for name, stats in rows:
        fields = [name, str(stats['mutants']), str(stats['bugfix']),
                  str(stats['loc'])]
        f.write(",".join(fields) + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment