Skip to content

Instantly share code, notes, and snippets.

@timvdalen
Created March 17, 2014 10:26
Show Gist options
  • Save timvdalen/9597034 to your computer and use it in GitHub Desktop.
Save timvdalen/9597034 to your computer and use it in GitHub Desktop.
Scripts for 2IS55 Assignment 3
# Tool and data locations. All assignments are simple (:=) so that every
# value, in particular the $(shell ...) call below, is computed exactly once
# when the makefile is read instead of on every reference.
BASE_DIR := /home/ubuntu/ccfinder-src
# NOTE(review): SCRATCH_SPACE is not referenced by any rule visible here.
SCRATCH_SPACE := /media/output
CCFX := $(BASE_DIR)/ubuntu32/ccfx
PICOSEL := $(BASE_DIR)/ubuntu32/picosel
SOURCE := /media/output/source

# Scratch directories for intermediate tool output.
TEMP := temp
TEMP_CCFX := $(TEMP)/ccfx
TEMP_METRICS := $(TEMP)/metrics
TEMP_RESULTS := $(TEMP)/results

# One version per entry in SOURCE. The coverage rule compares versions with
# the shell's numeric -lt, so the entries are expected to be integers.
VERSIONS := $(shell ls $(SOURCE))

# Per-version line-count files and per-pair coverage files. VERSIONPAIRS is
# the full cross product "a-b"; the coverage rule itself skips pairs where
# a is not smaller than b.
LOCS := $(addsuffix .csv,$(addprefix loc/,$(VERSIONS)))
VERSIONPAIRS := $(foreach a,$(VERSIONS),$(foreach b,$(VERSIONS), $a-$b))
COVERAGES := $(addsuffix .csv,$(addprefix coverage/,$(VERSIONPAIRS)))
# Default goal: build the combined coverage table. Declared phony so that a
# stray file named "all" cannot mask it.
.PHONY: all
all: coverages.csv
# Concatenate the per-version line counts into one "version,lines" table.
# Only the declared prerequisites are read, so a stale loc/*.csv left over
# from a version that no longer exists cannot leak into the result.
locs.csv: $(LOCS)
	cat $^ > $@
# Count the total number of lines in all *.java files of one version and
# store it as "version,lines". The version is the pattern stem ($*), which
# avoids setting a global make variable from inside the recipe.
loc/%.csv:
	mkdir -p $(@D)
	echo $*,`(find ${SOURCE}/$* -name '*.java' -print0 | xargs -0 cat) | wc -l` > $@
# Merge every per-pair coverage file into a single table.
# The glob is used on purpose instead of the prerequisite list: the
# coverage rule writes no file for pairs it skips, so many of the listed
# prerequisites never exist on disk.
coverages.csv: $(COVERAGES)
	cat coverage/*.csv > $@
# Create the scratch directories used by the coverage rule.
tempfolders:
	mkdir -p ${TEMP_CCFX} ${TEMP_METRICS} ${TEMP_RESULTS}
# Compute the clone coverage for one version pair and write it to
# coverage/A-B.csv as a single "A,B,coverage" line. The pattern stem ($*)
# is "A-B"; A and B are its first and second dash-separated fields.
#
# Nothing is produced unless A is numerically smaller than B, so each
# unordered pair is analysed once and a version is never compared to itself.
#
# Steps, all executed in one shell so the shell variables stay in scope
# (this also keeps parallel jobs independent of each other):
#   1. Run analysis, filter results, save metrics: ccfx detection on both
#      source trees, clone metrics, picosel selection of the clone ids with
#      RNR above 0.5, then file metrics for the filtered clone set.
#   2. Load and sum the CLOC metric (column 4, header row skipped), look up
#      both line counts in locs.csv and divide: CLOC / (locA + locB).
#   3. Remove the intermediate files of this pair.
#
# The line-count lookup is anchored to "^version," so that version 1 does
# not also match version 10 or a line count containing the same digits.
# "CLOC+0" makes awk print 0 rather than an empty string when the metrics
# file has no data rows, which would otherwise be a syntax error in bc.
#
# tempfolders is an order-only prerequisite: the scratch directories must
# exist, but they must not force every coverage file to be rebuilt on each
# run.
coverage/%.csv: locs.csv | tempfolders
	versionA='$(word 1,$(subst -, ,$*))' ; \
	versionB='$(word 2,$(subst -, ,$*))' ; \
	if [ "$$versionA" -lt "$$versionB" ] ; then \
		mkdir -p coverage ; \
		${CCFX} d java -w f-w-g+ -b 50 -dn ${SOURCE}/$$versionA -is -dn ${SOURCE}/$$versionB -o ${TEMP_CCFX}/$* ; \
		${CCFX} m ${TEMP_CCFX}/$*.ccfxd -c -o ${TEMP_METRICS}/clone-$*.tsv ; \
		${PICOSEL} -o ${TEMP_RESULTS}/ids-$*.txt from ${TEMP_METRICS}/clone-$*.tsv select CID where RNR .gt. 0.5 ; \
		${CCFX} s ${TEMP_CCFX}/$*.ccfxd -o ${TEMP_CCFX}/filtered-$* -ci ${TEMP_RESULTS}/ids-$*.txt ; \
		${CCFX} m ${TEMP_CCFX}/filtered-$*.ccfxd -w -o ${TEMP_RESULTS}/clone-$*.tsv ; \
		locA=`grep "^$$versionA," locs.csv | cut -f2 -d,` ; \
		locB=`grep "^$$versionB," locs.csv | cut -f2 -d,` ; \
		cloc=`awk 'NR>1{CLOC+=$$4} END {print CLOC+0}' ${TEMP_RESULTS}/clone-$*.tsv` ; \
		coverage=$$(echo "scale=3; $$cloc/($$locA + $$locB)" | bc -l) ; \
		echo "$$versionA,$$versionB,$$coverage" > $@ ; \
		rm -f ${TEMP_CCFX}/$*.ccfxd ; \
		rm -f ${TEMP_CCFX}/filtered-$*.ccfxd ; \
		rm -f ${TEMP_METRICS}/clone-$*.tsv ; \
		rm -f ${TEMP_RESULTS}/clone-$*.tsv ; \
		rm -f ${TEMP_RESULTS}/ids-$*.txt ; \
	fi
# Remove every generated file and directory. Declared phony so a file named
# "clean" cannot disable it. The scratch directory is taken from $(TEMP)
# rather than hard-coded; no trailing slash is appended, so an empty TEMP
# simply removes nothing extra.
.PHONY: clean
clean:
	rm -rf loc coverage $(TEMP)
	rm -f locs.csv coverages.csv
require 'csv'
require 'set'
# Returns the coverage stored for a version pair as a Float.
#
# versionA, versionB - version identifiers as they appear in the first two
#                      columns of data; given in either order.
# data               - enumerable of rows [versionA, versionB, coverage].
#
# The pair is ordered by integer value before searching, so the smaller
# version is matched against column 0 and the larger against column 1.
# Returns 0.0 when no row matches.
def lookup_coverage(versionA, versionB, data)
  lower, upper =
    if versionA.to_i > versionB.to_i
      [versionB, versionA]
    else
      [versionA, versionB]
    end

  match = data.find { |record| record[0] == lower && record[1] == upper }
  match ? match[2].to_f : 0.0
end
# Turn the pairwise coverage table in results.csv into a lower-triangular
# matrix (normalized by the largest coverage value) and write it to
# processed.csv.
data = CSV.read('results.csv')

# The shell script that produces results.csv writes a header row first;
# drop it so it is not mistaken for a pair of versions.
data.shift if data.first == %w[VersionA VersionB coverage]

# Largest coverage value, used to normalize every cell. Fall back to 1.0
# when there is no positive value so that an all-zero input yields zeros
# instead of NaN from 0.0 / 0.0.
max = data.map { |row| row[2].to_f }.max
max = 1.0 unless max && max > 0

# Unique versions ordered numerically (the version string is the tie
# breaker). A plain string sort would put "10" before "9".
versions_a = data.flat_map { |row| [row[0], row[1]] }
                 .compact
                 .uniq
                 .sort_by { |version| [version.to_i, version.to_s] }

# Start writing the matrix.
# Row j is labelled with versions_a[j] in its left column; column i (i >= 1)
# belongs to versions_a[i - 1], as spelled out by the bottom row.
matrix = versions_a.each_with_index.map do |row_version, j|
  cells = (1..versions_a.size).map do |i|
    if i > j
      # Diagonal and upper triangle stay empty. Using "i > j" (not "i >= j")
      # keeps the first sub-diagonal, i.e. the adjacent version pairs.
      0.0
    else
      # The actual center square: normalized coverage of the pair
      (lookup_coverage(versions_a[i - 1], row_version, data) / max).round(3).to_s
    end
  end
  [row_version] + cells
end

# Bottom row: empty corner cell followed by the column labels
matrix << [""] + versions_a

CSV.open("processed.csv", "wb") do |csv|
  matrix.each do |row|
    csv << row
  end
end
#!/bin/bash
# Computes the clone coverage for every pair of versions found in $SOURCE
# and writes one "VersionA,VersionB,coverage" row per pair (A < B) to
# results.csv, preceded by a header row.
BASE_DIR=/home/ubuntu/ccfinder-src
CCFX=$BASE_DIR/ubuntu32/ccfx
PICOSEL=$BASE_DIR/ubuntu32/picosel
SOURCE=$BASE_DIR/source/source
OUTPUT=output
TEMP_CCFX=$OUTPUT/ccfx
TEMP_METRICS=$OUTPUT/metrics
TEMP_RESULTS=$OUTPUT/results
VERSIONS=$(ls "$SOURCE")

# Book keeping
mkdir -p "$TEMP_CCFX" "$TEMP_METRICS" "$TEMP_RESULTS"

# Get the sum of the file lengths per version.
# An associative array keeps the version as a plain string key; an indexed
# array would evaluate the subscript arithmetically and choke on a version
# such as "08".
declare -A version_loc
for version in $VERSIONS
do
    version_loc[$version]=$( (find "$SOURCE/$version" -name '*.java' -print0 | xargs -0 cat) | wc -l )
done

# Execute analysis per version pair
echo 'VersionA,VersionB,coverage' > results.csv
for versionA in $VERSIONS
do
    for versionB in $VERSIONS
    do
        # Only analyse each unordered pair once, and never a version
        # against itself.
        if [ "$versionA" -ge "$versionB" ]
        then
            continue
        fi

        pair="$versionA-$versionB"
        echo "Analyzing $versionA versus $versionB"

        # Detect clones, compute clone metrics, keep the clone ids with
        # RNR above 0.5, then compute file metrics for the filtered set.
        "$CCFX" d java -w f-w-g+ -b 50 -dn "$SOURCE/$versionA" -is -dn "$SOURCE/$versionB" -o "$TEMP_CCFX/$pair"
        "$CCFX" m "$TEMP_CCFX/$pair.ccfxd" -c -o "$TEMP_METRICS/clone-$pair.tsv"
        "$PICOSEL" -o "$TEMP_RESULTS/ids-$pair.txt" from "$TEMP_METRICS/clone-$pair.tsv" select CID where RNR .gt. 0.5
        "$CCFX" s "$TEMP_CCFX/$pair.ccfxd" -o "$TEMP_CCFX/filtered-$pair" -ci "$TEMP_RESULTS/ids-$pair.txt"
        "$CCFX" m "$TEMP_CCFX/filtered-$pair.ccfxd" -w -o "$TEMP_RESULTS/clone-$pair.tsv"

        # Sum column 4 of every row after the header. "+0" makes awk print
        # 0 instead of an empty string when there are no data rows, which
        # would otherwise be a syntax error in bc.
        loc_clone=$(awk 'NR>1{CLOC+=$4} END {print CLOC+0}' "$TEMP_RESULTS/clone-$pair.tsv")
        coverage=$(echo "scale=3; $loc_clone/(${version_loc[$versionA]} + ${version_loc[$versionB]})" | bc -l)
        echo "$versionA,$versionB,$coverage" >> results.csv

        # Clean up
        rm "$TEMP_CCFX/filtered-$pair.ccfxd" "$TEMP_CCFX/$pair.ccfxd"
        rm "$TEMP_METRICS/clone-$pair.tsv"
        rm "$TEMP_RESULTS/ids-$pair.txt"
        rm -f "$OUTPUT"/*.tmp
    done
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment