Skip to content

Instantly share code, notes, and snippets.

Created September 17, 2014 17:46
Show Gist options
  • Save hongjiang/fcb58e67d56142b06038 to your computer and use it in GitHub Desktop.
Save hongjiang/fcb58e67d56142b06038 to your computer and use it in GitHub Desktop.
# Detect classes that are packaged in more than one jar under a directory.
#
# Usage:   <this-script> <classpath-dir>
#
# Output:  a table on stdout with one row per pair of jars that share classes:
#            Similarity DuplicateClasses File1 File2
#          where Similarity is "%" followed by
#          100 * shared classes / class count of the larger jar (truncated).
#          A per-class listing of the jars that contain each duplicated class
#          is written to the file named by $verbose.
#
# Exit:    255 if no argument is given, 254 if the argument is not a
#          directory, 253 if `unzip` is unavailable, 1 if a temporary file
#          cannot be created.
#
# Limitation: the intermediate files are whitespace/comma separated text, so
# jar file names containing whitespace or commas are not handled correctly
# (directories containing spaces are fine).

if [ $# -eq 0 ]; then
  echo "please enter classpath dir" >&2
  exit 255 # same status the former non-portable `exit -1` produced
fi

if [ ! -d "$1" ]; then
  echo "not a directory" >&2
  exit 254 # same status the former `exit -2` produced
fi

if ! command -v unzip >/dev/null 2>&1; then
  echo "unzip is required but was not found in PATH" >&2
  exit 253
fi

# Temporary work files: created with mktemp (unpredictable names) and removed
# on every exit path by the trap below.
tmpfile=""
tmphash=""
cleanup() {
  [ -z "$tmpfile" ] || rm -f -- "$tmpfile"
  [ -z "$tmphash" ] || rm -f -- "$tmphash"
}
trap cleanup EXIT

tmpfile=$(mktemp "${TMPDIR:-/tmp}/.cp.XXXXXX") || exit 1
tmphash=$(mktemp "${TMPDIR:-/tmp}/.hash.XXXXXX") || exit 1

# Destination of the detailed report. The variable was referenced but never
# assigned; a value inherited from the environment is honoured, otherwise a
# default location is used.
verbose="${verbose:-/tmp/cp-verbose.log}"

# Stage 1: list the classes of every jar.
#   $tmphash gets one "<jar> <class-count>" line per jar.
#   $tmpfile gets one "<class> <jar1>,<jar2>,..." line per class that occurs
#   in more than one jar. The jar list is in sorted order because the input
#   to the aggregating awk is sorted.
while IFS= read -r -d '' jar_path; do
  jar_name=${jar_path##*/}
  class_list=$(unzip -l "$jar_path" |
    awk -v fn="$jar_name" '/\.class$/{print $NF, fn}')
  # Count only non-empty lines so a jar without classes is recorded as 0.
  class_count=$(printf '%s\n' "$class_list" | awk 'NF{n++} END{print n+0}')
  printf '%s %s\n' "$jar_name" "$class_count" >>"$tmphash"
  printf '%s\n' "$class_list"
done < <(find "$1" -name "*.jar" -print0) |
  sort |
  awk '
    NF {
      seen[$1]++
      jars[$1] = jars[$1] "," $2
    }
    END {
      for (cls in seen)
        if (seen[cls] > 1)
          print cls, substr(jars[cls], 2)
    }' >"$tmpfile"

# Stage 2: expand every duplicated class into jar pairs, count how often each
# pair occurs, and relate that count to the size of the larger jar.
awk '{print $2}' "$tmpfile" |
  awk -F',' '{for (i = 1; i <= NF; i++) for (j = i + 1; j <= NF; j++) print $i, $j}' |
  sort | uniq -c | sort -nrk1 |
  awk -v hash="$tmphash" '
    BEGIN {
      # Load "<jar> <class-count>"; exact-name lookup. If a jar name occurs
      # more than once (same file name in two directories) keep the largest.
      while ((getline line < hash) > 0) {
        split(line, f, " ")
        if (!(f[1] in size) || f[2] + 0 > size[f[1]] + 0)
          size[f[1]] = f[2] + 0
      }
      close(hash)
    }
    NF >= 3 {
      dup = $1 + 0
      larger = (size[$2] + 0 > size[$3] + 0) ? size[$2] + 0 : size[$3] + 0
      pct = (larger > 0) ? int(dup * 100 / larger) : 0
      printf "%02d %d %s %s\n", pct, dup, $2, $3
    }' |
  sort -nr -k1 -k2 |
  awk 'NR==1{print "Similarity DuplicateClasses File1 File2"}{print "%"$0}' |
  column -t

# Detailed report: each duplicated class followed by the jars containing it.
sort "$tmpfile" | awk '{print $1,"\n\t\t",$2}' >"$verbose"
echo "See $verbose for more details."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment