Skip to content

Instantly share code, notes, and snippets.

@sergioro9
Last active February 7, 2019 22:35
Show Gist options
  • Save sergioro9/ed45fc524b570c6af4b76d455fc4dce2 to your computer and use it in GitHub Desktop.
Save sergioro9/ed45fc524b570c6af4b76d455fc4dce2 to your computer and use it in GitHub Desktop.
Command-line tool that detects the language distribution of a codebase, ignoring binary or vendored files.
#######################################
# Report how the bytes of the regular files below the current directory are
# distributed across the file types reported by file(1), then list the
# binary files that were left out of that distribution.
#
# A file counts as "binary" when its file(1) description mentions
# "executable" or "relocatable" but not "text"; scripts such as
# "POSIX shell script, ASCII text executable" therefore stay in the
# distribution. Everything under ./.git/ is ignored.
#
# Globals:   COLUMNS (read) - terminal width used to clip long type names;
#            80 is assumed when it is unset or not a plain number.
# Arguments: none
# Outputs:   stdout - "Total non-executable bytes: N", one
#            "PERCENT% <TAB>TYPE" line per type (largest share first), then
#            either the binary file count and paths or
#            "No binary files found".
# Returns:   0 on success, 1 if file(1) is not installed.
#######################################
linguistic() {
  local path desc size cols limit
  local total_bytes=0
  local -a records=() binaries=()

  if ! command -v file >/dev/null 2>&1; then
    printf 'linguistic: file(1) is required but was not found\n' >&2
    return 1
  fi

  # NUL-delimited paths keep names with spaces, '?' or newlines intact.
  while IFS= read -r -d '' path; do
    desc=$(file -b -- "$path") || continue
    desc=${desc//$'\n'/ } # keep every record on a single line for awk

    # Classify on the description only, never on the path.
    if [[ $desc == *executable* || $desc == *relocatable* ]] \
      && [[ $desc != *text* ]]; then
      binaries+=("$path")
      continue
    fi

    size=$(wc -c <"$path") || continue
    size=$((size)) # strip the padding some wc implementations emit
    records+=("${size}"$'\t'"${desc}")
    total_bytes=$((total_bytes + size))
  done < <(find . -type f ! -path './.git/*' -print0)

  printf 'Total non-executable bytes: %d\n' "$total_bytes"

  # Room left for the type name after the "xx.xx% <TAB>" column.
  cols=${COLUMNS:-80}
  [[ $cols =~ ^[0-9]+$ ]] || cols=80
  limit=$((10#$cols - 8))
  if ((limit < 8)); then
    limit=8
  fi

  # Guarding on the total also avoids a division by zero and an empty
  # array expansion when nothing was counted.
  if ((total_bytes > 0)); then
    printf '%s\n' "${records[@]}" \
      | awk -v total="$total_bytes" -v limit="$limit" '
          {
            tab = index($0, "\t")
            sum[substr($0, tab + 1)] += substr($0, 1, tab - 1)
          }
          END {
            for (desc in sum) {
              if (sum[desc] <= 0) continue   # empty files add no share
              label = desc
              if (length(label) > limit)
                label = substr(label, 1, limit - 3) "..."
              printf "%5.2f%% \t%s\n", sum[desc] / total * 100, label
            }
          }' \
      | LC_ALL=C sort -nr
  fi

  if ((${#binaries[@]} > 0)); then
    printf '\nTotal number of binary files %d\n' "${#binaries[@]}"
    printf 'Binary files:\n'
    printf '%s\n' "${binaries[@]}"
  else
    printf 'No binary files found\n'
  fi
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment