Skip to content

Instantly share code, notes, and snippets.

@neuhalje
Last active August 29, 2015 14:15
Show Gist options
  • Save neuhalje/bd6295585bd08874ade2 to your computer and use it in GitHub Desktop.
Save neuhalje/bd6295585bd08874ade2 to your computer and use it in GitHub Desktop.
Counts and groups files by file type (extension).
#!/bin/bash
#
# Usage: histo [path]
#
# Counts and groups files by file type (extension).
#
# Example:
# histo /etc
# ...
# bash 5
# db 5
# pem 5
# local 6
# xml 6
# menu 8
# csh 10
# repo 12
# txt 19
# sh 22
# desktop 26
# ctb 29
# tti 29
# kti 67
# ttb 86
# ktb 97
# conf 351
# pp 396
#
#
# Implementation:
# Finds all regular files below path (default: current directory) and groups them by file type. Type detection is done via the file extension.
#
# Needs Bash >= 4.0 (associative arrays)
#
# Directory to scan: the first argument, or the current directory when the
# argument is missing or empty.
root="${1:-.}"
# Print the extension of a path: the text after the last dot of its final
# path component, or an empty line when there is none.
#
# Arguments: $1 - path to inspect; only the part after the last '/' is used
# Outputs:   the extension, without the leading dot, on stdout
#
# A dot-file such as ".bashrc" (nothing in front of the extension dot) is
# reported as having no extension.
function file_type_by_extension()
{
  # http://stackoverflow.com/a/1403489
  local fullpath="$1"
  local filename="${fullpath##*/}"       # Strip longest match of */ from start
  local base="${filename%.[^.]*}"        # Strip shortest match of . plus at least one non-dot char from end
  local ext="${filename:${#base} + 1}"   # Substring from len of base thru end

  # If we have an extension and no base, the "extension" is really the name.
  if [[ -z "$base" && -n "$ext" ]]; then
    ext=""
  fi

  # printf with a quoted argument emits the extension verbatim; an unquoted
  # echo would glob-expand it, collapse blanks and treat "-n" as an option.
  printf '%s\n' "$ext"
}
# Print the type of a file as reported by the `file` utility.
#
# Arguments: $1 - path of the file to classify
# Outputs:   the brief (`-b`) description printed by `file`, on stdout
# Returns:   the exit status of `file`
function file_type_via_file_util()
{
  # Local, so a direct (non-subshell) call cannot overwrite a caller's
  # variable of the same name.
  local filename="$1"
  # `--` ends option parsing: a path starting with '-' is treated as a file.
  file -b -- "$filename"
}
# Print the global `histo` table as "<type><TAB><count>" lines, sorted by
# ascending count.
#
# Globals:   histo (read) - associative array mapping file type -> count
# Arguments: none used; any argument is ignored, so callers that still pass
#            one keep working. (Assigning it to `histo` would only overwrite
#            element 0 of the associative array.)
# Outputs:   one tab-separated line per type with a non-empty count, on stdout
function print_histo()
{
  local ft
  for ft in "${!histo[@]}"
  do
    # Entries without a count are not reported.
    if [[ -n "${histo[$ft]}" ]]
    then
      # Quoted printf arguments keep the type name verbatim (no word
      # splitting, globbing or backslash-escape interpretation).
      printf '%s\t%s\n' "$ft" "${histo[$ft]}"
    fi
  done | sort -t$'\t' -b -n --key=2,2
}
# Table mapping file type -> number of files seen with that type.
declare -A histo

# Walk every regular file below $root. find emits NUL-terminated names and
# `IFS= read -r -d ''` reads them back unmodified, so names containing
# blanks, backslashes or newlines are counted correctly.
while IFS= read -r -d '' filename
do
  # Change to file_type_via_file_util to use the 'file' util to get the filetype
  ft=$(file_type_by_extension "$filename")
  # Files for which no type could be determined are not counted.
  if [[ -n "$ft" ]]
  then
    count=${histo["$ft"]:-0}
    histo["$ft"]=$(( count + 1 ))
  fi
done < <(find "${root:-.}" -type f -print0)

# print_histo reports the global `histo` table. Element 0 (what a bare
# `$histo` expands to) is passed along only to keep its existing call contract.
print_histo "${histo[0]-}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment