Last active
August 29, 2015 14:15
-
-
Save neuhalje/bd6295585bd08874ade2 to your computer and use it in GitHub Desktop.
Counts and groups files by file type (extension).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# | |
# Usage: histo [path] | |
# | |
# Counts and groups files by file type (extension). | |
# | |
# Example: | |
# histo /etc | |
# ... | |
# bash 5 | |
# db 5 | |
# pem 5 | |
# local 6 | |
# xml 6 | |
# menu 8 | |
# csh 10 | |
# repo 12 | |
# txt 19 | |
# sh 22 | |
# desktop 26 | |
# ctb 29 | |
# tti 29 | |
# kti 67 | |
# ttb 86 | |
# ktb 97 | |
# conf 351 | |
# pp 396 | |
# | |
# | |
# Implementation: | |
# Finds all regular files below path (default: current directory) and groups them by file type.
# The type is taken from the file name extension; files without an extension are not counted.
# | |
# Needs Bash >= 4.0 (associative arrays)
# | |
# Directory to scan: the first command line argument, falling back to the
# current directory when that argument is missing or empty.
root=${1:-.}
#######################################
# Prints the extension of the last component of a path.
#
# The extension is everything after the last dot that is followed by at least
# one non-dot character. Names without such a dot, and dot files whose only
# dot is the leading one (e.g. ".bashrc"), yield an empty line.
#
# Arguments: $1 - path of the file (only the part after the last '/' is used)
# Outputs:   the extension (possibly empty) followed by a newline on stdout
#######################################
function file_type_by_extension()
{
  # http://stackoverflow.com/a/1403489
  local fullpath="$1"
  local filename="${fullpath##*/}"      # Strip longest match of */ from start
  local base="${filename%.[^.]*}"       # Strip shortest match of . plus at least one non-dot char from end
  local ext="${filename:${#base} + 1}"  # Substring after base and the dot; empty when there is no extension

  # If we have an extension and no base, the "extension" is really the name
  # of a dot file, so report no extension.
  if [[ -z "$base" && -n "$ext" ]]; then
    ext=""
  fi

  # printf with a quoted argument prints the extension verbatim; an unquoted
  # echo would glob-expand "*", collapse whitespace and swallow "-n"/"-e".
  printf '%s\n' "$ext"
}
#######################################
# Prints the file type as reported by the 'file' utility in brief mode (-b).
#
# Arguments: $1 - path of the file to inspect
# Outputs:   whatever 'file -b' writes to stdout for that path
# Returns:   the exit status of 'file'
#######################################
function file_type_via_file_util()
{
  local filename="$1"
  # '--' ends option parsing so a path starting with '-' is treated as a file.
  file -b -- "$filename"
}
#######################################
# Prints the histogram as "type<TAB>count" lines, sorted by ascending count.
#
# Globals:   histo (read) - associative array mapping file type to count
# Arguments: none are used. An associative array cannot be passed by value,
#            so any argument given by a caller is ignored and the global
#            table is never modified.
# Outputs:   one line per entry with a non-empty count on stdout
#######################################
function print_histo()
{
  local ft
  for ft in "${!histo[@]}"; do
    # Skip entries without a count.
    if [[ -n "${histo[$ft]}" ]]; then
      # printf with quoted arguments keeps whitespace, glob characters and
      # backslashes in the type name exactly as stored.
      printf '%s\t%s\n' "$ft" "${histo[$ft]}"
    fi
  done | sort -t$'\t' -b -n --key=2,2
}
# Histogram table: file type -> number of files of that type.
declare -A histo

# find emits NUL-terminated paths and read consumes them with an empty
# delimiter, IFS cleared and -r set, so names containing whitespace,
# backslashes or newlines are counted correctly. The root is quoted so that a
# directory with spaces is passed to find as a single argument.
while IFS= read -r -d '' filename
do
  # Change to file_type_via_file_util to use the 'file' util to get the filetype
  ft=$(file_type_by_extension "$filename")
  # Files for which no type could be determined are not counted.
  if [[ -n "$ft" ]]
  then
    histo["$ft"]=$(( ${histo["$ft"]:-0} + 1 ))
  fi
done < <(find "${root:-.}" -type f -print0)

# print_histo works on the global table. The argument mirrors the historical
# call `print_histo $histo`, i.e. it hands over the value stored under key "0",
# so the count for extension "0" survives an implementation that assigns its
# first argument back to histo.
print_histo "${histo[0]-}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment