Script to do a recursive du on an Isilon cluster using the FSA reporting
#!/bin/bash
#
# If you have to manage an EMC Isilon cluster and want to use the
# File System Analytics (FSA) tool to get the equivalent of a
# "du --max-depth=X", you'll quickly realise that, unbelievably, they
# haven't implemented a recursive scan. This script does that.
#
# Must be run on the Isilon cluster machines, with sufficient privileges to
# access the FSA reports.
#
# known issues:
# - directory names containing commas (or other funny characters) will
#   probably break the CSV parsing
#
# Refs:
# https://community.emc.com/community/products/isilon/blog/2016/07/25/insightiq-iiqdataexport-utility-under-onefs-v800
# https://thesanguy.com/2018/01/09/insightiq-data-export-utility/
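#
# For reference, the goal is roughly what this would produce on an ordinary
# filesystem (GNU du syntax, shown only to illustrate the intent):
#   du --max-depth=2 -B1 /ifs/ERA-CLIM/Repro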
if [ $# -ne 4 ] ; then
    echo "Usage: $0 CLUSTER_NAME BASE_DIR MAX_DEPTH OUTPUT_FILE" >&2
    echo >&2
    echo "  CLUSTER_NAME = tcenas, eumetsat [=DSNNAS]" >&2
    echo "  BASE_DIR     = e.g. /ifs/ERA-CLIM/Repro" >&2
    echo "  MAX_DEPTH    = depth to scan to, limited also by FSA resolution" >&2
    echo "  OUTPUT_FILE  = where to write the final outputs to" >&2
    echo >&2
    echo "e.g. $0 eumetsat /ifs/ERA-CLIM/Repro 2 results.csv" >&2
    exit 1
fi
CLUSTER_NAME=$1
# must be /ifs/MODULE/dir; verify this
BASE_DIR=$2
if ! echo "${BASE_DIR}" | grep -q '^/ifs/[^/]*/[^/]' ; then
    echo "BASE_DIR must be at least 3 levels deep (e.g. /ifs/MODULE/xxx) because the FSA export tool requires a different usage at the MODULE level" >&2
    echo "(actually, I've not tested this, so maybe it works - edit the script and try if you like..)" >&2
    exit 1
fi
MAX_DEPTH=$3
OUTPUT_FILE=$4
# make a unique place to dump temporary files
TMPOUT=$(mktemp -d)
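# optional hardening (untested here): remove the temp dir even on early exit,
# which makes the explicit rm -rf at the end a no-op
trap 'rm -rf "${TMPOUT}"' EXIT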
# need to get the id number of the latest File System Analytics (FSA) report;
# the listing output looks like this and we want the last id:
# Available Reports for: tme-sandbox Time Zone: EDT
#================================================================================
# |ID |FSA Job Start |FSA Job End |Size |
#================================================================================
# |473 |Jun 10 2016, 10:00 PM |Jun 10 2016, 10:30 PM |92.933G |
#....
#--------------------------------------------------------------------------------
# |492 |Jun 13 2016, 10:00 PM |Jun 13 2016, 10:32 PM |4.794G |
#--------------------------------------------------------------------------------
# |498 |Jun 14 2016, 10:00 PM |Jun 14 2016, 10:30 PM |4.816G |
#================================================================================
#(space/empty line)
iiq_data_export fsa list --reports "${CLUSTER_NAME}" > "${TMPOUT}/reports.txt"
# the last data row sits 3 lines from the end (above the closing ==== line and
# the trailing blank line); its second |-separated field is the id
REPORT_ID=$(tail -n 3 "${TMPOUT}/reports.txt" | head -n 1 | cut -f2 -d\| | tr -d ' ')
echo "Using report id ${REPORT_ID} ($(tail -n 3 "${TMPOUT}/reports.txt" | head -n 1))"
# first get the base dir contents
iiq_data_export fsa export -c "${CLUSTER_NAME}" --data-module directories -o "${REPORT_ID}" -r "directory:${BASE_DIR}" -n "${TMPOUT}/basedir_with_header.csv"
# extract the header line for later
head -n 1 "${TMPOUT}/basedir_with_header.csv" > "${TMPOUT}/header.csv"
# strip the header for the following work
tail -n +2 "${TMPOUT}/basedir_with_header.csv" > "${TMPOUT}/level0.csv"
# output of all these reports looks like
#path[directory:/ifs/ERA-CLIM/Repro_Temp/mviri/],dir_cnt (count),file_cnt (count),ads_cnt,other_cnt (count),log_size_sum (bytes),phys_size_sum (bytes),log_size_sum_overflow,report_date: 1558306942
#/ifs/ERA-CLIM/Repro/mviri/level0,927,0,0,967506,172233718,2539652608,0
#/ifs/ERA-CLIM/Repro/mviri/level1,894,0,0,933778,166229545,2468796928,0
#/ifs/ERA-CLIM/Repro/mviri/level2,44868,2535868,0,2,1246000052970,2271269414912,0
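# i.e. field 1 is the directory path (which drives the recursion below), and
# fields 6/7 are the logical/physical sizes in bytes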
# for each depth after the first, request reports on each item listed in the previous depth
for depth in $(seq 1 "${MAX_DEPTH}"); do
    echo "Depth ${depth}"
    # make sure this level's file exists even if nothing gets appended, so the
    # next iteration's input redirect can't fail
    touch "${TMPOUT}/level${depth}.csv"
    # scan through the previous level's report, request a dump for each
    # directory listed (field 1) and combine them into a single report for this level
    while IFS=, read -r dir _rest; do
        # stdin is redirected from /dev/null so the export tool can't eat the
        # loop's input
        iiq_data_export fsa export -c "${CLUSTER_NAME}" --data-module directories -o "${REPORT_ID}" -r "directory:${dir}" -n "${TMPOUT}/temp_fsa_dump.csv" < /dev/null
        tail -n +2 "${TMPOUT}/temp_fsa_dump.csv" >> "${TMPOUT}/level${depth}.csv"
        # remove the dump immediately so a failed export can't be counted twice
        rm -f "${TMPOUT}/temp_fsa_dump.csv"
    done < "${TMPOUT}/level$((depth - 1)).csv"
done
# final step: combine all levels, sort, and prepend the header, writing the
# result to OUTPUT_FILE
cat "${TMPOUT}/header.csv" > "${OUTPUT_FILE}"
sort "${TMPOUT}"/level*.csv >> "${OUTPUT_FILE}"
# clean up
rm -rf "${TMPOUT}"
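#
# Example follow-up (untested, assumes the column layout shown above): list the
# ten largest directories by physical size across all scanned levels:
#   tail -n +2 results.csv | sort -t, -k7,7 -n -r | head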