Last active
August 29, 2015 14:22
-
-
Save cl4rk3/de65271e98a91ac8d3b6 to your computer and use it in GitHub Desktop.
Bash one-liners for parsing, counting, and analyzing the AA tape manifest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count the GUIDs in the manifest. Assumes there is exactly one bagit.txt per
# "package" (GUID); the author's run reported 73940.
# -F makes "bagit.txt" a literal string (a bare "." would match any character);
# -c counts matching lines directly instead of piping to wc -l.
grep even_num cpb-aa.global_lto_report.01.26.2014.csv | grep -cF bagit.txt

# Create a 3-column list (filemark,tape id,AA GUID).
# Fields are split on both "," and "/" so the path components become columns;
# columns 1, 3 and 11 of that split are kept, same as the original sed+awk pair.
awk -F'[,/]' -v OFS=, '/even_num/ && index($0, "bagit.txt") { print $1, $3, $11 }' cpb-aa.global_lto_report.01.26.2014.csv > filemarkToTapeToGUID.csv

# Create a 3-column list (tape id,filemark (TAR#),count of packages in TAR).
# Reads the file produced by the previous step. (The original read
# filemarkToBoxToGUID.csv, which nothing here creates -- NOTE(review): confirm
# that was a leftover name and not a separate input.)
# Counting in awk keeps the comma-separated fields intact even if an ID
# contains whitespace.
awk -F, '{ n[$2 "," $1]++ } END { for (k in n) print k "," n[k] }' filemarkToTapeToGUID.csv | sort > tapeTofilemarkTocount.csv

# Total number of files that belong to a GUID (column 4) having more than one
# file. The comparison is numeric: the original "grep -v files,1" also threw
# away counts such as 10-19 and 100+, under-counting the total.
# "total + 0" prints 0 rather than an empty line when nothing qualifies.
awk -F, '{ n[$4]++ } END { for (g in n) if (n[g] > 1) total += n[g]; print total + 0 }' cpb_LTO_even_keepers.07.09.2015.csv
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment