Last active
December 22, 2015 17:08
-
-
Save drincruz/b7d9b6dc0ccaeff8292c to your computer and use it in GitHub Desktop.
data mining on the command line with bash and python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Data mining on the command line with bash and python.
#
# For a daily log file, take the DEBUG lines that mention "anonymous_id" or
# "high_scores:", read the trailing integer of each one, and report the first
# as `searches`, the second as `high_scoring`, plus high_scoring / searches.
# NOTE(review): this relies on the log emitting the search-count line before
# the high_scores line, and on the count being the last space-separated
# token -- confirm against the logger's format.

# Python program fed by the grep/sed/cut pipeline below. It is kept in a
# variable so the source can start at column 0 (required by `python -c`).
# `sys.exit(<str>)` writes the message to stderr and exits with status 1.
ratio_summary_py='
import sys

counts = [int(line.split(" ")[-1].strip()) for line in sys.stdin]
if len(counts) < 2:
    sys.exit("error: expected 2 matching log lines, found {}".format(len(counts)))
searches, high_scoring = counts[0], counts[1]
if searches == 0:
    sys.exit("error: searches is 0, ratio is undefined")
print("searches:{}\thigh_scoring:{}\tratio:{}".format(
    searches, high_scoring, high_scoring / float(searches)))
'

#######################################
# Print the search / high-score summary for one log file.
# Globals:   ratio_summary_py (read)
# Arguments: $1 - path to the log file (e.g. 2015-10-09.err)
# Outputs:   "searches:N<TAB>high_scoring:M<TAB>ratio:R" on stdout;
#            diagnostics on stderr
# Returns:   0 on success; 1 if the log is unreadable or does not contain
#            two usable counts; 127 if no Python interpreter is found
#######################################
report_search_ratio() {
  local log_file=$1
  local py

  if [[ ! -r "$log_file" ]]; then
    printf 'error: cannot read log file: %s\n' "$log_file" >&2
    return 1
  fi

  # Prefer `python` (what this snippet was written against) and fall back to
  # `python3` on systems that no longer ship an unversioned interpreter.
  if ! py=$(command -v python || command -v python3); then
    printf 'error: no python interpreter found in PATH\n' >&2
    return 127
  fi

  # This is the main meat and potatoes of what I really want.
  grep -E DEBUG -- "$log_file" \
    | grep -E '(anonymous_id|high_scores:)' \
    | sed -e 's/anonymous_id//g' \
    | cut -d ' ' -f 2- \
    | "$py" -c "$ratio_summary_py"
}

# Single day.
report_search_ratio 2015-10-09.err

# This is just a bonus to loop through a date range of my log files.
# Relative dates via `date -d` need GNU date.
for d in {0..15}; do
  if ! today=$(date -d "2015-10-09 + ${d} days" +'%Y-%m-%d'); then
    printf 'error: could not compute date (GNU date -d is required)\n' >&2
    break
  fi
  # Only emit a line for days that produced a summary, so a failed day can
  # no longer leave a dangling "<date> " prefix glued to the next day's row.
  if summary=$(report_search_ratio "${today}.err"); then
    printf '%s %s\n' "$today" "$summary"
  fi
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment