@dsc, forked from ottomata/count_seqs.sh (last active December 11, 2015 16:38)
# udp2log: split the combined udp2log capture into one sorted sequence file per host
cd /a/otto/mobile-sleuth.2/udp2log.orig
for host in cp104{1,2,3,4}; do
    grep -P "^$host" mobile.2013-01-13_11.45.00-13.15.00.log | awk '{print $2 " " $3}' | sort -n > ../$host.udp2log.seq
done
# hdfs: the same per-host split for the copy of the log that landed in HDFS
cd /a/otto/mobile-sleuth.2/hdfs.orig
for host in cp104{1,2,3,4}; do
    grep -P "^\d+\s+$host" mobile.hdfs.2013-01-23_12.00.00-13.15.00.log | awk '{print $3 " " $4}' | sort -n > ../$host.hdfs.seq
done
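
# (added for illustration) spot-check one of the per-host files before counting:
# each line should be two sorted columns with the sequence number first; the
# second column is whatever followed it in the source log line
head -n 3 ../cp1041.udp2log.seq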
cd /a/otto/mobile-sleuth.2/
format_packetloss_stats.py <(
    for host in cp104{1,2,3,4}; do
        # expected count if nothing were lost: the span of sequence numbers seen by udp2log
        first_seq=$(head -n 1 $host.udp2log.seq | awk '{print $1}')  # && echo $first_seq
        last_seq=$(tail -n 1 $host.udp2log.seq | awk '{print $1}')   # && echo $last_seq
        should_be=$(echo "$last_seq - $first_seq + 1" | bc)          # && echo $should_be
        # count lines between $first_seq and $last_seq in each file
        hdfs_count=$(sed -n -e "/^$first_seq/,/^$last_seq/p" $host.hdfs.seq | wc -l)        # && echo $hdfs_count
        udp2log_count=$(sed -n -e "/^$first_seq/,/^$last_seq/p" $host.udp2log.seq | wc -l)  # && echo $udp2log_count
        # emit a YAML-ish block per host for format_packetloss_stats.py to parse
        echo "$host:"
        echo " should_be: $should_be"
        echo " udp2log_count: $udp2log_count"
        echo " hdfs_count: $hdfs_count"
    done
)
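
For reference, each pass through the loop above emits one small YAML-style block per host, which format_packetloss_stats.py (below) reads; with the numbers embedded as test data in that script, the cp1041 block would be:

cp1041:
 should_be: 2183234
 udp2log_count: 2073716
 hdfs_count: 2073716
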
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import yaml
from textwrap import dedent
def calcDiff(stats):
    """Add fractional (*_percent) and absolute (*_diff) deltas vs. should_be, in place."""
    n = stats['should_be']
    stats['udp2log_percent'] = (stats['udp2log_count'] / float(n)) - 1.0
    stats['hdfs_percent'] = (stats['hdfs_count'] / float(n)) - 1.0
    stats['udp2log_diff'] = stats['udp2log_count'] - n
    stats['hdfs_diff'] = stats['hdfs_count'] - n
    return stats
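
# For concreteness, a worked example of what calcDiff() adds, using the cp1041
# numbers from the test data below:
#   udp2log_percent = 2073716 / 2183234.0 - 1.0  ~= -0.05016  (rendered as -5.016%)
#   udp2log_diff    = 2073716 - 2183234           = -109518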
def calcDiffTotal(data):
    """Compute deltas for every host plus an overall 'totals' section; returns (totals, data)."""
    totals = {
        "section"       : "totals",
        "should_be"     : 0,
        "udp2log_count" : 0,
        "hdfs_count"    : 0,
    }
    for host, stats in data.iteritems():
        stats['section'] = host
        calcDiff(stats)
        for k in 'should_be hdfs_count udp2log_count'.split():
            totals[k] += stats[k]
    return calcDiff(totals), data

def formatStats(stats):
    """Render one section (a host or the totals) as an indented block of text."""
    return dedent("""
        {section}:
            should_be: {should_be: >10,}
            udp2log: {udp2log_count: >10,} ({udp2log_percent: >+5.3%}, {udp2log_diff: >+6,})
            hdfs: {hdfs_count: >10,} ({hdfs_percent: >+5.3%}, {hdfs_diff: >+6,})
        """[1:]).format(**stats)

def printStats(data, outfile):
    """Write the totals block first, then one block per host in sorted order."""
    totals, data = calcDiffTotal(data)
    outfile.write(formatStats(totals)+'\n')
    for host, stats in sorted(data.items()):
        outfile.write(formatStats(stats)[:-1]+'\n')
        outfile.write('\n')
testData = yaml.load(dedent('''
    cp1041:
        should_be: 2183234
        udp2log_count: 2073716
        hdfs_count: 2073716
    cp1042:
        should_be: 2185238
        udp2log_count: 2075082
        hdfs_count: 2075081
    cp1043:
        should_be: 2197387
        udp2log_count: 2086935
        hdfs_count: 2086935
    cp1044:
        should_be: 2197839
        udp2log_count: 2086864
        hdfs_count: 2086864
    '''))

if __name__ == '__main__':
    import sys, argparse
    parser = argparse.ArgumentParser(description="Formats packetloss stats.")
    parser.add_argument("-t", "--test", action="store_true", default=False,
                        help="Use test data [default: %(default)s]")
    parser.add_argument('infile', nargs='?', type=argparse.FileType('rU'), default=sys.stdin,
                        help="Input data. Must be valid YAML.")
    parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'), default=sys.stdout)
    args = parser.parse_args()
    if args.test:
        data = testData
    else:
        data = yaml.load(args.infile)
    printStats(data, args.outfile)
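
To sanity-check the formatter on its own, it can be run against the embedded test data (this assumes the script is executable and on the PATH, as in the pipeline above):

format_packetloss_stats.py --test

Given real input instead, it reads the same YAML-style per-host blocks from stdin or from a file argument, exactly as emitted by the shell loop above.
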
# Gap check on one host: print the second column of each line where the sequence
# number jumps, then tally consecutive duplicates with uniq -c.
awk 'NR-1{if($1!=(_+1))print $2}{_=$1}' cp1044.udp2log.seq | uniq -c
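
A variant of the same gap check that also prints how many sequence numbers were skipped at each gap (it assumes the same two-column layout of the .seq files, sequence number first):

awk 'NR > 1 && $1 != prev + 1 { print $2, $1 - prev - 1 } { prev = $1 }' cp1044.udp2log.seq
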
totals:
should_be: 8,763,698
udp2log: 8,322,597 (-5.033%, -441,101)
hdfs: 8,322,596 (-5.033%, -441,102)
cp1041:
should_be: 2,183,234
udp2log: 2,073,716 (-5.016%, -109,518)
hdfs: 2,073,716 (-5.016%, -109,518)
cp1042:
should_be: 2,185,238
udp2log: 2,075,082 (-5.041%, -110,156)
hdfs: 2,075,081 (-5.041%, -110,157)
cp1043:
should_be: 2,197,387
udp2log: 2,086,935 (-5.027%, -110,452)
hdfs: 2,086,935 (-5.027%, -110,452)
cp1044:
should_be: 2,197,839
udp2log: 2,086,864 (-5.049%, -110,975)
hdfs: 2,086,864 (-5.049%, -110,975)