-
-
Save dsc/4628582 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# udp2log: split the combined capture into one file per host.
# Every log line that starts with the host name contributes its 2nd and 3rd
# whitespace-separated fields; the result is sorted numerically and written
# to <host>.udp2log.seq in the parent directory.
cd /a/otto/mobile-sleuth.2/udp2log.orig
for host in cp1041 cp1042 cp1043 cp1044; do
    grep -P "^$host" mobile.2013-01-13_11.45.00-13.15.00.log \
        | awk '{ print $2, $3 }' \
        | sort -n > "../$host.udp2log.seq"
done
# hdfs: split the combined capture into one file per host.
# Here each line carries a leading numeric column before the host name, so
# the match skips it and the 3rd and 4th fields are kept; the result is
# sorted numerically and written to <host>.hdfs.seq in the parent directory.
cd /a/otto/mobile-sleuth.2/hdfs.orig
for host in cp1041 cp1042 cp1043 cp1044; do
    grep -P "^\d+\s+$host" mobile.hdfs.2013-01-23_12.00.00-13.15.00.log \
        | awk '{ print $3, $4 }' \
        | sort -n > "../$host.hdfs.seq"
done
cd /a/otto/mobile-sleuth.2/

# Build a small YAML document (one mapping per host) and hand it to the
# formatter through process substitution, so it arrives as the input file.
format_packetloss_stats.py <(
    for host in cp104{1,2,3,4}; do
        # The .seq files are sorted numerically, so the first and last lines
        # of the udp2log file carry its lowest and highest sequence numbers.
        first_seq=$(head -n 1 "$host.udp2log.seq" | awk '{print $1}')
        last_seq=$(tail -n 1 "$host.udp2log.seq" | awk '{print $1}')
        # Number of lines expected if no sequence number in the range is missing.
        should_be=$(echo "$last_seq - $first_seq + 1" | bc)

        # Count lines whose sequence number lies in [first_seq, last_seq].
        # The comparison is numeric on purpose: a regex range such as
        # sed "/^$first_seq/,/^$last_seq/p" matches by prefix, so it can start
        # on a longer number sharing the prefix, restart after it has closed,
        # or run to end-of-file when the boundary line is absent from the file.
        hdfs_count=$(awk -v lo="$first_seq" -v hi="$last_seq" \
            '$1 >= lo && $1 <= hi { n++ } END { print n + 0 }' "$host.hdfs.seq")
        udp2log_count=$(awk -v lo="$first_seq" -v hi="$last_seq" \
            '$1 >= lo && $1 <= hi { n++ } END { print n + 0 }' "$host.udp2log.seq")

        echo "$host:"
        echo " should_be: $should_be"; echo " udp2log_count: $udp2log_count"; echo " hdfs_count: $hdfs_count";
    done
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import yaml | |
from textwrap import dedent | |
def calcDiff(stats):
    """Add loss figures, relative to ``stats['should_be']``, to *stats*.

    For each of the ``udp2log`` and ``hdfs`` counters this stores:

    * ``<name>_percent`` -- ``count / should_be - 1.0`` as a fraction
      (negative when fewer lines were seen than expected);
    * ``<name>_diff`` -- ``count - should_be``.

    When ``should_be`` is zero the ratio is undefined. Instead of raising
    ``ZeroDivisionError`` (which happens for the totals of an empty input
    mapping), the fraction is reported as ``0.0`` if the count is also zero
    and as ``inf`` otherwise.

    The mapping is modified in place and also returned.
    """
    expected = stats['should_be']
    for source in ('udp2log', 'hdfs'):
        count = stats[source + '_count']
        if expected:
            fraction = (count / float(expected)) - 1.0
        elif count == 0:
            # Nothing expected and nothing seen: no deviation.
            fraction = 0.0
        else:
            # Lines were seen although none were expected.
            fraction = float('inf')
        stats[source + '_percent'] = fraction
        stats[source + '_diff'] = count - expected
    return stats
def calcDiffTotal(data):
    """Compute per-host loss figures and an aggregate "totals" record.

    *data* maps a host name to a mapping holding ``should_be``,
    ``udp2log_count`` and ``hdfs_count``. Each host mapping is updated in
    place: it gains a ``section`` key set to the host name plus the fields
    added by ``calcDiff``. The three counters are summed across hosts into a
    separate record whose ``section`` is ``"totals"``.

    Returns a ``(totals, data)`` tuple, where ``totals`` has also been passed
    through ``calcDiff``.
    """
    totals = {
        "section": "totals",
        "should_be": 0,
        "udp2log_count": 0,
        "hdfs_count": 0,
    }
    # items() works on both Python 2 and Python 3; iteritems() is Python 2 only.
    for host, stats in data.items():
        stats['section'] = host
        calcDiff(stats)
        for key in ('should_be', 'hdfs_count', 'udp2log_count'):
            totals[key] += stats[key]
    return calcDiff(totals), data
def formatStats(stats):
    """Render one stats record as a four-line text section.

    *stats* must provide ``section``, ``should_be`` and, for both ``udp2log``
    and ``hdfs``, the ``_count``, ``_percent`` and ``_diff`` keys. Counts are
    right-aligned in 10 columns with thousands separators, percentages are
    signed with three decimals, and diffs are signed and right-aligned in
    6 columns. The returned string ends with a newline.
    """
    layout = [
        "{section}:",
        "should_be: {should_be: >10,}",
        "udp2log: {udp2log_count: >10,} ({udp2log_percent: >+5.3%}, {udp2log_diff: >+6,})",
        "hdfs: {hdfs_count: >10,} ({hdfs_percent: >+5.3%}, {hdfs_diff: >+6,})",
        "",  # gives the section its trailing newline
    ]
    template = "\n".join(layout)
    return template.format(**stats)
def printStats(data, outfile):
    """Write the totals section, then one section per host, to *outfile*.

    *data* is the host -> counters mapping accepted by ``calcDiffTotal``;
    *outfile* is any object with a ``write`` method. Hosts are written in
    sorted key order.
    """
    totals, data = calcDiffTotal(data)
    # The totals section is followed by one extra newline (a blank line).
    outfile.write(formatStats(totals)+'\n')
    for host, stats in sorted(data.items()):
        # Drops the last character of the formatted section and appends a
        # newline in its place.
        outfile.write(formatStats(stats)[:-1]+'\n')
    # NOTE(review): indentation was not recoverable from the listing, so it is
    # unclear whether this write belongs inside the loop (blank line after
    # every host) or after it (single trailing blank line) -- confirm.
    outfile.write('\n')
# Built-in sample input, selected with the --test command-line flag: a mapping
# of host name -> expected and observed line counts.
# yaml.safe_load is used because yaml.load() without an explicit Loader is
# deprecated since PyYAML 5.1 and raises TypeError on PyYAML 6; safe_load
# only builds plain Python data, which is all this document needs.
testData = yaml.safe_load(dedent('''
    cp1041:
        should_be: 2183234
        udp2log_count: 2073716
        hdfs_count: 2073716
    cp1042:
        should_be: 2185238
        udp2log_count: 2075082
        hdfs_count: 2075081
    cp1043:
        should_be: 2197387
        udp2log_count: 2086935
        hdfs_count: 2086935
    cp1044:
        should_be: 2197839
        udp2log_count: 2086864
        hdfs_count: 2086864
    '''))
if __name__ == '__main__':
    # Command-line entry point: read a YAML mapping of per-host counters
    # (or use the built-in sample with --test) and write the formatted report.
    import sys
    import argparse

    parser = argparse.ArgumentParser(description="Formats packetloss stats.")
    parser.add_argument("-t", "--test", action="store_true", default=False,
                        help="Use test data [default: %(default)s]")
    # Plain 'r' instead of 'rU': the 'U' flag was removed from open() in
    # Python 3.11, and text mode already handles universal newlines there.
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin,
                        help="Input data. Must be valid YAML.")
    parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'), default=sys.stdout)
    args = parser.parse_args()

    if args.test:
        data = testData
    else:
        # The input comes from outside the program, so it is parsed with
        # safe_load, which cannot construct arbitrary Python objects.
        # yaml.load() without a Loader is also rejected by PyYAML 6.
        data = yaml.safe_load(args.infile)
    printStats(data, args.outfile)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# From the second line on, print the 2nd field of every line whose 1st field
# is not exactly one more than the previous line's 1st field, then collapse
# adjacent identical values into "<count> <value>" rows.
awk 'NR > 1 && $1 != prev + 1 { print $2 } { prev = $1 }' cp1044.udp2log.seq | uniq -c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
totals:
should_be: 8,763,698
udp2log: 8,322,597 (-5.033%, -441,101)
hdfs: 8,322,596 (-5.033%, -441,102)
cp1041:
should_be: 2,183,234
udp2log: 2,073,716 (-5.016%, -109,518)
hdfs: 2,073,716 (-5.016%, -109,518)
cp1042:
should_be: 2,185,238
udp2log: 2,075,082 (-5.041%, -110,156)
hdfs: 2,075,081 (-5.041%, -110,157)
cp1043:
should_be: 2,197,387
udp2log: 2,086,935 (-5.027%, -110,452)
hdfs: 2,086,935 (-5.027%, -110,452)
cp1044:
should_be: 2,197,839
udp2log: 2,086,864 (-5.049%, -110,975)
hdfs: 2,086,864 (-5.049%, -110,975)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment