Skip to content

Instantly share code, notes, and snippets.

@dsc
dsc / count_seqs.sh
Last active December 11, 2015 16:38 — forked from ottomata/count_seqs.sh
# udp2log:
# separate hosts: write fields 2 and 3 of each host's lines to its own sorted .seq file
# Split the udp2log capture into one file per host: for each host, keep the
# lines that start with its name, print fields 2 and 3, and write them
# numerically sorted to ../<host>.udp2log.seq.
# Stop if the working directory is missing, so nothing is read from or
# written relative to the wrong location.
cd /a/otto/mobile-sleuth.2/udp2log.orig || exit 1
for host in cp104{1,2,3,4}; do
  grep -P "^$host" mobile.2013-01-13_11.45.00-13.15.00.log \
    | awk '{print $2 " " $3}' \
    | sort -n > "../${host}.udp2log.seq"
done
# hdfs:
# separate hosts
#!/bin/bash
for host in cp104{1,2,3,4}; do
first_seq=$(head -n 1 $host.udp2log.seq | awk '{print $1}') # && echo $first_seq
last_seq=$(tail -n 1 $host.hdfs.seq | awk '{print $1}') # && echo $last_seq
should_be=$(echo "$last_seq - $first_seq + 1" | bc) #&& echo $should_be
# count lines in blog.hdfs.seq file between $first_seq and $last_seq
hdfs_count=$(sed -n -e "/^$first_seq/,/^$last_seq/p" $host.udp2log.seq | wc -l) #&& echo $hdfs_count
@dsc
dsc / combi.py
Created April 27, 2012 17:56
testcomb.py
# when you asked the question, this is what popped in my head;
# a solution to abstracting the requested problem into indexes
def combi(n, k):
r = []
for i in range(n - k + 1):
for j in range(i + 1, n - k + 2):
s = [i]
for m in range(k - 1):
s.append(j + m)