Skip to content

Instantly share code, notes, and snippets.

@esoergel
Last active March 4, 2019 05:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save esoergel/6dc0e8b5b42b7c735a54d9741bcd5b08 to your computer and use it in GitHub Desktop.
Save esoergel/6dc0e8b5b42b7c735a54d9741bcd5b08 to your computer and use it in GitHub Desktop.
Calculate statistics on the per-file line counts of a set of matching files
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Usage:
$ find . | grep py$ | python line_counts.py
"""
from __future__ import print_function, unicode_literals

import subprocess

import numpy
def parse(line):
    """Parse one line of `wc -l` output into a (count, filename) tuple.

    Returns None for blank lines, malformed lines, and the trailing
    "total" summary line that `wc` appends, so callers can drop them
    with filter(None, ...).
    """
    if not line:
        return None
    # Split on the first run of whitespace only: the count never contains
    # spaces, but the filename may (the old bare split() crashed on those).
    fields = line.split(None, 1)
    if len(fields) != 2:
        # e.g. the bare "0" that `xargs wc -l` emits on empty input
        return None
    count, filename = fields
    if filename == 'total':
        return None
    return (int(count), filename)
def main():
    """Read a file list from stdin, run `wc -l` over it, and print stats."""
    # xargs inherits our stdin, so the piped-in filename list feeds `wc -l`.
    res = subprocess.check_output(['xargs', 'wc', '-l'])
    # check_output returns bytes on Python 3 (str on Python 2); decode first.
    lines = res.decode('utf-8').split('\n')
    # Materialize with list(): filter() is lazy on Python 3 and we iterate
    # counts_by_file twice (top-5 listing and the line_counts comprehension).
    counts_by_file = list(filter(None, map(parse, lines)))

    if not counts_by_file:
        # Without this guard numpy dies with a cryptic error on empty input.
        print("no matching files")
        return

    print("Biggest 5 files:")
    for count, filename in sorted(counts_by_file)[-5:]:
        print(" ", count, filename)

    line_counts = [count for count, _ in counts_by_file]
    median = int(numpy.median(line_counts))
    std = int(numpy.round(numpy.std(line_counts)))
    print("matching files:", len(line_counts))
    print("total lines:", numpy.sum(line_counts))
    print("min:", numpy.min(line_counts))
    print("median:", median)
    print("+1σ:", median + std)
    print("max:", numpy.max(line_counts))
    # I like weighted average best - you're more likely to be working in a file
    # with longer lines, so that gives you the best idea how long files will be
    # in your day-to-day.
    print("weighted avg:", int(numpy.round(numpy.average(line_counts, weights=line_counts))))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment