Skip to content

Instantly share code, notes, and snippets.

Created September 29, 2012 19:48
Show Gist options
  • Save anonymous/d0d956985c4be5251dff to your computer and use it in GitHub Desktop.
Save anonymous/d0d956985c4be5251dff to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# NONAME v0.0.0 (dd.mm.yy) by Bystroushaak (bystrousak@kitakitsune.org)
# This work is licensed under a Creative Commons 3.0
# Unported License (http://creativecommons.org/licenses/by/3.0/cz/).
# Created in Sublime text editor.
#= Imports =====================================================================
import argparse
import codecs
import collections
import sys
#= Variables ===================================================================
# How often (in items) progress is reported and the output stream is flushed.
PROGRESS_STEP = 1000


#= Functions ===================================================================
def parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: List of argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        The argparse namespace with the attributes ``delimiter`` (string put
        between a line and its count, tab by default) and ``verbose`` (bool,
        ``True`` only when ``-v``/``--verbose`` was given).
    """
    parser = argparse.ArgumentParser(
        description="Purpose of this program is analyze how many times lines "
                    "repeats at stdin."
    )
    parser.add_argument(
        "-d", "--delimiter", action="store", type=str, default="\t",
        # The backslash is escaped so the help shows "\t" instead of an
        # invisible tab character.
        help="Set delimiter. Default \\t."
    )
    parser.add_argument(
        # store_true: the flag has to switch progress output ON. The previous
        # store_false/default=False combination could never yield True.
        "-v", "--verbose", action="store_true", default=False,
        help="Show progress."
    )
    return parser.parse_args(argv)


def count_lines(lines, verbose=False, log=None):
    """Count how many times each line occurs, ignoring case and trailing space.

    Every line is lowercased and stripped of trailing whitespace (including
    the newline) before it is counted.

    Args:
        lines: Sequence of text lines; its length is used for the progress
            messages.
        verbose: When true, write a "Processing i / total" message to ``log``
            for every ``PROGRESS_STEP``-th line, starting with line 0.
        log: Writable text stream for progress messages. Defaults to
            ``sys.stderr``.

    Returns:
        A ``collections.Counter`` mapping each normalized line to its number
        of occurrences.
    """
    log = sys.stderr if log is None else log
    total = str(len(lines))
    counts = collections.Counter()
    for index, line in enumerate(lines):
        counts[line.lower().rstrip()] += 1
        if verbose and index % PROGRESS_STEP == 0:
            log.write("Processing " + str(index) + " / " + total + "\n")
    return counts


def write_counts(counts, out_stream, delimiter="\t", verbose=False, log=None):
    """Write one ``<line><delimiter><count>`` record per counted line.

    Args:
        counts: Mapping of normalized line to number of occurrences.
        out_stream: Writable text stream that receives the records; it is
            flushed every ``PROGRESS_STEP`` records and once more at the end.
        delimiter: String placed between the line and its count.
        verbose: When true, write a "Saving i / total" message to ``log``
            each time the output is flushed inside the loop.
        log: Writable text stream for progress messages. Defaults to
            ``sys.stderr``.
    """
    log = sys.stderr if log is None else log
    total = str(len(counts))
    for index, (key, count) in enumerate(counts.items()):
        out_stream.write(key + delimiter + str(count) + "\n")
        if index % PROGRESS_STEP == 0:
            out_stream.flush()
            if verbose:
                log.write("Saving " + str(index) + " / " + total + "\n")
    out_stream.flush()


def main(argv=None):
    """Read lines from stdin, count repetitions and print them to stdout.

    Input is decoded and output is encoded as UTF-8. Progress messages go to
    stderr and are only emitted with ``--verbose``.

    NOTE: the summary statistics (min / max / average / median of the counts)
    were disabled in the original script and are intentionally not emitted.

    Args:
        argv: Optional list of command-line arguments; ``None`` means
            ``sys.argv[1:]``.
    """
    args = parse_args(argv)

    # The UTF-8 codecs need byte streams. Python 3 exposes them as ``.buffer``
    # on the text streams; Python 2 standard streams are byte streams already.
    in_stream = codecs.getreader("utf-8")(getattr(sys.stdin, "buffer", sys.stdin))
    out_stream = codecs.getwriter("utf-8")(getattr(sys.stdout, "buffer", sys.stdout))

    # The whole input is read up front so the progress messages can show the
    # total number of lines.
    if args.verbose:
        sys.stderr.write("Reading:\n")
    data = in_stream.readlines()
    if args.verbose:
        sys.stderr.write("done\n")

    counts = count_lines(data, verbose=args.verbose)
    write_counts(counts, out_stream, delimiter=args.delimiter, verbose=args.verbose)


#= Main program ================================================================
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment