Last active
August 29, 2015 14:09
-
-
Save chronus7/f1730a9df28d9233c822 to your computer and use it in GitHub Desktop.
/r/dailyprogrammer #188 intermediate
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# /r/dailyprogrammer #188 intermediate | |
# http://www.reddit.com/r/dailyprogrammer/comments/2m48nn/ | |
# Title: [2014-11-12] Challenge #188 [Intermediate] Box Plot Generator | |
""" BoxPlot
A small script that prints a box plot of the input data to the
console. It assumes the console/terminal displays at least
120 columns and 9 lines (for cross-platform reasons).
-- (2014) Dave J (https://github.com/DaveAtGit)
"""
# Imports | |
from argparse import ArgumentParser | |
from math import ceil | |
# Statics | |
# Number of character columns the rendered plot may occupy.
# NOTE: fixed value; one might want to load the width dynamically instead.
WIDTH = 120
# Functions | |
def handle(name: str, vals: list):
    """ handles the dataset

    Builds a textual box plot for ``vals`` and returns it as one string
    (lines joined with newlines): a two-line "tab" header carrying
    ``name``, a row with the three quartile values, a row with the
    whisker/outlier values and the top of the box, the whisker line
    (outliers marked with ``x``), the bottom of the box and finally the
    median tick.

    ``vals`` is not modified.  Quartiles are picked by index
    (``ceil(n/4)``, ``ceil(n/2)``, ``ceil(3n/4)`` into the sorted values);
    the index is clamped to the last element so that fewer than four
    values do not raise.

    Raises ValueError if ``vals`` is empty.
    """
    if not vals:
        raise ValueError("cannot draw a box plot without values")

    # >> header ("tab" with the dataset name); the three-space indent puts
    # >> the upper edge right above " name " in the line below
    to_print = [' ' * 3 + '_' * (len(name) + 2)]
    header = "__/ {} \\".format(name)
    to_print.append(header + '_' * (WIDTH - len(header)))

    # work on a sorted copy so the caller's list stays untouched
    ordered = sorted(vals)
    count = len(ordered)

    # << save some space for large numbers
    # << not perfect!
    width = WIDTH - len(str(ordered[-1]))

    # clamp: ceil(3*count/4) equals count for count < 4
    quartiles = [ordered[min(ceil(pos), count - 1)]
                 for pos in (count / 4, count / 2, 3 * count / 4)]
    iqr = quartiles[2] - quartiles[0]  # 0-indexed!
    variance_low = quartiles[0] - 1.5 * iqr
    variance_high = quartiles[2] + 1.5 * iqr

    # every value is drawn at most once, so duplicates are dropped here
    distinct = sorted(set(ordered))
    below_variance = [v for v in distinct if v < variance_low]
    above_variance = [v for v in distinct if v > variance_high]
    whisker_low = min(v for v in distinct if v >= variance_low)
    whisker_high = max(v for v in distinct if v <= variance_high)

    # a dataset of identical values has no spread; draw it all in column 0
    span = ordered[-1] - ordered[0]
    print_factor = width / span if span else 0
    offset = ordered[0]

    def calc(val):
        """ column at which ``val`` is drawn """
        return int(val * print_factor - offset * print_factor)

    def place(line, val, text):
        """ pad ``line`` up to the column of ``val``, then append ``text`` """
        return line + ' ' * (calc(val) - len(line)) + str(text)

    def underline(line, low, high):
        """ turn blanks within columns ``low``..``high`` into underscores """
        return ''.join('_' if low <= i <= high and c == ' ' else c
                       for i, c in enumerate(line))

    box = calc(quartiles[0]), calc(quartiles[2])
    median_col = calc(quartiles[1])

    # >> quartiles
    line = ""
    for quartile in quartiles:
        line = place(line, quartile, quartile)
    to_print.append(line)

    # >> numbers (whiskers and outliers) plus the top edge of the box;
    # >> inside the box every character is overwritten, as before
    line = ""
    for val in sorted(set(below_variance + [whisker_low, whisker_high]
                          + above_variance)):
        line = place(line, val, val)
    line = ''.join(
        ('|' if i == median_col else '_') if box[0] <= i <= box[1] else c
        for i, c in enumerate(line))
    to_print.append(line)

    # >> line
    outliers = set(below_variance + above_variance)
    specific_vals = set([whisker_low, whisker_high] + quartiles)
    line = ""
    for val in distinct:
        if val in outliers:
            line = place(line, val, 'x')
        elif val in specific_vals:
            line = place(line, val, '|')
    to_print.append(underline(line, calc(whisker_low), calc(whisker_high)))

    # >> bottom
    line = ""
    for val in sorted(specific_vals):
        line = place(line, val, '|')
    to_print.append(underline(line, box[0], box[1]))

    # >> below
    to_print.append(' ' * median_col + '|')
    return '\n'.join(to_print)
def main():
    """ handles arguments

    Collects datasets from the positional integers (labelled "input")
    and from the first line of every file passed via -f/--files
    (labelled with the file path), then prints one plot per dataset.
    """
    parser = ArgumentParser()
    parser.add_argument("values", nargs="*", type=int,
                        help="Whitespace-seperated list of values.")
    parser.add_argument("-f", "--files", nargs="+", type=str,
                        help="Files with one line of whitespace-seperated values.")
    options = parser.parse_args()

    datasets = []
    if options.values:
        datasets.append(("input", options.values))
    for filepath in options.files or ():
        with open(filepath) as stream:
            # only the first line of each file is evaluated
            numbers = [int(token)
                       for token in stream.readline().strip().split()]
        datasets.append((filepath, numbers))

    for label, numbers in datasets:
        print(handle(label, numbers))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# /r/dailyprogrammer #188 intermediate | |
# http://www.reddit.com/r/dailyprogrammer/comments/2m48nn/ | |
# Title: [2014-11-12] Challenge #188 [Intermediate] Box Plot Generator | |
""" BoxPlot
A small script that prints a box plot of the input data to the
console. It assumes the console/terminal displays at least
120 columns and 9 lines (for cross-platform reasons).
-- (2014) Dave J (https://github.com/DaveAtGit)
"""
# Imports | |
from argparse import ArgumentParser | |
from math import ceil | |
# Classes | |
class Data:
    """ container for values

    Stores the values sorted ascending and derives the box-plot figures:
    quartiles ``Q1``/``Q2``/``Q3``, the inter-quartile range ``IQR``, the
    fences ``var_low``/``var_high`` (1.5 * IQR beyond Q1/Q3), the whisker
    ends (outermost values within the fences) and the outliers on
    either side.

    Instances can be iterated, indexed and passed to ``len()``; all three
    operate on the sorted values.

    Raises ValueError if ``values`` is empty.
    """

    def __init__(self, values):
        self.vals = sorted(values)
        if not self.vals:
            raise ValueError("Data requires at least one value")

        # ceil(3*n/4) equals n for n < 4, so the index is clamped to the
        # last element instead of running past the end.
        last = len(self) - 1
        self.Q1 = self[min(ceil(len(self) / 4), last)]
        self.Q2 = self[min(ceil(len(self) / 2), last)]
        self.Q3 = self[min(ceil(3 * len(self) / 4), last)]

        self.IQR = self.Q3 - self.Q1
        self.__variance = 1.5 * self.IQR
        self.var_low = self.Q1 - self.__variance
        self.var_high = self.Q3 + self.__variance

        self.whisker_low = min(v for v in self if v >= self.var_low)
        self.whisker_high = max(v for v in self if v <= self.var_high)
        self.outliers_low = [v for v in self if v < self.var_low]
        self.outliers_high = [v for v in self if v > self.var_high]

    def __iter__(self):
        yield from self.vals

    def __len__(self):
        return len(self.vals)

    def __getitem__(self, index):
        return self.vals[index]

    def __str__(self):
        """ one attribute per line, names right-aligned and sorted """
        attributes = vars(self)
        pad = max(map(len, attributes))
        lines = [self.__class__.__name__ + " {"]
        for key in sorted(attributes):
            lines.append("\t{:>{m}}: {}".format(key, attributes[key], m=pad))
        lines.append("}")
        return '\n'.join(lines)
class BoxPlot:
    """ building the boxplot

    Draws a box plot for ``data`` onto a HEIGHT x WIDTH grid of
    characters.  ``data`` must be indexable (``data[0]`` and ``data[-1]``
    are used as the lower and upper end of the scale) and expose the
    attributes ``Q1``, ``Q2``, ``Q3``, ``whisker_low``, ``whisker_high``,
    ``outliers_low`` and ``outliers_high``.

    ``str(plot)`` creates the plot on first use and returns the grid as
    newline-joined rows with trailing blanks removed.
    """
    SEP_H = "_"     # horizontal box edge
    SEP_V = "|"     # vertical ticks
    MARK = "x"      # outlier marker
    LINE = "="      # whisker line
    NEUTRAL = ' '   # untouched cell
    HEIGHT = 6
    WIDTH = 120

    def __init__(self, data):
        self.data = data
        self.matrix = None
        self.factor = None
        self.offset = None
        self.__created = False

    def create(self):
        """ creates the boxplot

                        Q2
                  Q1______|___Q3
          O    Wl  |      |   |    Wh   O  O
          x    |===|======|===|=====|   x  x
                   |______|___|
                          |

        Does nothing if the grid already exists.
        """
        if self.matrix:
            return
        self.matrix = [[self.NEUTRAL for _ in range(self.WIDTH)]
                       for _ in range(self.HEIGHT)]  # thanks /u/basic_bgnr for the idea :P
        # identical values have no spread: scale by 0 so that everything
        # lands in column 0 instead of dividing by zero
        span = self.data[-1] - self.data[0]
        self.factor = self.WIDTH / span if span else 0
        self.offset = self.data[0] * self.factor
        # TODO should try to scale on HEIGHT as well

        # >> plot
        for (y1, y2, var) in ((1, 5, self.data.Q2),
                              (2, 4, self.data.Q1),
                              (2, 4, self.data.Q3),
                              (3, 3, self.data.whisker_low),
                              (3, 3, self.data.whisker_high)):
            self.vline(y1, y2, self.pvalue(var))
        for _y in (1, 4):
            self.hline(self.pvalue(self.data.Q1) + 1,
                       self.pvalue(self.data.Q3) - 1, _y)
        self.hline(self.pvalue(self.data.whisker_low),
                   self.pvalue(self.data.whisker_high),
                   3, self.LINE)

        # >> numbers
        def special(val, y, offset=0):
            self.text(self.pvalue(val) + offset, y, val)

        special(self.data.Q2, 0)
        # labels of Q1/Q3 are nudged by roughly half their own length
        # (away from the median)
        special(self.data.Q1, 1, -len(str(self.data.Q1)) // 2 + 1)
        special(self.data.Q3, 1, len(str(self.data.Q3)) // 2)
        special(self.data.whisker_low, 2)
        special(self.data.whisker_high, 2)
        for val in self.data.outliers_low + self.data.outliers_high:
            self[3, self.pvalue(val)] = self.MARK
            special(val, 2)
        self.__created = True

    def __iter__(self):
        """ iterates over rows! """
        yield from self.matrix

    def __getitem__(self, inp):
        """ ``plot[y, x]`` yields a cell, ``plot[y]`` a whole row """
        if isinstance(inp, tuple):
            return self.matrix[inp[0]][inp[1]]
        return self.matrix[inp]

    def __setitem__(self, inp, value):
        """ ``plot[y, x] = char``; anything but a tuple raises TypeError """
        if isinstance(inp, tuple):
            self.matrix[inp[0]][inp[1]] = value
        else:
            raise TypeError("For item assignment, tuple-coordinates are required.")

    def hline(self, x1, x2, y, char=SEP_H):
        """ fills the free cells of row ``y`` from ``x1`` to ``x2`` (inclusive) """
        r = self[y]
        for i in range(x1, x2 + 1):
            if r[i] == self.NEUTRAL:
                r[i] = char

    def vline(self, y1, y2, x, char=SEP_V):
        """ fills the free cells of column ``x`` from ``y1`` to ``y2`` (inclusive) """
        for r in range(y1, y2 + 1):
            if self[r, x] == self.NEUTRAL:
                self[r, x] = char

    def text(self, x, y, text):
        """ writes ``str(text)`` centred on column ``x``, preferably in row ``y``

        The text is shifted sideways to stay inside the grid.  If the
        target cells are occupied, the row above and then up to three
        rows below are tried; if none is free, the original row is
        overwritten.
        """
        text = str(text)
        l = len(text)
        # >> center it
        x = x - l // 2
        # >> check borders
        if x + l > self.WIDTH:
            x = self.WIDTH - l
        elif x < 0:
            x = 0

        # >> check surroundings
        def occupied(_y):
            # rows outside the grid count as occupied; test the bounds
            # before touching the matrix
            if not 0 <= _y < self.HEIGHT:
                return True
            return any(c != self.NEUTRAL for c in self[_y][x:x + l])

        for i in (-1, 2, 1, 1, -3):
            # above, below (up to 3), failsafe (overwriting existing chars)
            if occupied(y):
                y = y + i
            else:
                break
        for i, _x in enumerate(range(x, x + l)):
            self[y][_x] = text[i]

    def pvalue(self, value):
        """ column for ``value``, clamped to the grid

        The largest value scales to exactly WIDTH, one past the last
        column, hence the clamp.
        """
        column = int(value * self.factor - self.offset)
        return max(0, min(self.WIDTH - 1, column))

    def __str__(self):
        self.create()
        return '\n'.join(''.join(x).rstrip() for x in self.matrix)
# Functions | |
def handle(name, values):
    """ handles the dataset

    Returns the complete output for one dataset as a single string:
    a two-line "tab" header showing ``name``, the plot itself and a
    closing rule that is BoxPlot.WIDTH characters wide.
    """
    tab_top = ' '*3 + '_'*(len(name)+2)
    tab = "__/ {} \\".format(name)
    tab_line = tab + '_'*(BoxPlot.WIDTH-len(tab))
    body = str(BoxPlot(Data(values)))
    closing_rule = '_'*BoxPlot.WIDTH
    return '\n'.join([tab_top, tab_line, body, closing_rule])
def main():
    """ handles arguments

    Datasets come from the positional integers (labelled "input") and
    from the first line of every file given via -f/--files (labelled
    with the file path).  -w/--width overrides BoxPlot.WIDTH; omitting
    it (or passing 0) keeps the default, a negative width is rejected
    with a usage error.  One plot is printed per dataset.
    """
    ap = ArgumentParser()
    ap.add_argument("values", nargs="*", type=int,
                    help="Whitespace-seperated list of values.")
    ap.add_argument("-f", "--files", nargs="+", type=str,
                    help="Files with one line of whitespace-seperated values.")
    ap.add_argument("-w", "--width", type=int,
                    help="Sets width of output.")
    args = ap.parse_args()

    if args.width:
        if args.width < 1:
            # a negative width yields an empty grid and an IndexError
            # deep inside the plotting code; fail early and clearly
            ap.error("--width must be a positive number of columns")
        BoxPlot.WIDTH = args.width

    values = list()
    if args.values:
        values.append(("input", args.values))
    if args.files:
        for filepath in args.files:
            with open(filepath) as f:
                # only the first line of each file is evaluated
                values.append((filepath,
                               list(map(int, f.readline().strip().split()))))

    for tpl in values:
        print(handle(*tpl))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment