Skip to content

Instantly share code, notes, and snippets.

@chronus7
Last active August 29, 2015 14:09
Show Gist options
  • Save chronus7/f1730a9df28d9233c822 to your computer and use it in GitHub Desktop.
Save chronus7/f1730a9df28d9233c822 to your computer and use it in GitHub Desktop.
/r/dailyprogrammer #188 intermediate
# -*- coding: utf-8 -*-
# /r/dailyprogrammer #188 intermediate
# http://www.reddit.com/r/dailyprogrammer/comments/2m48nn/
# Title: [2014-11-12] Challenge #188 [Intermediate] Box Plot Generator
""" BoxPlot
A small script that prints a box plot of the input data to the
console. It assumes that the console/terminal displays at least
120 columns and 9 lines (for cross-platform reasons).
-- (2014) Dave J (https://github.com/DaveAtGit)
"""
# Imports
from argparse import ArgumentParser
from math import ceil
# Statics
WIDTH = 120
# << one might want to load the width dynamically.
# Functions
def handle(name: str, vals: list) -> str:
    """Render a console box plot for one named dataset.

    The result consists of a two-line tab-style header carrying *name*,
    followed by five plot lines: the quartile labels, the whisker/outlier
    labels (overlaid with the top edge of the box), the whisker line with
    ``x`` marks for outliers, the bottom edge of the box and the median tick.

    Args:
        name: Title shown in the header tab.
        vals: Numeric values to plot. The list is not modified.

    Returns:
        The rendered plot as one string, lines joined by newlines.

    Raises:
        ValueError: If *vals* is empty.
    """
    if not vals:
        raise ValueError("handle() needs at least one value to plot.")

    # Tab-style header: the row of underscores sits right above " name ".
    header = "__/ {} \\".format(name)
    to_print = [
        ' ' * 3 + '_' * (len(name) + 2),
        header + '_' * (WIDTH - len(header)),
    ]

    # Keep some room on the right for the largest label (heuristic only).
    width = WIDTH - len(str(max(vals)))

    vals = sorted(vals)  # sorted copy, so the caller's list stays untouched
    count = len(vals)
    # ceil(k*n/4) runs past the end for fewer than four values: clamp it.
    quartiles = [vals[min(ceil(pos), count - 1)]
                 for pos in (count / 4, count / 2, 3 * count / 4)]
    iqr = quartiles[2] - quartiles[0]
    variance_low = quartiles[0] - 1.5 * iqr
    variance_high = quartiles[2] + 1.5 * iqr
    below_variance = [v for v in vals if v < variance_low]
    above_variance = [v for v in vals if v > variance_high]
    # Whiskers end at the most extreme values that are not outliers.
    whisker_low = min(v for v in vals if v >= variance_low)
    whisker_high = max(v for v in vals if v <= variance_high)

    # A dataset of identical values has no range; draw everything at column 0
    # instead of dividing by zero.
    span = vals[-1] - vals[0]
    print_factor = width / span if span else 0.0
    offset = vals[0]

    def calc(val):
        """Map a data value to its output column."""
        return int(val * print_factor - offset * print_factor)

    def place(line, val, label):
        """Append *label* to *line*, padded so it starts at the column of *val*.

        When the line already reaches past that column the label is simply
        appended (a negative pad yields no spaces).
        """
        return line + ' ' * (calc(val) - len(line)) + str(label)

    def fill(line, first, last):
        """Turn blanks between columns *first* and *last* (inclusive) into '_'."""
        return ''.join('_' if char == ' ' and first <= col <= last else char
                       for col, char in enumerate(line))

    q1_col, q2_col, q3_col = (calc(q) for q in quartiles)
    outliers = set(below_variance + above_variance)
    markers = [whisker_low, whisker_high] + quartiles

    # >> quartiles
    line = ""
    for quartile in quartiles:
        line = place(line, quartile, quartile)
    to_print.append(line)

    # >> numbers
    line = ""
    for val in below_variance + [whisker_low, whisker_high] + above_variance:
        line = place(line, val, val)
    # The top edge of the box replaces whatever was written between Q1 and Q3.
    line = ''.join(
        ('|' if col == q2_col else '_') if q1_col <= col <= q3_col else char
        for col, char in enumerate(line)
    )
    to_print.append(line)

    # >> line
    line = ""
    for val in sorted(set(vals)):  # one mark per distinct value
        if val in outliers:
            line = place(line, val, 'x')
        elif val in markers:
            line = place(line, val, '|')
    to_print.append(fill(line, calc(whisker_low), calc(whisker_high)))

    # >> bottom
    line = ""
    for val in sorted(markers):
        line = place(line, val, '|')
    to_print.append(fill(line, q1_col, q3_col))

    # >> below
    to_print.append(' ' * q2_col + '|')
    return '\n'.join(to_print)
def main():
    """Parse the command line and print one box plot per dataset.

    Positional integers form a dataset named "input"; every file passed via
    -f/--files contributes the integers found on its first line, named after
    the file path.
    """
    parser = ArgumentParser()
    parser.add_argument("values", nargs="*", type=int,
                        help="Whitespace-seperated list of values.")
    parser.add_argument("-f", "--files", nargs="+", type=str,
                        help="Files with one line of whitespace-seperated values.")
    args = parser.parse_args()

    datasets = []
    if args.values:
        datasets.append(("input", args.values))
    for filepath in args.files or ():
        with open(filepath) as source:
            first_line = source.readline()
        numbers = [int(token) for token in first_line.strip().split()]
        datasets.append((filepath, numbers))

    for name, numbers in datasets:
        print(handle(name, numbers))
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
# -*- coding: utf-8 -*-
# /r/dailyprogrammer #188 intermediate
# http://www.reddit.com/r/dailyprogrammer/comments/2m48nn/
# Title: [2014-11-12] Challenge #188 [Intermediate] Box Plot Generator
""" BoxPlot
A small script that prints a box plot of the input data to the
console. It assumes that the console/terminal displays at least
120 columns and 9 lines (for cross-platform reasons).
-- (2014) Dave J (https://github.com/DaveAtGit)
"""
# Imports
from argparse import ArgumentParser
from math import ceil
# Classes
class Data:
    """Sorted sample together with the statistics a box plot needs.

    Everything is computed once in the constructor:

    * ``vals`` - the values in ascending order
    * ``Q1``, ``Q2``, ``Q3`` - the elements at index ceil(n/4), ceil(n/2) and
      ceil(3n/4) of ``vals`` (clamped to the last element)
    * ``IQR`` - ``Q3 - Q1``
    * ``var_low``, ``var_high`` - the fences ``Q1 - 1.5*IQR`` / ``Q3 + 1.5*IQR``
    * ``whisker_low``, ``whisker_high`` - extreme values inside the fences
    * ``outliers_low``, ``outliers_high`` - values outside the fences

    Instances iterate over, index into and report the length of ``vals``.
    """

    def __init__(self, values):
        """Sort *values* and derive the statistics.

        Raises:
            ValueError: If *values* is empty.
        """
        self.vals = sorted(values)
        if not self.vals:
            raise ValueError("Data requires at least one value.")

        count = len(self.vals)
        last = count - 1
        # For fewer than four values ceil(k*n/4) points past the end of the
        # list, so every index is clamped to the last element.
        self.Q1 = self.vals[min(ceil(count / 4), last)]
        self.Q2 = self.vals[min(ceil(count / 2), last)]
        self.Q3 = self.vals[min(ceil(3 * count / 4), last)]
        self.IQR = self.Q3 - self.Q1
        self.__variance = 1.5 * self.IQR
        self.var_low = self.Q1 - self.__variance
        self.var_high = self.Q3 + self.__variance
        # Q1 and Q3 always lie inside the fences, so these are never empty.
        self.whisker_low = min(v for v in self.vals if v >= self.var_low)
        self.whisker_high = max(v for v in self.vals if v <= self.var_high)
        self.outliers_low = [v for v in self.vals if v < self.var_low]
        self.outliers_high = [v for v in self.vals if v > self.var_high]

    def __iter__(self):
        yield from self.vals

    def __len__(self):
        return len(self.vals)

    def __getitem__(self, index):
        return self.vals[index]

    def __str__(self):
        """List every attribute, sorted by name with right-aligned names."""
        attributes = vars(self)
        name_width = max(map(len, attributes))
        lines = [self.__class__.__name__ + " {"]
        for key in sorted(attributes):
            lines.append("\t{:>{m}}: {}".format(key, attributes[key], m=name_width))
        lines.append("}")
        return '\n'.join(lines)
class BoxPlot:
    """Draws a box plot of a dataset into a character grid.

    The grid has HEIGHT rows of WIDTH cells. ``str(plot)`` builds the grid on
    first use and returns its rows, right-stripped and joined by newlines.

    *data* must be indexable (``data[0]`` is the smallest, ``data[-1]`` the
    largest value) and provide ``Q1``, ``Q2``, ``Q3``, ``whisker_low``,
    ``whisker_high``, ``outliers_low`` and ``outliers_high``.
    """

    SEP_H = "_"     # horizontal edges of the box
    SEP_V = "|"     # vertical ticks
    MARK = "x"      # outlier marker
    LINE = "="      # whisker line
    NEUTRAL = ' '   # empty cell
    HEIGHT = 6
    WIDTH = 120

    def __init__(self, data):
        self.data = data
        self.matrix = None
        self.factor = None
        self.offset = None
        self.__created = False

    def create(self):
        """ creates the boxplot

                        Q2
                 Q1______|___Q3
        O   Wl    |      |   |      Wh  O  O
        x   |=====|======|===|======|   x  x
                  |______|___|
                         |

        Does nothing when the grid has already been built.
        """
        if self.matrix:
            return
        self.matrix = [[self.NEUTRAL] * self.WIDTH for _ in range(self.HEIGHT)]
        # A dataset of identical values has no range; everything is then
        # drawn at column 0 instead of dividing by zero.
        span = self.data[-1] - self.data[0]
        self.factor = self.WIDTH / span if span else 0.0
        self.offset = self.data[0] * self.factor
        # TODO should try to scale on HEIGHT as well

        # >> plot
        # Ticks first: hline() only fills empty cells, so they stay visible.
        for (y1, y2, var) in ((1, 5, self.data.Q2),
                              (2, 4, self.data.Q1),
                              (2, 4, self.data.Q3),
                              (3, 3, self.data.whisker_low),
                              (3, 3, self.data.whisker_high)):
            self.vline(y1, y2, self.pvalue(var))
        for _y in (1, 4):
            self.hline(self.pvalue(self.data.Q1) + 1,
                       self.pvalue(self.data.Q3) - 1, _y)
        self.hline(self.pvalue(self.data.whisker_low),
                   self.pvalue(self.data.whisker_high),
                   3, self.LINE)

        # >> numbers
        def special(val, y, offset=0):
            self.text(self.pvalue(val) + offset, y, val)

        special(self.data.Q2, 0)
        # text() centres its label; these offsets undo the centring so that
        # the Q1 label ends at the Q1 column and the Q3 label starts at the
        # Q3 column.
        special(self.data.Q1, 1, -len(str(self.data.Q1))//2+1)
        special(self.data.Q3, 1, len(str(self.data.Q3))//2)
        special(self.data.whisker_low, 2)
        special(self.data.whisker_high, 2)
        for val in self.data.outliers_low + self.data.outliers_high:
            self[3, self.pvalue(val)] = self.MARK
            special(val, 2)

    def __iter__(self):
        """ iterates over rows! """
        yield from self.matrix

    def __getitem__(self, inp):
        """Return the cell at ``(row, column)`` or the whole row at ``row``."""
        if isinstance(inp, tuple):
            return self.matrix[inp[0]][inp[1]]
        return self.matrix[inp]

    def __setitem__(self, inp, value):
        """Set the cell at ``(row, column)``; other keys raise TypeError."""
        if isinstance(inp, tuple):
            self.matrix[inp[0]][inp[1]] = value
        else:
            raise TypeError("For item assignment, tuple-coordinates are required.")

    def hline(self, x1, x2, y, char=SEP_H):
        """Fill the empty cells of row *y* from column *x1* to *x2* inclusive."""
        row = self[y]
        for col in range(x1, x2+1):
            if row[col] == self.NEUTRAL:
                row[col] = char

    def vline(self, y1, y2, x, char=SEP_V):
        """Fill the empty cells of column *x* from row *y1* to *y2* inclusive."""
        for row in range(y1, y2+1):
            if self[row, x] == self.NEUTRAL:
                self[row, x] = char

    def text(self, x, y, text):
        """Write *text* centred on column *x*, preferably in row *y*.

        The label is shifted sideways to stay inside the grid. If its cells
        in row *y* are taken, the row above and then the rows below are
        tried; if none is free the label overwrites row *y*.
        """
        text = str(text)
        length = len(text)
        # >> center it
        x = x - length//2
        # >> check borders
        if x + length > self.WIDTH:
            x = self.WIDTH - length
        elif x < 0:
            x = 0

        # >> check surroundings
        def blocked(row):
            # Test the bounds first: a negative row would silently wrap
            # around and a too large one would raise.
            if not 0 <= row < self.HEIGHT:
                return True
            return any(cell != self.NEUTRAL
                       for cell in self.matrix[row][x:x + length])

        # above, below (up to 3), failsafe (overwriting existing chars);
        # the steps add up to zero, so the failsafe is the requested row
        for step in (-1, 2, 1, 1, -3):
            if not blocked(y):
                break
            y += step
        for i, char in enumerate(text):
            self[y][x + i] = char

    def pvalue(self, value):
        """Return the grid column for *value*, clamped to 0..WIDTH-1.

        The largest value scales to WIDTH itself, which is one past the last
        column, hence the clamp.
        """
        column = int(value*self.factor - self.offset)
        return max(0, min(self.WIDTH - 1, column))

    def __str__(self):
        self.create()
        return '\n'.join(''.join(row).rstrip() for row in self.matrix)
# Functions
def handle(name, values):
    """Return the framed box plot of one named dataset.

    The frame is a tab-style header carrying *name* above the plot and a
    closing rule of BoxPlot.WIDTH underscores below it.
    """
    title = "__/ {} \\".format(name)
    tab_top = ' '*3 + '_'*(len(name)+2)
    title_row = title + '_'*(BoxPlot.WIDTH-len(title))
    rendered = str(BoxPlot(Data(values)))
    bottom_rule = '_'*BoxPlot.WIDTH
    return '\n'.join((tab_top, title_row, rendered, bottom_rule))
def main():
    """Parse the command line and print one box plot per dataset.

    Positional integers form a dataset named "input"; every file passed via
    -f/--files contributes the integers found on its first line, named after
    the file path. -w/--width overrides BoxPlot.WIDTH and must be positive;
    anything else ends the program with a usage error.
    """
    ap = ArgumentParser()
    ap.add_argument("values", nargs="*", type=int,
                    help="Whitespace-seperated list of values.")
    ap.add_argument("-f", "--files", nargs="+", type=str,
                    help="Files with one line of whitespace-seperated values.")
    ap.add_argument("-w", "--width", type=int,
                    help="Sets width of output.")
    args = ap.parse_args()

    # Compare against None: a plain truth test would treat "-w 0" as "not
    # given", and a negative width would only fail later while the grid is
    # built.
    if args.width is not None:
        if args.width < 1:
            ap.error("argument -w/--width: must be a positive integer")
        BoxPlot.WIDTH = args.width

    values = []
    if args.values:
        values.append(("input", args.values))
    for filepath in args.files or ():
        with open(filepath) as f:
            numbers = [int(token) for token in f.readline().strip().split()]
        values.append((filepath, numbers))

    for name, numbers in values:
        print(handle(name, numbers))
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment