Skip to content

Instantly share code, notes, and snippets.

@dmcdougall
Created September 17, 2012 11:02
Show Gist options
  • Save dmcdougall/3736707 to your computer and use it in GitHub Desktop.
Save dmcdougall/3736707 to your computer and use it in GitHub Desktop.
New boxplot method
def boxplot(self, x, notch=0, sym='+', vert=1, whis=1.5,
            positions=None, widths=None, means=0, fill=0,
            linestyle='-', monochrome=0, limits=None,
            notchsize=None):
    """
    Make a box and whisker plot for each column of x or each vector in
    sequence x.

    The box extends from the lower to the upper quartile of the data,
    with a line at the median.  The whiskers extend from the box to the
    most extreme data points that lie within ``whis`` times the
    interquartile range of the box.  Flier points are those past the
    end of the whiskers.

    Parameters:

      x: an array (1-D or 2-D) or a sequence of vectors.  A 2-D array
        with more than one row and column contributes one box per
        column.

      notch: 0 (default) draws a rectangular box.  1 draws a notched
        box whose notch is ``median +/- 1.57*iqr/sqrt(n)``, clipped to
        the quartiles.  Any other value (documented value: 2) draws a
        notch of fixed half-height ``notchsize``; if that notch extends
        past either quartile the box is replaced by the notch alone.

      sym: marker for flier points (default '+').  Pass an empty
        string to skip computing fliers.

      vert: true (default) makes vertical boxes, false makes
        horizontal boxes.

      whis: whisker length as a multiple of the interquartile range
        (default 1.5).

      positions: positions of the boxes along the category axis
        (default 1, 2, ..., n).

      widths: scalar or sequence giving the width of each box.  The
        default is ``min(0.15*max(distance, 1.0), 0.5)`` where
        *distance* is the span between the extreme positions.

      means: if true, draw a black line at the mean of each data set
        (dotted for a plain box, solid for a notched one).  Default 0.

      fill: if true, fill the box in white (skipped when the box has
        been replaced by a bare notch).  Default 0.

      linestyle: line style of the whiskers.

      monochrome: if true, draw everything in black; otherwise use the
        default colour scheme.

      limits: limits for the category axis.  None (default) sets the
        limits to the extreme positions +/- 0.5 and puts ticks at the
        positions.  An empty tuple leaves limits and ticks untouched.
        Anything else is passed to set_xlim/set_ylim and the ticks are
        cleared.

      notchsize: half-height of the notch; required when notch == 2,
        unused otherwise.

    Returns a dict of the lines added, keyed by 'boxes', 'caps',
    'whiskers', 'medians', 'fliers', and 'means'.

    Raises ValueError if x has more than two dimensions, or if the
    fixed-size notch mode is requested without notchsize.
    """
    if not self._hold:
        self.cla()
    holdStatus = self._hold

    # The list collecting the mean artists must NOT be called ``means``:
    # that would shadow the ``means`` flag and silently disable the
    # mean line.
    whiskers, caps, boxes, medians, fliers = [], [], [], [], []
    mean_lines = []

    # convert x to a list of vectors
    if hasattr(x, 'shape'):
        if len(x.shape) == 1:
            if hasattr(x[0], 'shape'):
                x = list(x)
            else:
                x = [x]
        elif len(x.shape) == 2:
            nr, nc = x.shape
            if nr == 1:
                x = [x]
            elif nc == 1:
                x = [ravel(x)]
            else:
                x = [x[:, i] for i in range(nc)]
        else:
            raise ValueError("input x can have no more than 2 dimensions")
    if not hasattr(x[0], '__len__'):
        x = [x]
    col = len(x)

    # get some plot info
    if positions is None:
        positions = list(range(1, col + 1))
    if widths is None:
        distance = max(positions) - min(positions)
        widths = min(0.15 * max(distance, 1.0), 0.5)
    if not hasattr(widths, '__len__'):
        # a scalar width (python or numpy) applies to every box
        widths = ones((col,), 'd') * widths

    # colour scheme: whiskers, caps, box, median, flier symbols
    if monochrome:
        wiskcol = capcol = boxcol = medcol = symcol = 'k'
    else:
        wiskcol, capcol, boxcol, medcol, symcol = 'b', 'k', 'b', 'r', 'b'

    # vertical or horizontal plot?  All geometry below is computed for
    # the vertical case; the horizontal helper swaps x and y in every
    # (x, y, fmt) triple before plotting.
    if vert:
        def doplot(*args):
            return self.plot(*args)
    else:
        def doplot(*args):
            shuffled = []
            for k in range(0, len(args), 3):
                shuffled.extend([args[k + 1], args[k], args[k + 2]])
            return self.plot(*shuffled)

    # loop through columns, adding each to plot
    self.hold(True)
    try:
        for i, pos in enumerate(positions):
            d = ravel(x[i])
            row = len(d)
            # get mean (float() avoids integer truncation on Python 2)
            mean = sum(d) / float(row)
            # get median and quartiles
            q1, med, q3 = prctile(d, [25, 50, 75])
            # get high extreme
            iq = q3 - q1
            hi_val = q3 + whis * iq
            wisk_hi = compress(d <= hi_val, d)
            if len(wisk_hi) == 0:
                wisk_hi = q3
            else:
                wisk_hi = max(wisk_hi)
            # get low extreme
            lo_val = q1 - whis * iq
            wisk_lo = compress(d >= lo_val, d)
            if len(wisk_lo) == 0:
                wisk_lo = q1
            else:
                wisk_lo = min(wisk_lo)
            # get fliers - if we are showing them
            flier_hi = []
            flier_lo = []
            flier_hi_x = []
            flier_lo_x = []
            if len(sym) != 0:
                flier_hi = compress(d > wisk_hi, d)
                flier_lo = compress(d < wisk_lo, d)
                flier_hi_x = ones(flier_hi.shape[0]) * pos
                flier_lo_x = ones(flier_lo.shape[0]) * pos
            # get x locations for fliers, whisker, whisker cap and
            # box sides
            box_x_min = pos - widths[i] * 0.5
            box_x_max = pos + widths[i] * 0.5
            wisk_x = ones(2) * pos
            cap_x_min = pos - widths[i] * 0.25
            cap_x_max = pos + widths[i] * 0.25
            cap_x = [cap_x_min, cap_x_max]
            # get y location for median, mean
            med_y = [med, med]
            mean_y = [mean, mean]

            no_box = False
            if notch == 0:
                # 'regular' plot: make our box vectors
                box_x = [box_x_min, box_x_max, box_x_max, box_x_min,
                         box_x_min]
                box_y = [q1, q1, q3, q3, q1]
                # make our median, mean line vectors
                med_x = [box_x_min, box_x_max]
                mean_x = [box_x_min, box_x_max]
            else:
                # 'notch' plot
                if notch == 1:
                    notch_max = med + 1.57 * iq / sqrt(row)
                    notch_min = med - 1.57 * iq / sqrt(row)
                    if notch_max > q3:
                        notch_max = q3
                    if notch_min < q1:
                        notch_min = q1
                else:
                    if notchsize is None:
                        raise ValueError(
                            "Must supply notchsize when notch==2")
                    notch_max = med + notchsize
                    notch_min = med - notchsize
                    # force 'no box' if notch size outside of box limits
                    if notch_max > q3 or notch_min < q1:
                        no_box = True
                # make our notched box vectors
                if no_box:
                    # two open '>' '<' halves instead of a closed box
                    box_x = [box_x_max, cap_x_max, box_x_max]
                    notch2_x = [box_x_min, cap_x_min, box_x_min]
                    box_y = [notch_min, med, notch_max]
                    notch2_y = [notch_max, med, notch_min]
                else:
                    box_x = [box_x_min, box_x_max, box_x_max, cap_x_max,
                             box_x_max, box_x_max, box_x_min, box_x_min,
                             cap_x_min, box_x_min, box_x_min]
                    box_y = [q1, q1, notch_min, med, notch_max, q3, q3,
                             notch_max, med, notch_min, q1]
                # make our median, mean line vectors
                med_x = [cap_x_min, cap_x_max]
                # doesn't take into account notch shape
                mean_x = [box_x_min, box_x_max]

            if fill and not no_box:
                # fill() does not go through doplot, so swap the
                # coordinates by hand for horizontal boxes
                if vert:
                    self.fill(box_x, box_y, facecolor='w')
                else:
                    self.fill(box_y, box_x, facecolor='w')

            whiskers.extend(doplot(wisk_x, [q1, wisk_lo],
                                   wiskcol + linestyle,
                                   wisk_x, [q3, wisk_hi],
                                   wiskcol + linestyle))
            caps.extend(doplot(cap_x, [wisk_hi, wisk_hi], capcol + '-',
                               cap_x, [wisk_lo, wisk_lo], capcol + '-'))
            boxes.extend(doplot(box_x, box_y, boxcol + '-'))
            medians.extend(doplot(med_x, med_y, medcol + '-'))
            fliers.extend(doplot(flier_hi_x, flier_hi, symcol + sym,
                                 flier_lo_x, flier_lo, symcol + sym))
            if means:
                if notch > 0:
                    nstyle = 'k-'
                else:
                    nstyle = 'k:'
                mean_lines.extend(doplot(mean_x, mean_y, nstyle))
            if notch == 2 and no_box:
                boxes.extend(doplot(notch2_x, notch2_y, boxcol + '-'))

        # fix our axes/ticks up a little
        if vert:
            setticks, setlim = self.set_xticks, self.set_xlim
        else:
            setticks, setlim = self.set_yticks, self.set_ylim
        # use explicit axis limits if provided
        if limits is None:
            newlimits = min(positions) - 0.5, max(positions) + 0.5
            setlim(newlimits)
            setticks(positions)
        elif limits != ():
            setlim(limits)
            setticks([])
    finally:
        # reset hold status, even if plotting failed part-way through
        self.hold(holdStatus)

    return dict(whiskers=whiskers, caps=caps, boxes=boxes,
                medians=medians, fliers=fliers, means=mean_lines)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment