Skip to content

Instantly share code, notes, and snippets.

@dimi-tree
Created February 14, 2016 19:32
Show Gist options
  • Save dimi-tree/70fdb789c9391f3f58dc to your computer and use it in GitHub Desktop.
Save dimi-tree/70fdb789c9391f3f58dc to your computer and use it in GitHub Desktop.
Data Science from Scratch
"""Visualizing Data"""
import matplotlib.pyplot as plt
from collections import Counter
## Bar charts
def make_chart_simple_bar_chart(plt):
    """Draw a bar chart of Academy Awards won by a handful of movies.

    Args:
        plt: The plotting interface to draw with, typically the
            ``matplotlib.pyplot`` module. It must provide ``bar``,
            ``ylabel``, ``title``, ``xticks`` and ``show``.
    """
    movies = [
        "Annie Hall",
        "Ben-Hur",
        "Casablanca",
        "Gandhi",
        "West Side Story",
    ]
    num_oscars = [5, 11, 3, 8, 10]

    # One integer slot per movie. The bars are explicitly centered on these
    # slots so that the tick labels below line up with them no matter which
    # bar alignment the installed matplotlib uses by default.
    positions = list(range(len(movies)))

    # plot bars centered at [positions] with heights [num_oscars]
    plt.bar(positions, num_oscars, align="center")
    plt.ylabel("# of Academy Awards")
    plt.title("My Favorite Movies")

    # label x-axis with movie names at bar centers
    plt.xticks(positions, movies)
    plt.show()
def make_chart_histogram(plt):
    """Draw a histogram of exam grades bucketed by decile.

    Args:
        plt: The plotting interface to draw with, typically the
            ``matplotlib.pyplot`` module. It must provide ``bar``, ``axis``,
            ``xticks``, ``xlabel``, ``ylabel``, ``title`` and ``show``.
    """
    grades = [83, 95, 91, 87, 70, 0, 85, 82, 100, 67, 73, 77, 0]

    # Bucket every grade by its decile (83 -> 80, 100 -> 100) and count how
    # many grades fall into each bucket.
    histogram = Counter(grade // 10 * 10 for grade in grades)

    # Center each bar on its decile explicitly instead of shifting a left
    # edge by half the width; the shift only works when bars are edge-aligned,
    # which is not the default in every matplotlib release.
    plt.bar(
        list(histogram.keys()),    # bar centers: the deciles themselves
        list(histogram.values()),  # give each bar its correct height
        8,                         # give each bar a width of 8
        align="center",
    )
    plt.axis([-5, 105, 0, 5])  # x-axis from -5 to 105, y-axis from 0 to 5
    plt.xticks([10 * i for i in range(11)])  # x-axis labels at 0, 10, ..., 100
    plt.xlabel("Decile")
    plt.ylabel("# of Students")
    plt.title("Distribution of Exam 1 Grades")
    plt.show()
# !! Note: be judicious when using plt.axis(). When creating bar charts it is
# good practice to start the y-axis at 0; otherwise the plot can be misleading.
## Scatterplots
def make_chart_scatterplot(plt):
    """Scatter two sets of test grades against each other on equal axes.

    When the plotted variables are comparable, letting matplotlib choose the
    scale of each axis can give a misleading picture, so equal axis scaling
    is requested via ``plt.axis("equal")``.

    Args:
        plt: The plotting interface to draw with, typically the
            ``matplotlib.pyplot`` module. It must provide ``scatter``,
            ``xlabel``, ``ylabel``, ``title``, ``axis`` and ``show``.
    """
    test_1_grades = [99, 90, 85, 97, 80]
    test_2_grades = [100, 85, 60, 90, 70]

    plt.scatter(test_1_grades, test_2_grades)
    plt.xlabel("test 1 grade")
    plt.ylabel("test 2 grade")
    plt.title("Axes Are Comparable")
    plt.axis("equal")  # scattering comparable variables
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment