Skip to content

Instantly share code, notes, and snippets.

@colby-schrauth
Last active July 25, 2017 10:35
Show Gist options
  • Save colby-schrauth/3eaf03f2234b8e85c62aeb531987c988 to your computer and use it in GitHub Desktop.
Save colby-schrauth/3eaf03f2234b8e85c62aeb531987c988 to your computer and use it in GitHub Desktop.
lorenz_and_gini
# Import necessary libraries
from __future__ import division
import numpy as np
import pandas as pd
# Load dataset and store dataframe in variable 'df'
# 25 randomly selected income values w/ a range of $50k – $250k
df = pd.read_csv('http://bit.ly/2eaP6ny', header = None)
# Flatten to a 1-D array before sorting, and store in variable 'raw_data'.
# np.sort works along the last axis, so sorting the 2-D frame directly would
# leave a single-column dataset (one income per row) in its original order.
raw_data = np.sort(df.values.ravel())
# x-axis values: cumulative share of the population, evenly spaced from 0 to 1
x = np.linspace(0.0, 1.0, len(raw_data) + 1)
# Create y-axis values for line of perfect equality, which is equal to x
y_pe = x
# y-axis values of the Lorenz curve: cumulative share of total income.
# Start w/ an initial value of 0.0 so that 'y' has the same length as 'x'.
# The running sum is accumulated as float so the division is a true division
# regardless of the dtype of the incomes.
y = np.concatenate(([0.0], np.cumsum(raw_data, dtype=float) / np.sum(raw_data)))
# Calculate the area below the perfect equality line
area_perfect = np.trapz(y_pe, x)
# Compute the area below the Lorenz curve using the composite trapezoidal rule
area_lorenz = np.trapz(y, x)
# Compute the gini coefficient
# Divide the difference of 'area_perfect' and 'area_lorenz' by 'area_perfect'
gini_coeff = (area_perfect - area_lorenz) / area_perfect
# Print gini coefficient (Answer = .19)
print (gini_coeff)
# --------------------------------------------------
# Import visualization libraries
from bokeh.models import SingleIntervalTicker, LinearAxis, HoverTool, CrosshairTool
from bokeh.charts import Bar, output_file, show, output_notebook
from bokeh.plotting import figure, output_file, show
# Build a square figure with the default axes disabled, so that custom axes
# with a fixed tick interval can be attached instead.
p = figure(x_axis_type=None, y_axis_type=None, plot_width=750, plot_height=750)
# One major tick every 0.1 on both axes
ticker = SingleIntervalTicker(interval=.1, num_minor_ticks=10)
xaxis = LinearAxis(ticker=ticker)
yaxis = LinearAxis(ticker=ticker)
p.add_layout(xaxis, 'below')
p.add_layout(yaxis, 'left')
# Draw the Lorenz curve
p.line(x, y, line_width=5)
# 'x' holds the evenly spaced population shares and 'y' the cumulative income
# shares, so the x-axis is the population axis and the y-axis the income axis.
p.xaxis.axis_label = "Cumulative % of Population"
p.xaxis.axis_label_standoff = 25
p.yaxis.axis_label = "Cumulative % of Income"
p.yaxis.axis_label_standoff = 25
# Configure visual properties on a plot's title attribute
p.title.text = "Lorenz Curve for Income Distribution, Gini Coefficient = %f" % gini_coeff
p.title.align = "center"
p.title.text_font_size = '10pt'
# Render the plot inline in the notebook
output_notebook()
show(p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment