# coding: utf-8
# Predicting the Behavior of the Supreme Court of the United States: A General Approach
# ==================
# * __Title__: Predicting the Behavior of the Supreme Court of the United States: A General Approach
# * __Authors__: [Daniel Martin Katz](http://www.law.msu.edu/faculty_staff/profile.php?prof=780), [Michael J Bommarito II](http://bommaritollc.com/), [Josh Blackman](http://joshblackman.com)
# * __Paper URL__: [http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2463244](http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2463244)
# * __Blog URL__: [http://lexpredict.com/portfolio/predicting-the-supreme-court/](http://lexpredict.com/portfolio/predicting-the-supreme-court/)
#
# ## Paper Abstract
# Building upon developments in theoretical and applied machine learning, as well as the efforts of various scholars including Guimera and Sales-Pardo (2011), Ruger et al. (2004), and Martin et al. (2004), we construct a model designed to predict the voting behavior of the Supreme Court of the United States. Using the extremely randomized tree method first proposed in Geurts et al. (2006), a method similar to the random forest approach developed in Breiman (2001), as well as novel feature engineering, we predict more than sixty years of decisions by the Supreme Court of the United States (1953-2013). Using only data available prior to the date of decision, our model correctly identifies 69.7% of the Court’s overall affirm/reverse decisions and correctly forecasts 70.9% of the votes of individual justices across 7,700 cases and more than 68,000 justice votes. Our performance is consistent with the general level of prediction offered by prior scholars. However, our model is distinctive as it is the first robust, generalized, and fully predictive model of Supreme Court voting behavior offered to date. Our model predicts six decades of behavior of thirty Justices appointed by thirteen Presidents. With a more sound methodological foundation, our results represent a major advance for the science of quantitative legal prediction and portend a range of other potential applications, such as those described in Katz (2013).
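#
# For readers unfamiliar with the two tree methods the abstract names, here is a minimal
# sketch of both as implemented in scikit-learn. This is illustrative only: the synthetic
# data is hypothetical, and the reproduction code below uses `RandomForestClassifier`.
#
# ```python
# import sklearn.datasets
# import sklearn.ensemble
#
# X, y = sklearn.datasets.make_classification(n_samples=200, random_state=0)
# # Extremely randomized trees (Geurts et al., 2006): split thresholds drawn at random
# et = sklearn.ensemble.ExtraTreesClassifier(n_estimators=100).fit(X, y)
# # Random forest (Breiman, 2001): splits optimized over a random feature subsample
# rf = sklearn.ensemble.RandomForestClassifier(n_estimators=100).fit(X, y)
# ```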
#
# ## Source Description
# The source and data in this repository allow for the reproduction of the results in this paper.
#
# ## Data Description
# The data used in this paper is available from the [Supreme Court Database (SCDB)](http://scdb.wustl.edu/).
#
# ## Version
# The latest version of this model was released in October 2015.
# In[1]:
# get_ipython().magic('matplotlib inline')
# Imports; the project's `model` module may re-export some of these, but they
# are imported explicitly here so the script is self-contained.
import matplotlib.pyplot as plt
import numpy
import pandas
import scipy.stats
import sklearn.dummy
import sklearn.ensemble
import sklearn.metrics
import statsmodels.stats.proportion
# seaborn
import seaborn
seaborn.set()
seaborn.set_style("darkgrid")
# Project imports
from model import *
# In[2]:
# Get raw data
raw_data = get_raw_scdb_data("data/input/SCDB_2015_01_justiceCentered_Citation.csv")
# Get feature data
feature_df = preprocess_raw_data(raw_data, include_direction=True)
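# `get_raw_scdb_data` and `preprocess_raw_data` live in the project's model module.
# The sketch below is a hypothetical stand-in, not the project's implementation,
# showing the general shape of such preprocessing; the column names are examples
# of categorical SCDB fields.
def preprocess_sketch(df, categorical_columns=("justice", "issueArea")):
    # One-hot encode categorical SCDB fields into model-ready numeric features
    return pandas.get_dummies(df, columns=list(categorical_columns))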
# In[3]:
# Output some diagnostics on features
print(raw_data.shape)
print(feature_df.shape)
assert raw_data.shape[0] == feature_df.shape[0]
# In[4]:
# Output basic quantities for sample.
# Drop justice votes with an "other" outcome (-1); the feature frame must be
# filtered the same way so the boolean masks below stay row-aligned.
ind = raw_data["justice_outcome_disposition"] == -1
raw_data = raw_data[~ind]
feature_df = feature_df[~ind]
print(pandas.DataFrame(raw_data["justice_outcome_disposition"].value_counts()))
print(pandas.DataFrame(raw_data["justice_outcome_disposition"].value_counts(normalize=True)))
# In[5]:
# Set up training time period
min_training_years = 5
term_range = range(raw_data["term"].min() + min_training_years,
                   raw_data["term"].max() + 1)
# Set growing random forest parameters
# Number of trees to grow per term
trees_per_term = 20
# Number of trees to begin with
initial_trees = min_training_years * trees_per_term
# Number of years between "forest fires"
reset_interval = 9999
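# Toy demonstration (not part of the original analysis) of how warm-start
# growth behaves in scikit-learn: raising n_estimators and refitting adds
# trees to the existing forest instead of rebuilding it from scratch.
_X_demo = numpy.random.RandomState(0).rand(30, 3)
_y_demo = (_X_demo[:, 0] > 0.5).astype(int)
_demo = sklearn.ensemble.RandomForestClassifier(n_estimators=10, warm_start=True)
_demo.fit(_X_demo, _y_demo)          # fits 10 trees
_demo.set_params(n_estimators=20)
_demo.fit(_X_demo, _y_demo)          # fits 10 more; len(_demo.estimators_) == 20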
# Set up model
m = None
term_count = 0
for term in term_range:
    # Diagnostic output
    print("Term: {0}".format(term))
    term_count += 1

    # Set up train and test periods
    train_index = (raw_data.loc[:, "term"] < term).values
    test_index = (raw_data.loc[:, "term"] == term).values

    # Set up train data
    feature_data_train = feature_df.loc[train_index, :]
    target_data_train = raw_data.loc[train_index, "justice_outcome_disposition"].astype(int).values

    # Set up test data
    feature_data_test = feature_df.loc[test_index, :]
    target_data_test = raw_data.loc[test_index, "justice_outcome_disposition"].astype(int).values

    # Check if we should rebuild the model based on the reset interval
    if term_count % reset_interval == 0:
        # "Forest fire": grow a new forest from scratch
        print("Reset interval hit; rebuilding with {0} trees".format(initial_trees + (term_count * trees_per_term)))
        m = None
    else:
        # Check if the justice set has changed between terms
        prior_justices = set(raw_data.loc[raw_data.loc[:, "term"] == (term - 1), "justice"].unique())
        current_justices = set(raw_data.loc[raw_data.loc[:, "term"] == term, "justice"].unique())
        if prior_justices != current_justices:
            # Natural court change; trigger a forest fire
            print("Natural court change; rebuilding with {0} trees".format(initial_trees + (term_count * trees_per_term)))
            m = None

    # Build or grow a model depending on initial/reset condition
    if m is None:
        # Grow an initial forest
        m = sklearn.ensemble.RandomForestClassifier(n_estimators=initial_trees + (term_count * trees_per_term),
                                                    class_weight="balanced_subsample",
                                                    warm_start=True,
                                                    n_jobs=-1)
    else:
        # Grow the forest by increasing the number of trees (requires warm_start=True)
        m.set_params(n_estimators=initial_trees + (term_count * trees_per_term))

    # Fit the forest model
    m.fit(feature_data_train,
          target_data_train)

    # Fit the "dummy" model
    d = sklearn.dummy.DummyClassifier(strategy="most_frequent")
    d.fit(feature_data_train, target_data_train)

    # Perform forest predictions
    raw_data.loc[test_index, "rf_predicted"] = m.predict(feature_data_test)

    # Store scores per class; map probability columns by class label, since the
    # "other" (-1) outcome was dropped above and only two classes remain
    # (0 = affirm, 1 = reverse).
    scores = m.predict_proba(feature_data_test)
    class_index = {c: i for i, c in enumerate(m.classes_)}
    raw_data.loc[test_index, "rf_predicted_score_affirm"] = scores[:, class_index[0]]
    raw_data.loc[test_index, "rf_predicted_score_reverse"] = scores[:, class_index[1]]

    # Store dummy predictions
    raw_data.loc[test_index, "dummy_predicted"] = d.predict(feature_data_test)
# In[6]:
# Evaluation range
evaluation_index = raw_data.loc[:, "term"].isin(term_range)
target_actual = raw_data.loc[evaluation_index, "justice_outcome_disposition"]
target_predicted = raw_data.loc[evaluation_index, "rf_predicted"]
target_dummy = raw_data.loc[evaluation_index, "dummy_predicted"]
raw_data.loc[:, "rf_correct"] = numpy.nan
raw_data.loc[:, "dummy_correct"] = numpy.nan
raw_data.loc[evaluation_index, "rf_correct"] = (target_actual == target_predicted).astype(float)
raw_data.loc[evaluation_index, "dummy_correct"] = (target_actual == target_dummy).astype(float)
# Compare models
print("RF model")
print("=" * 32)
print(sklearn.metrics.classification_report(target_actual, target_predicted))
print(sklearn.metrics.confusion_matrix(target_actual, target_predicted))
print(sklearn.metrics.accuracy_score(target_actual, target_predicted))
print("=" * 32)
print("")
# Dummy model
print("Dummy model")
print("=" * 32)
print(sklearn.metrics.classification_report(target_actual, target_dummy))
print(sklearn.metrics.confusion_matrix(target_actual, target_dummy))
print(sklearn.metrics.accuracy_score(target_actual, target_dummy))
print("=" * 32)
print("")
# In[7]:
# Set up time series
rf_correct_ts = raw_data.loc[evaluation_index, :].groupby("term")["rf_correct"].mean()
dummy_correct_ts = raw_data.loc[evaluation_index, :].groupby("term")["dummy_correct"].mean()
# Plot justice-level accuracy by term
f = plt.figure(figsize=(16, 12))
plt.plot(rf_correct_ts.index, rf_correct_ts,
         marker='o', alpha=0.75)
plt.plot(dummy_correct_ts.index, dummy_correct_ts,
         marker='>', alpha=0.75)
plt.xlabel("Term")
plt.ylabel("Accuracy")
plt.legend(('Random forest', 'Dummy'))
# In[8]:
# Set up time series
rf_spread_ts = rf_correct_ts - dummy_correct_ts
# Plot the spread over the dummy model
f = plt.figure(figsize=(16, 12))
plt.bar(rf_spread_ts.index, rf_spread_ts,
        alpha=0.75)
plt.xlabel("Term")
plt.ylabel("Spread (RF accuracy - dummy accuracy)")
plt.title("Spread over dummy model for justice accuracy")
# In[9]:
# Feature importance
feature_importance_df = pandas.DataFrame(list(zip(feature_df.columns, m.feature_importances_)),
                                         columns=["feature", "importance"])
feature_importance_df.sort_values(["importance"], ascending=False).head(10)
# In[10]:
# Output stats
print("t-test:")
print("Uncalibrated:")
print(scipy.stats.ttest_rel(rf_correct_ts.values,
                            dummy_correct_ts.values))
print("=" * 16)
print("ranksum-test:")
print("Uncalibrated:")
print(scipy.stats.ranksums(rf_correct_ts.values,
                           dummy_correct_ts.values))
print("=" * 16)
print("Binomial:")
print(statsmodels.stats.proportion.binom_test(raw_data.loc[evaluation_index, "rf_correct"].sum(),
                                              raw_data.loc[evaluation_index, "rf_correct"].shape[0],
                                              raw_data.loc[evaluation_index, "dummy_correct"].mean(),
                                              alternative="larger"))
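# For reference, the binomial test above asks: if each prediction succeeded at
# the dummy model's accuracy, how likely is a success count at least this large?
# Toy numbers, purely illustrative: 700 successes in 1,000 trials against a 65% baseline.
print("Toy binomial example:",
      statsmodels.stats.proportion.binom_test(700, 1000, 0.65, alternative="larger"))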
# ## Case Outcomes
# In[11]:
# Get case-level predictions: the modal justice-level prediction per docket
rf_predicted_case = pandas.DataFrame(raw_data.loc[evaluation_index, :]
                                     .groupby(["docketId"])["rf_predicted"]
                                     .agg(lambda x: x.value_counts().index[0]))
rf_predicted_case.columns = ["rf_predicted_case"]
dummy_predicted_case = pandas.DataFrame(raw_data.loc[evaluation_index, :]
                                        .groupby(["docketId"])["dummy_predicted"]
                                        .agg(lambda x: x.value_counts().index[0]))
dummy_predicted_case.columns = ["dummy_predicted_case"]
# Map the case-level predictions back onto the justice-level rows
raw_data.loc[:, "rf_predicted_case"] = raw_data["docketId"].map(rf_predicted_case["rf_predicted_case"])
raw_data.loc[:, "dummy_predicted_case"] = raw_data["docketId"].map(dummy_predicted_case["dummy_predicted_case"])
# In[12]:
# Output case distribution; dockets whose per-case mode is tied are left as None
case_outcomes = raw_data.groupby(["docketId"])["case_outcome_disposition"].agg(lambda x: x.mode())
case_outcomes = case_outcomes.apply(lambda x: int(x) if type(x) in [numpy.float64] else None)
print(case_outcomes.value_counts())
print(case_outcomes.value_counts(normalize=True))
# In[13]:
# Output comparison
# Evaluation range: terms in range with a coded case outcome
evaluation_index = raw_data.loc[:, "term"].isin(term_range) & ~raw_data.loc[:, "case_outcome_disposition"].isnull()
target_actual = raw_data.loc[evaluation_index, "case_outcome_disposition"]
target_predicted = raw_data.loc[evaluation_index, "rf_predicted_case"]
target_dummy = raw_data.loc[evaluation_index, "dummy_predicted_case"]
raw_data.loc[:, "rf_correct_case"] = numpy.nan
raw_data.loc[:, "dummy_correct_case"] = numpy.nan
raw_data.loc[evaluation_index, "rf_correct_case"] = (target_actual == target_predicted).astype(float)
raw_data.loc[evaluation_index, "dummy_correct_case"] = (target_actual == target_dummy).astype(float)
# Compare models
print("RF model")
print("=" * 32)
print(sklearn.metrics.classification_report(target_actual, target_predicted))
print(sklearn.metrics.confusion_matrix(target_actual, target_predicted))
print(sklearn.metrics.accuracy_score(target_actual, target_predicted))
print("=" * 32)
print("")
# Dummy model
print("Dummy model")
print("=" * 32)
print(sklearn.metrics.classification_report(target_actual, target_dummy))
print(sklearn.metrics.confusion_matrix(target_actual, target_dummy))
print(sklearn.metrics.accuracy_score(target_actual, target_dummy))
print("=" * 32)
print("")
# In[14]:
# Set up time series
rf_correct_case_ts = raw_data.loc[evaluation_index, :].groupby("term")["rf_correct_case"].mean()
dummy_correct_case_ts = raw_data.loc[evaluation_index, :].groupby("term")["dummy_correct_case"].mean()
# Plot case-level accuracy by term
f = plt.figure(figsize=(16, 12))
plt.plot(rf_correct_case_ts.index, rf_correct_case_ts,
         marker='o', alpha=0.75)
plt.plot(dummy_correct_case_ts.index, dummy_correct_case_ts,
         marker='>', alpha=0.75)
plt.xlabel("Term")
plt.ylabel("Accuracy")
plt.legend(('Random forest', 'Dummy'))
# In[15]:
# Set up time series
rf_spread_case_ts = rf_correct_case_ts - dummy_correct_case_ts
# Plot the spread over the dummy model
f = plt.figure(figsize=(16, 12))
plt.bar(rf_spread_case_ts.index, rf_spread_case_ts,
        alpha=0.75)
plt.xlabel("Term")
plt.ylabel("Spread (RF accuracy - dummy accuracy)")
plt.title("Spread over dummy model for case accuracy")
# In[16]:
# Set up time series: running count of terms where the RF beats (+1) or
# trails (-1) the dummy model
rf_spread_case_dir_ts = numpy.sign(rf_spread_case_ts).expanding().sum()
# Plot the cumulative direction of the spread
f = plt.figure(figsize=(16, 12))
plt.plot(rf_spread_case_dir_ts.index, rf_spread_case_dir_ts,
         alpha=0.75)
# In[24]:
# Output stats
print("t-test:")
print("Uncalibrated:")
print(scipy.stats.ttest_rel(rf_correct_case_ts.values,
                            dummy_correct_case_ts.values))
print("=" * 16)
print("ranksum-test:")
print("Uncalibrated:")
print(scipy.stats.ranksums(rf_correct_case_ts.values,
                           dummy_correct_case_ts.values))
print("=" * 16)
print("Binomial:")
# One row per docket, so cases rather than justice votes are the unit of analysis
case_accuracy_data = raw_data.loc[evaluation_index, ["docketId", "rf_correct_case", "dummy_correct_case"]].drop_duplicates()
print(statsmodels.stats.proportion.binom_test(case_accuracy_data["rf_correct_case"].sum(),
                                              case_accuracy_data["rf_correct_case"].shape[0],
                                              case_accuracy_data["dummy_correct_case"].mean(),
                                              alternative="larger"))
# In[18]:
# Spot-check predictions for a few well-known cases
raw_data.loc[raw_data.loc[:, "caseName"] == "MIRANDA v. ARIZONA",
             ["caseName", "justiceName", "case_outcome_disposition", "justice_outcome_disposition",
              "rf_predicted", "rf_predicted_score_affirm", "rf_predicted_score_reverse", "rf_correct_case", "dummy_correct_case"]].T
# In[19]:
raw_data.loc[raw_data.loc[:, "caseName"] == "OBERGEFELL v. HODGES",
             ["caseName", "justiceName", "case_outcome_disposition", "justice_outcome_disposition",
              "rf_predicted", "rf_predicted_score_affirm", "rf_predicted_score_reverse", "rf_correct_case", "dummy_correct_case"]].T
# In[20]:
raw_data.loc[raw_data.loc[:, "caseName"] == "KING v. BURWELL",
             ["caseName", "justiceName", "case_outcome_disposition", "justice_outcome_disposition",
              "rf_predicted", "rf_predicted_score_affirm", "rf_predicted_score_reverse", "rf_correct_case", "dummy_correct_case"]].T
# In[21]:
raw_data.loc[raw_data.loc[:, "caseName"] == 'CITIZENS UNITED v. FEDERAL ELECTION COMMISSION',
             ["caseName", "justiceName", "case_outcome_disposition", "justice_outcome_disposition",
              "rf_predicted", "rf_predicted_score_affirm", "rf_predicted_score_reverse", "rf_correct_case", "dummy_correct_case"]].T
# In[22]:
raw_data.loc[raw_data.loc[:, "caseName"] == 'DISTRICT OF COLUMBIA v. HELLER',
             ["caseName", "justiceName", "case_outcome_disposition", "justice_outcome_disposition",
              "rf_predicted", "rf_predicted_score_affirm", "rf_predicted_score_reverse", "rf_correct_case", "dummy_correct_case"]].T
# In[23]:
# Write output
import pickle
import time
timestamp_suffix = time.strftime("%Y%m%d%H%M%S")
raw_data.to_csv("data/output/model_output_{0}_{1}.csv".format(trees_per_term, timestamp_suffix),
                index=False)
# You probably don't want to do this, as the object can be tens of GB
#pickle.dump(m, open("scotus_scdb_model_{0}.pickle".format(timestamp_suffix), "wb"))