tpatch/Assignment 4

## Assignment 4
# -*- coding: utf-8 -*-
"""
@author: tpatch
"""

import pandas
import numpy
import seaborn
import matplotlib.pyplot as plt

# bug fix for display formats to avoid run time errors:
# print floats in fixed-point notation ('%f').
pandas.set_option('display.float_format', lambda x: '%f' % x)
#Set PANDAS to show all columns in DataFrame
pandas.set_option('display.max_columns', None)
#Set PANDAS to show all rows in DataFrame
pandas.set_option('display.max_rows', None)

data = pandas.read_csv('datasets/ool_pds.csv', low_memory=False)
# Upper-case every column name. A concrete list is assigned because on
# Python 3 map() returns a lazy iterator, which older pandas releases may
# refuse as an index.
data.columns = [name.upper() for name in data.columns]

# Force the survey variables used below to numeric dtype; entries that cannot
# be parsed become NaN. Series.convert_objects() was deprecated in pandas 0.17
# and removed in 1.0 -- pandas.to_numeric(errors='coerce') is its replacement.
for column in ("W1_A11", "W1_K1_B", "W1_K1_A", "W1_K1_C", "W1_K1_D", "W1_B4"):
    data[column] = pandas.to_numeric(data[column], errors='coerce')


# Pull and print variables
# recode1 shifts the raw W1_A11 codes 1-8 down by one to 0-7
# (presumably the number of days -- confirm against the codebook).
recode1 = {1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7}
print("Days of week Watching News")
# -1 is treated as a missing answer.
data["W1_A11"] = data["W1_A11"].replace(-1, numpy.nan)
c1 = data["W1_A11"].map(recode1)
# Relative frequency of each recoded value, smallest share first.
news = c1.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
print(news)

# Break tv watching days into groups
tvDayBins = [0, 2, 4, 6, 8]
tvDayLabels = ["0-1 Days", "2-3 Days", "4-5 Days", "6-7 Days"]
# Kept for reference: textual form of each bin and the label it stands for.
recodeFreq = dict(zip(["(0, 2]", "(2, 4]", "(4, 6]", "(6, 8]"], tvDayLabels))
data["TVDAYSFREQ"] = pandas.cut(data.W1_A11, tvDayBins)
# Label the bins directly through cut(). Mapping the binned column with the
# string keys of recodeFreq only works on old pandas releases where cut()
# produced string categories; with Interval categories nothing matches and
# every value becomes NaN.
c1b = pandas.cut(data["W1_A11"], tvDayBins, labels=tvDayLabels)
daysFreq = c1b.value_counts(normalize=True, dropna=True, sort=True, ascending=True)
print("\nDays of week Watching News - Binned")
print(daysFreq)

#Chart for days watching TV news
# Open a fresh figure: countplot draws on the current axes, so without this
# the chart would share axes with any other chart drawn before plt.show().
plt.figure()
c1b = c1b.astype('category')
seaborn.countplot(x=c1b, data=data)
plt.xlabel('Days of TV News Watched per Week')
plt.title('Days of TV News Watched per Week')
# plt.show()

# Text labels for the 1-4 answer codes of the trust questions (W1_K1_*).
recode2 = {1: "Just About Always", 2: "Most of the Time", 3: "Only Some of the Time", 4: "Never"}
print("\nHow much do you think you can trust the police?")
# -1 is treated as a missing answer.
data["W1_K1_B"] = data["W1_K1_B"].replace(-1, numpy.nan)
c2 = data["W1_K1_B"].map(recode2)
# Relative frequency of each answer, smallest share first.
police = c2.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
print(police)

# Chart for police trust
# Open a fresh figure: countplot draws on the current axes, so without this
# the chart would be drawn on top of whatever was plotted before it.
plt.figure()
c2 = c2.astype('category')
seaborn.countplot(x=c2, data=data)
plt.xlabel('How much do you think you can trust the police?')
plt.title('How much do you think you can trust the police?')
# plt.show()

print("\nHow much do you think you can trust the government?")
# -1 is treated as a missing answer.
data["W1_K1_A"] = data["W1_K1_A"].replace(-1, numpy.nan)
# recode2 (defined earlier in the script) turns the 1-4 codes into text labels.
c3 = data["W1_K1_A"].map(recode2)
# Relative frequency of each answer, smallest share first.
govt = c3.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
print(govt)

# Chart for govt trust
# Open a fresh figure: countplot draws on the current axes, so without this
# the chart would be drawn on top of whatever was plotted before it.
plt.figure()
c3 = c3.astype('category')
seaborn.countplot(x=c3, data=data)
plt.xlabel('How much do you think you can trust the government?')
plt.title('How much do you think you can trust the government?')
# plt.show()

print("\nHow much do you think you can trust the Legal System?")
# -1 is treated as a missing answer.
data["W1_K1_C"] = data["W1_K1_C"].replace(-1, numpy.nan)
# recode2 (defined earlier in the script) turns the 1-4 codes into text labels.
c4 = data["W1_K1_C"].map(recode2)
# Relative frequency of each answer, smallest share first.
lsystem = c4.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
print(lsystem)

# Chart for legal system trust
# Open a fresh figure: countplot draws on the current axes, so without this
# the chart would be drawn on top of whatever was plotted before it.
plt.figure()
c4 = c4.astype('category')
seaborn.countplot(x=c4, data=data)
plt.xlabel('How much do you think you can trust the legal system?')
plt.title('How much do you think you can trust the legal system?')
# plt.show()

print("\nHow much do you think you can trust the Public Schools?")
# -1 is treated as a missing answer.
data["W1_K1_D"] = data["W1_K1_D"].replace(-1, numpy.nan)
# recode2 (defined earlier in the script) turns the 1-4 codes into text labels.
c5 = data["W1_K1_D"].map(recode2)
# Relative frequency of each answer, smallest share first.
pschool = c5.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
print(pschool)

# Chart for public schools trust
# Open a fresh figure: countplot draws on the current axes, so without this
# the chart would be drawn on top of whatever was plotted before it.
plt.figure()
c5 = c5.astype('category')
seaborn.countplot(x=c5, data=data)
plt.xlabel('How much do you think you can trust public schools?')
plt.title('How much do you think you can trust public schools?')
# plt.show()

# Text labels for the 1-5 answer codes of W1_B4.
recodeAnger = {1: "Extremely Angry", 2: "Very Angry", 3: "Somewhat Angry", 4: "A Little Angry", 5: "Not Angry at All"}
print("\nGenerally speaking, how angry do you feel about the way things are going in the country these days?")
# -1 is treated as a missing answer.
data["W1_B4"] = data["W1_B4"].replace(-1, numpy.nan)
c6 = data["W1_B4"].map(recodeAnger)
# Relative frequency of each answer, smallest share first.
anger = c6.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
print(anger)

# Chart for anger about the direction of the country
# Open a fresh figure: countplot draws on the current axes, so without this
# the chart would be drawn on top of whatever was plotted before it.
plt.figure()
c6 = c6.astype('category')
seaborn.countplot(x=c6, data=data)
plt.xlabel('Generally speaking, how angry do you feel about the way things are going in the country these days?')
plt.title('Generally speaking, how angry do you feel about the way things are going in the country these days?')
# plt.show()

# Summary statistics of the two variables compared in the chart below.
# print() is called as a function so the script also parses on Python 3,
# matching the other print calls in this script.
desc1 = data['W1_A11'].describe()
print(desc1)

desc2 = data['W1_K1_B'].describe()
print(desc2)

# Bar chart of the mean W1_K1_B value for each W1_A11 value, without error
# bars (ci=None). seaborn renamed factorplot to catplot in 0.9 and removed the
# old name in 0.12, so prefer catplot and fall back on older installations.
drawCategorical = getattr(seaborn, "catplot", None) or seaborn.factorplot
scat1 = drawCategorical(x="W1_A11", y="W1_K1_B", data=data, kind="bar", ci=None)
plt.xlabel('Days of TV News Watched per Week')
plt.ylabel('Police Trust')
plt.show()
	# -- coding: utf-8 --
	"""
	@author: tpatch
	"""

	import pandas
	import numpy
	import seaborn
	import matplotlib.pyplot as plt

	# bug fix for display formats to avoid run time errors:
	# print floats in fixed-point notation ('%f').
	pandas.set_option('display.float_format', lambda x: '%f' % x)
	#Set PANDAS to show all columns in DataFrame
	pandas.set_option('display.max_columns', None)
	#Set PANDAS to show all rows in DataFrame
	pandas.set_option('display.max_rows', None)

	data = pandas.read_csv('datasets/ool_pds.csv', low_memory=False)
	# Upper-case every column name. A concrete list is assigned because on
	# Python 3 map() returns a lazy iterator, which older pandas releases may
	# refuse as an index.
	data.columns = [name.upper() for name in data.columns]

	# Force the survey variables used below to numeric dtype; entries that cannot
	# be parsed become NaN. Series.convert_objects() was deprecated in pandas 0.17
	# and removed in 1.0 -- pandas.to_numeric(errors='coerce') is its replacement.
	for column in ("W1_A11", "W1_K1_B", "W1_K1_A", "W1_K1_C", "W1_K1_D", "W1_B4"):
		data[column] = pandas.to_numeric(data[column], errors='coerce')


	# Pull and print variables
	# recode1 shifts the raw W1_A11 codes 1-8 down by one to 0-7
	# (presumably the number of days -- confirm against the codebook).
	recode1 = {1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7}
	print("Days of week Watching News")
	# -1 is treated as a missing answer.
	data["W1_A11"] = data["W1_A11"].replace(-1, numpy.nan)
	c1 = data["W1_A11"].map(recode1)
	# Relative frequency of each recoded value, smallest share first.
	news = c1.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
	print(news)

	# Break tv watching days into groups
	tvDayBins = [0, 2, 4, 6, 8]
	tvDayLabels = ["0-1 Days", "2-3 Days", "4-5 Days", "6-7 Days"]
	# Kept for reference: textual form of each bin and the label it stands for.
	recodeFreq = dict(zip(["(0, 2]", "(2, 4]", "(4, 6]", "(6, 8]"], tvDayLabels))
	data["TVDAYSFREQ"] = pandas.cut(data.W1_A11, tvDayBins)
	# Label the bins directly through cut(). Mapping the binned column with the
	# string keys of recodeFreq only works on old pandas releases where cut()
	# produced string categories; with Interval categories nothing matches and
	# every value becomes NaN.
	c1b = pandas.cut(data["W1_A11"], tvDayBins, labels=tvDayLabels)
	daysFreq = c1b.value_counts(normalize=True, dropna=True, sort=True, ascending=True)
	print("\nDays of week Watching News - Binned")
	print(daysFreq)

	#Chart for days watching TV news
	# Open a fresh figure: countplot draws on the current axes, so without this
	# the chart would share axes with any other chart drawn before plt.show().
	plt.figure()
	c1b = c1b.astype('category')
	seaborn.countplot(x=c1b, data=data)
	plt.xlabel('Days of TV News Watched per Week')
	plt.title('Days of TV News Watched per Week')
	# plt.show()

	# Text labels for the 1-4 answer codes of the trust questions (W1_K1_*).
	recode2 = {1: "Just About Always", 2: "Most of the Time", 3: "Only Some of the Time", 4: "Never"}
	print("\nHow much do you think you can trust the police?")
	# -1 is treated as a missing answer.
	data["W1_K1_B"] = data["W1_K1_B"].replace(-1, numpy.nan)
	c2 = data["W1_K1_B"].map(recode2)
	# Relative frequency of each answer, smallest share first.
	police = c2.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
	print(police)

	# Chart for police trust
	# Open a fresh figure: countplot draws on the current axes, so without this
	# the chart would be drawn on top of whatever was plotted before it.
	plt.figure()
	c2 = c2.astype('category')
	seaborn.countplot(x=c2, data=data)
	plt.xlabel('How much do you think you can trust the police?')
	plt.title('How much do you think you can trust the police?')
	# plt.show()

	print("\nHow much do you think you can trust the government?")
	# -1 is treated as a missing answer.
	data["W1_K1_A"] = data["W1_K1_A"].replace(-1, numpy.nan)
	# recode2 (defined earlier in the script) turns the 1-4 codes into text labels.
	c3 = data["W1_K1_A"].map(recode2)
	# Relative frequency of each answer, smallest share first.
	govt = c3.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
	print(govt)

	# Chart for govt trust
	# Open a fresh figure: countplot draws on the current axes, so without this
	# the chart would be drawn on top of whatever was plotted before it.
	plt.figure()
	c3 = c3.astype('category')
	seaborn.countplot(x=c3, data=data)
	plt.xlabel('How much do you think you can trust the government?')
	plt.title('How much do you think you can trust the government?')
	# plt.show()

	print("\nHow much do you think you can trust the Legal System?")
	# -1 is treated as a missing answer.
	data["W1_K1_C"] = data["W1_K1_C"].replace(-1, numpy.nan)
	# recode2 (defined earlier in the script) turns the 1-4 codes into text labels.
	c4 = data["W1_K1_C"].map(recode2)
	# Relative frequency of each answer, smallest share first.
	lsystem = c4.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
	print(lsystem)

	# Chart for legal system trust
	# Open a fresh figure: countplot draws on the current axes, so without this
	# the chart would be drawn on top of whatever was plotted before it.
	plt.figure()
	c4 = c4.astype('category')
	seaborn.countplot(x=c4, data=data)
	plt.xlabel('How much do you think you can trust the legal system?')
	plt.title('How much do you think you can trust the legal system?')
	# plt.show()

	print("\nHow much do you think you can trust the Public Schools?")
	# -1 is treated as a missing answer.
	data["W1_K1_D"] = data["W1_K1_D"].replace(-1, numpy.nan)
	# recode2 (defined earlier in the script) turns the 1-4 codes into text labels.
	c5 = data["W1_K1_D"].map(recode2)
	# Relative frequency of each answer, smallest share first.
	pschool = c5.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
	print(pschool)

	# Chart for public schools trust
	# Open a fresh figure: countplot draws on the current axes, so without this
	# the chart would be drawn on top of whatever was plotted before it.
	plt.figure()
	c5 = c5.astype('category')
	seaborn.countplot(x=c5, data=data)
	plt.xlabel('How much do you think you can trust public schools?')
	plt.title('How much do you think you can trust public schools?')
	# plt.show()

	# Text labels for the 1-5 answer codes of W1_B4.
	recodeAnger = {1: "Extremely Angry", 2: "Very Angry", 3: "Somewhat Angry", 4: "A Little Angry", 5: "Not Angry at All"}
	print("\nGenerally speaking, how angry do you feel about the way things are going in the country these days?")
	# -1 is treated as a missing answer.
	data["W1_B4"] = data["W1_B4"].replace(-1, numpy.nan)
	c6 = data["W1_B4"].map(recodeAnger)
	# Relative frequency of each answer, smallest share first.
	anger = c6.value_counts(sort=True, normalize=True, ascending=True, dropna=True)
	print(anger)

	# Chart for anger about the direction of the country
	# Open a fresh figure: countplot draws on the current axes, so without this
	# the chart would be drawn on top of whatever was plotted before it.
	plt.figure()
	c6 = c6.astype('category')
	seaborn.countplot(x=c6, data=data)
	plt.xlabel('Generally speaking, how angry do you feel about the way things are going in the country these days?')
	plt.title('Generally speaking, how angry do you feel about the way things are going in the country these days?')
	# plt.show()

	# Summary statistics of the two variables compared in the chart below.
	# print() is called as a function so the script also parses on Python 3,
	# matching the other print calls in this script.
	desc1 = data['W1_A11'].describe()
	print(desc1)

	desc2 = data['W1_K1_B'].describe()
	print(desc2)

	# Bar chart of the mean W1_K1_B value for each W1_A11 value, without error
	# bars (ci=None). seaborn renamed factorplot to catplot in 0.9 and removed the
	# old name in 0.12, so prefer catplot and fall back on older installations.
	drawCategorical = getattr(seaborn, "catplot", None) or seaborn.factorplot
	scat1 = drawCategorical(x="W1_A11", y="W1_K1_B", data=data, kind="bar", ci=None)
	plt.xlabel('Days of TV News Watched per Week')
	plt.ylabel('Police Trust')
	plt.show()