# davidro/week2.py

## week2.py
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 25 18:35:21 2015
@author: david
"""
import pandas as pd
import numpy as np

# Read the survey extract from CSV into the module-level DataFrame `data`.
# low_memory=False is passed straight through to pandas' CSV reader.
data = pd.read_csv('addhealth_pds.csv', low_memory=False)

# Upper-case all DataFrame column names so later lookups use one spelling.
# A concrete list is assigned instead of a bare map(): on Python 3 map() is a
# lazy iterator, and not every pandas release accepts an iterator as an index.
data.columns = [name.upper() for name in data.columns]

# Render floats in fixed-point '%f' form when pandas displays them; the
# original author added this as a workaround for display-format runtime errors.
pd.set_option('display.float_format', lambda x: '%f' % x)


# Report the DataFrame's dimensions: rows are observations, columns are variables.
n_observations, n_variables = data.shape
print("\nIn DataSet there is:")
print("----------------------------------------------")
print("{} - observations".format(n_observations))
print("{} - variables \n".format(n_variables))


# Examination of Independent Variables Frequency Distributions
def _print_frequencies(frame, column, question):
    """Print the COUNTS and PERCENTAGES tables for one survey column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that contains ``column``.
    column : str
        Name of the column to tabulate.
    question : str
        Question wording printed after the column name in the header line.

    Returns
    -------
    tuple
        ``(counts, percentages)``: the two ``value_counts()`` results, each
        sorted by answer value, exactly as they were printed.
    """
    print("\nVariable %s: %s" % (column, question))
    print("----------------------------------------------")

    print("\nCOUNTS:")
    counts = frame[column].value_counts().sort_index()
    print(counts)

    print("\nPERCENTAGES:")
    # normalize=True makes value_counts() return relative frequencies.
    percentages = frame[column].value_counts(normalize=True).sort_index()
    print(percentages)

    return counts, percentages


print("\nExemination of Independent Variables Frequency Distributions")
print("============================================================================================")

# Both tables of a section are always built from the same column; the earlier
# copy-pasted version computed the H1PF31 percentages from H1PF32 and labelled
# the H1PF32 section as H1PF31.
c0, p0 = _print_frequencies(data, "H1PF30", "You have a lot of good qualities?")
c1, p1 = _print_frequencies(data, "H1PF31", "You are physically fit?")
c2, p2 = _print_frequencies(data, "H1PF32", "You have a lot to be proud of")
c3, p3 = _print_frequencies(data, "H1PF33", "You like your self just the way you are")
c4, p4 = _print_frequencies(data, "H1PF34", "You feel like you are doing everything just about right")
c5, p5 = _print_frequencies(data, "H1PF35", "You feel socially accepted")
c6, p6 = _print_frequencies(data, "H1PF36", "You feel loved and wanted ")


# Examination of Dependent Variable Frequency Distributions
print("\nExemination of Dependent Variable Frequency Distributions")
print("============================================================================================")

# H1NR6 question wording: With how many people, in total, including romantic
# relationship partners, have you ever had a sexual relationship?
print("\nVariable H1NR6: With how many people, in total, including romantic relationship partners, have you ever had a sexual relationship?")
print("----------------------------------------------")

print("\nCOUNTS:")
cd = data["H1NR6"].value_counts().sort_index()
print(cd)

print("\nPERCENTAGES:")
# Stored as p_dep, not pd: rebinding `pd` here would replace the pandas module
# imported at the top of the file and break any later `pd.` call.
p_dep = data["H1NR6"].value_counts(normalize=True).sort_index()
print(p_dep)


# Build the subset of respondents who reported a sexual relationship: only rows
# whose H1NR6 answer is one of the integers 1..500 are kept (Series.isin).
print("\nSubset of DataSet,only people who had an sexsual relationship included")
print("============================================================================================")
valid_answers = list(range(1, 501))
answered_in_range = data['H1NR6'].isin(valid_answers)
sub1 = data[answered_in_range]

# The relative-frequency table is shown once directly under the section banner
# and once more under the PERCENTAGES heading below.
psd = sub1["H1NR6"].value_counts(normalize=True).sort_index()
print(psd)

print("\nFrequnecy distributions on subset of data")
print("----------------------------------------------")

print("\nCOUNTS:")
csd = sub1["H1NR6"].value_counts().sort_index()
print(csd)

print("\nPERCENTAGES:")
print(psd)
	# -- coding: utf-8 --
	"""
	Created on Fri Sep 25 18:35:21 2015
	@author: david
	"""
	import pandas as pd
	import numpy as np

	# Read the survey extract from CSV into the DataFrame `data`.
	# low_memory=False is passed straight through to pandas' CSV reader.
	data = pd.read_csv('addhealth_pds.csv', low_memory=False)

	# Upper-case all DataFrame column names so later lookups use one spelling.
	# A concrete list is assigned instead of a bare map(): on Python 3 map() is a
	# lazy iterator, and not every pandas release accepts an iterator as an index.
	data.columns = [name.upper() for name in data.columns]

	# Render floats in fixed-point '%f' form when pandas displays them; the
	# original author added this as a workaround for display-format runtime errors.
	pd.set_option('display.float_format', lambda x: '%f' % x)


	# Report the DataFrame's dimensions: rows are observations, columns are variables.
	n_observations, n_variables = data.shape
	print("\nIn DataSet there is:")
	print("----------------------------------------------")
	print("{} - observations".format(n_observations))
	print("{} - variables \n".format(n_variables))


	# Examination of Independent Variables Frequency Distributions
	def _print_frequencies(frame, column, question):
		"""Print the COUNTS and PERCENTAGES tables for one survey column.

		Parameters
		----------
		frame : pandas.DataFrame
			Table that contains ``column``.
		column : str
			Name of the column to tabulate.
		question : str
			Question wording printed after the column name in the header line.

		Returns
		-------
		tuple
			``(counts, percentages)``: the two ``value_counts()`` results, each
			sorted by answer value, exactly as they were printed.
		"""
		print("\nVariable %s: %s" % (column, question))
		print("----------------------------------------------")

		print("\nCOUNTS:")
		counts = frame[column].value_counts().sort_index()
		print(counts)

		print("\nPERCENTAGES:")
		# normalize=True makes value_counts() return relative frequencies.
		percentages = frame[column].value_counts(normalize=True).sort_index()
		print(percentages)

		return counts, percentages


	print("\nExemination of Independent Variables Frequency Distributions")
	print("============================================================================================")

	# Both tables of a section are always built from the same column; the earlier
	# copy-pasted version computed the H1PF31 percentages from H1PF32 and labelled
	# the H1PF32 section as H1PF31.
	c0, p0 = _print_frequencies(data, "H1PF30", "You have a lot of good qualities?")
	c1, p1 = _print_frequencies(data, "H1PF31", "You are physically fit?")
	c2, p2 = _print_frequencies(data, "H1PF32", "You have a lot to be proud of")
	c3, p3 = _print_frequencies(data, "H1PF33", "You like your self just the way you are")
	c4, p4 = _print_frequencies(data, "H1PF34", "You feel like you are doing everything just about right")
	c5, p5 = _print_frequencies(data, "H1PF35", "You feel socially accepted")
	c6, p6 = _print_frequencies(data, "H1PF36", "You feel loved and wanted ")


	# Examination of Dependent Variable Frequency Distributions
	print("\nExemination of Dependent Variable Frequency Distributions")
	print("============================================================================================")

	# H1NR6 question wording: With how many people, in total, including romantic
	# relationship partners, have you ever had a sexual relationship?
	print("\nVariable H1NR6: With how many people, in total, including romantic relationship partners, have you ever had a sexual relationship?")
	print("----------------------------------------------")

	print("\nCOUNTS:")
	cd = data["H1NR6"].value_counts().sort_index()
	print(cd)

	print("\nPERCENTAGES:")
	# Stored as p_dep, not pd: rebinding `pd` here would replace the imported
	# pandas module and break any later `pd.` call.
	p_dep = data["H1NR6"].value_counts(normalize=True).sort_index()
	print(p_dep)


	# Build the subset of respondents who reported a sexual relationship: only rows
	# whose H1NR6 answer is one of the integers 1..500 are kept (Series.isin).
	print("\nSubset of DataSet,only people who had an sexsual relationship included")
	print("============================================================================================")
	valid_answers = list(range(1, 501))
	answered_in_range = data['H1NR6'].isin(valid_answers)
	sub1 = data[answered_in_range]

	# The relative-frequency table is shown once directly under the section banner
	# and once more under the PERCENTAGES heading below.
	psd = sub1["H1NR6"].value_counts(normalize=True).sort_index()
	print(psd)

	print("\nFrequnecy distributions on subset of data")
	print("----------------------------------------------")

	print("\nCOUNTS:")
	csd = sub1["H1NR6"].value_counts().sort_index()
	print(csd)

	print("\nPERCENTAGES:")
	print(psd)