Skip to content

Instantly share code, notes, and snippets.

@davidro
Created September 28, 2015 06:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidro/c8bf9b0d1df8b24d0997 to your computer and use it in GitHub Desktop.
Save davidro/c8bf9b0d1df8b24d0997 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 25 18:35:21 2015
@author: david
"""
import pandas as pd
import numpy as np
# Load the survey CSV into a pandas DataFrame bound to the name data.
data = pd.read_csv('addhealth_pds.csv', low_memory=False)
# Normalise every column name to upper case.
data.columns = [column_name.upper() for column_name in data.columns]


def _format_float(value):
    """Return value rendered in fixed-point notation for pandas display."""
    return '%f' % value


# bug fix for display formats to avoid run time errors
pd.set_option('display.float_format', _format_float)
# Report the DataFrame dimensions: rows are observations, columns are variables.
row_count, column_count = data.shape
print("\nIn DataSet there is:")
print("----------------------------------------------")
print("%s - observations" % row_count)
print("%s - variables \n" % column_count)
# Examination of independent-variable frequency distributions.
print("\nExemination of Independent Variables Frequency Distributions")
print("============================================================================================")
# (column name, question text) for every independent variable that is reported.
independent_variables = [
    ("H1PF30", "You have a lot of good qualities?"),
    ("H1PF31", "You are physically fit?"),
    ("H1PF32", "You have a lot to be proud of"),
    ("H1PF33", "You like your self just the way you are"),
    ("H1PF34", "You feel like you are doing everything just about right"),
    ("H1PF35", "You feel socially accepted"),
    ("H1PF36", "You feel loved and wanted "),
]
for column, question in independent_variables:
    # The heading, the counts and the percentages are all derived from the
    # same column name, so a heading is always paired with its own variable.
    print("\nVariable %s: %s" % (column, question))
    print("----------------------------------------------")
    answers = data[column]
    print("\nCOUNTS:")
    # Occurrences of each distinct answer, ordered by answer value.
    print(answers.value_counts().sort_index())
    print("\nPERCENTAGES:")
    # Same distribution expressed as a fraction of the counted answers.
    print(answers.value_counts(normalize=True).sort_index())
# Examination of the dependent-variable frequency distribution.
print("\nExemination of Dependent Variable Frequency Distributions")
print("============================================================================================")
# H1NR6: With how many people, in total, including romantic relationship
# partners, have you ever had a sexual relationship?
print("\nVariable H1NR6: With how many people, in total, including romantic relationship partners, have you ever had a sexual relationship?")
print("----------------------------------------------")
print("\nCOUNTS:")
# Occurrences of each distinct answer, ordered by answer value.
cd = data["H1NR6"].value_counts().sort_index()
print(cd)
print("\nPERCENTAGES:")
# Bound to pct_d rather than pd: rebinding pd would replace the pandas module
# alias imported at the top of the file for the rest of the script.
pct_d = data["H1NR6"].value_counts(normalize=True).sort_index()
print(pct_d)
# Build a subset of the DataSet keeping only rows whose H1NR6 answer is one of
# the integers 1-500 (per the original note: people who had a sexual
# relationship), using the pandas .isin membership test.
print("\nSubset of DataSet,only people who had an sexsual relationship included")
print("============================================================================================")
accepted_answers = list(range(1, 501))
in_accepted_range = data['H1NR6'].isin(accepted_answers)
sub1 = data[in_accepted_range]
subset_answers = sub1["H1NR6"]
# First look at the subset: relative frequencies ordered by answer value.
print(subset_answers.value_counts(normalize=True).sort_index())
print("\nFrequnecy distributions on subset of data")
print("----------------------------------------------")
print("\nCOUNTS:")
# Occurrences of each distinct answer within the subset.
csd = subset_answers.value_counts().sort_index()
print(csd)
print("\nPERCENTAGES:")
# The same distribution as a fraction of the subset's counted answers.
psd = subset_answers.value_counts(normalize=True).sort_index()
print(psd)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment