Skip to content

Instantly share code, notes, and snippets.

@swhume
Created March 19, 2020 20:47
Show Gist options
  • Save swhume/af124c73f2fdb59093559f7afdd80830 to your computer and use it in GitHub Desktop.
Save swhume/af124c73f2fdb59093559f7afdd80830 to your computer and use it in GitHub Desktop.
Initial frequency distribution for AddHealth data analysis
"""
freq_dist.py prints frequency distributions for 3 variables H1DA2, H1FS11, H1FS15
@author: swhume
last_updated 2020-03-18
"""
import pandas
import os
# set path and filename for the data file and load the dataset
# the file is expected in a "data" folder next to this script; every path component is handed to
# os.path.join separately so the platform's own separator is used instead of a hard-coded backslash
data_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", "addhealth_pds.csv")
# low_memory=False makes pandas read the file in one pass rather than in internal chunks
data = pandas.read_csv(data_file, low_memory=False)
# number of observations (rows) and the number of variables (columns)
print(f"total dataset rows: {len(data)}")
print(f"total dataset columns: {len(data.columns)}\n")
# hobby variable (H1DA2): convert the column to numeric, downcast to the smallest signed integer dtype when possible
hobby = pandas.to_numeric(pandas.Series(data["H1DA2"]), downcast="signed")
# frequency distribution for hobby (H1DA2): occurrences of each value, most frequent first,
# then the same tally expressed as proportions of the counted values
hobby_count = hobby.value_counts()
hobby_percent = hobby.value_counts(normalize=True)
print(f"hobby count (H1DA2):\n{hobby_count}\n")
print(f"hobby percentages (H1DA2):\n{hobby_percent}\n")
# happy variable (H1FS11): convert the column to numeric, downcast to the smallest signed integer dtype when possible
happy = pandas.to_numeric(pandas.Series(data["H1FS11"]), downcast="signed")
# frequency distribution for happy (H1FS11): occurrences of each value, most frequent first,
# then the same tally expressed as proportions of the counted values
happy_count = happy.value_counts()
happy_percent = happy.value_counts(normalize=True)
print(f"times you were happy count (H1FS11):\n{happy_count}\n")
print(f"times you were happy percentages (H1FS11):\n{happy_percent}\n")
# enjoyed life variable (H1FS15): convert the column to numeric, downcast to the smallest signed integer dtype when possible
enjoy = pandas.to_numeric(pandas.Series(data["H1FS15"]), downcast="signed")
# frequency distribution for enjoyed life (H1FS15): occurrences of each value, most frequent first,
# then the same tally expressed as proportions of the counted values
enjoy_count = enjoy.value_counts()
enjoy_percent = enjoy.value_counts(normalize=True)
print(f"times you enjoyed life count (H1FS15):\n{enjoy_count}\n")
print(f"times you enjoyed life percentages (H1FS15):\n{enjoy_percent}\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment