# swhume/freq_dist.py

## freq_dist.py
"""
freq_dist.py prints frequency distributions for 3 variables H1DA2, H1FS11, H1FS15

@author: swhume
last_updated 2020-03-18
"""
import pandas
import os

# Locate the dataset in the "data" directory that sits next to this script.
# Every path component is handed to os.path.join separately so the platform's
# own separator is used; a hard-coded "\\data" suffix only resolves on Windows.
data_file = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "data", "addhealth_pds.csv"
)
data = pandas.read_csv(data_file, low_memory=False)

# number of observations (rows) and the number of variables (columns)
print(f"total dataset rows: {len(data)}")
print(f"total dataset columns: {len(data.columns)}\n")


def _numeric_column(frame, column):
    """Return ``frame[column]`` converted to a numeric series.

    ``downcast="signed"`` asks pandas for the smallest signed integer dtype
    that can hold the values. Indexing the frame already yields a Series, so
    no extra ``pandas.Series(...)`` wrapper is needed.
    """
    return pandas.to_numeric(frame[column], downcast="signed")


def _report_frequencies(title, code, values):
    """Print and return the frequency distribution of *values*.

    Two tables are printed, each followed by a blank line: the raw counts and
    the relative frequencies. Both are ordered by frequency (``sort=True``).
    The second table is labelled "percentages" in the output, but
    ``normalize=True`` yields proportions in the range 0-1, not values
    multiplied by 100.

    Parameters:
        title: human-readable variable description used in the headings.
        code: dataset column code shown in parentheses in the headings.
        values: the numeric series to tabulate.

    Returns:
        A ``(counts, proportions)`` tuple of the two printed series.
    """
    counts = values.value_counts(sort=True)
    print(f"{title} count ({code}):\n{counts}\n")
    proportions = values.value_counts(sort=True, normalize=True)
    print(f"{title} percentages ({code}):\n{proportions}\n")
    return counts, proportions


# hobby variable (H1DA2): series plus its frequency distribution
hobby = _numeric_column(data, "H1DA2")
hobby_count, hobby_percent = _report_frequencies("hobby", "H1DA2", hobby)

# happy variable (H1FS11): series plus its frequency distribution
happy = _numeric_column(data, "H1FS11")
happy_count, happy_percent = _report_frequencies(
    "times you were happy", "H1FS11", happy
)

# enjoyed life variable (H1FS15): series plus its frequency distribution
enjoy = _numeric_column(data, "H1FS15")
enjoy_count, enjoy_percent = _report_frequencies(
    "times you enjoyed life", "H1FS15", enjoy
)
	"""
	freq_dist.py prints frequency distributions for 3 variables H1DA2, H1FS11, H1FS15

	@author: swhume
	last_updated 2020-03-18
	"""
	import pandas
	import os

	# Build the path to the data file and load the dataset. "data" is passed to
	# os.path.join as its own component so the platform separator is used; the
	# previous hard-coded "\\data" suffix only resolved on Windows.
	script_dir = os.path.dirname(os.path.realpath(__file__))
	data_file = os.path.join(script_dir, "data", "addhealth_pds.csv")
	data = pandas.read_csv(data_file, low_memory=False)

	# number of observations (rows) and the number of variables (columns)
	print(f"total dataset rows: {len(data)}")
	print(f"total dataset columns: {len(data.columns)}\n")

	# create hobby variable series (H1DA2); data[...] is already a Series, so it
	# is converted directly, downcast to the smallest signed integer dtype
	hobby = pandas.to_numeric(data["H1DA2"], downcast="signed")

	# counts and relative frequencies (i.e. frequency distributions) for the
	# hobby variable (H1DA2); normalize=True yields proportions in the range 0-1
	hobby_count = hobby.value_counts(sort=True)
	print(f"hobby count (H1DA2):\n{hobby_count}\n")
	hobby_percent = hobby.value_counts(sort=True, normalize=True)
	print(f"hobby percentages (H1DA2):\n{hobby_percent}\n")

	# create happy variable series (H1FS11)
	happy = pandas.to_numeric(data["H1FS11"], downcast="signed")

	# counts and relative frequencies for the happy variable (H1FS11)
	happy_count = happy.value_counts(sort=True)
	print(f"times you were happy count (H1FS11):\n{happy_count}\n")
	happy_percent = happy.value_counts(sort=True, normalize=True)
	print(f"times you were happy percentages (H1FS11):\n{happy_percent}\n")

	# create enjoyed life variable series (H1FS15)
	enjoy = pandas.to_numeric(data["H1FS15"], downcast="signed")

	# counts and relative frequencies for the enjoyed life variable (H1FS15)
	enjoy_count = enjoy.value_counts(sort=True)
	print(f"times you enjoyed life count (H1FS15):\n{enjoy_count}\n")
	enjoy_percent = enjoy.value_counts(sort=True, normalize=True)
	print(f"times you enjoyed life percentages (H1FS15):\n{enjoy_percent}\n")