Skip to content

Instantly share code, notes, and snippets.

@borowis
Created May 15, 2016 22:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save borowis/1d16861d2ec1f6ecc04fd84964259419 to your computer and use it in GitHub Desktop.
Save borowis/1d16861d2ec1f6ecc04fd84964259419 to your computer and use it in GitHub Desktop.
import pandas
def _yes_no_codes():
    # Fresh dict per call so no two variables share one mutable code table.
    return {1: 'Yes', 2: 'No', 9: 'Unknown'}


def _age_codes(age_range, not_applicable):
    # age_range is the 'low-high' label of valid ages; 99 marks an unknown
    # age. 'BL' is presumably the codebook's blank marker -- TODO confirm.
    return {age_range: 'Age', 99: 'Unknown', 'BL': not_applicable}


# Column code -> (human-readable description, {coded value: meaning}).
variables_of_interest = {
    'S3BQ1A5': ('ever used cannabis', _yes_no_codes()),
    'S3BQ1A6': ('ever used cocaine/crack', _yes_no_codes()),
    'S3BQ1A9A': ('ever used heroin', _yes_no_codes()),
    'S3CD5Q13A': (
        'age at onset of cannabis abuse',
        _age_codes(
            '5-64',
            'NA, didnt meet symptom criteria for lifetime cannabis abuse',
        ),
    ),
    'S3CD6Q13A': (
        'age at onset of cocaine abuse',
        _age_codes(
            '5-52',
            'NA, didnt meet symptom criteria for lifetime cocaine abuse',
        ),
    ),
    'S3CD9Q13A': (
        'age at onset of heroin abuse',
        _age_codes(
            '9-62',
            'NA, didnt meet symptom criteria for lifetime heroin abuse',
        ),
    ),
    'S3BD5Q2F': (
        'age when began using cannabis most',
        _age_codes('5-64', 'NA, never or unknown if ever used cannabis'),
    ),
    'S3BD6Q2F': (
        'age when began using cocaine or crack most',
        _age_codes(
            '5-64',
            'NA, never or unknown if ever used cocaine or crack',
        ),
    ),
    'S3BD9Q2F': (
        'age when began using heroin most',
        _age_codes('12-62', 'NA, never or unknown if ever used heroin'),
    ),
}
def load_data(path='nesarc_pds.csv'):
    """Read the survey CSV into a DataFrame with upper-cased column names.

    Side effects: sets the pandas 'display.float_format' option so floats
    print in plain fixed-point notation, and prints the number of rows and
    columns that were loaded.

    Args:
        path: location of the CSV file to read. Defaults to
            'nesarc_pds.csv' in the current working directory.

    Returns:
        The loaded pandas DataFrame.
    """
    pandas.set_option('display.float_format', lambda x: '%f' % x)
    data = pandas.read_csv(path, low_memory=False)
    # Build a concrete list: on Python 3 a bare map() is a lazy iterator.
    data.columns = [column.upper() for column in data.columns]
    print ('# of observations is ' + str(len(data)))
    print ('# of variables is ' + str(len(data.columns)))
    return data
def concat_description(tuple_value):
    """Render a (description, codes) pair as a single printable label.

    Args:
        tuple_value: two-item sequence; item 0 is the description text and
            item 1 is a dict mapping each coded value to its meaning.

    Returns:
        The description followed by '( ', then 'code : meaning ' for every
        dict entry in iteration order, then ')'. An empty dict yields
        'description( )'.
    """
    description, codes = tuple_value[0], tuple_value[1]
    # dict.items() works on Python 2 and 3; iteritems() is Python 2 only.
    pairs = ''.join(
        '{} : {} '.format(code, meaning) for code, meaning in codes.items()
    )
    return description + '( ' + pairs + ')'
def main():
    """Print value counts and percentages for every variable of interest.

    Loads the data set with load_data(), coerces each column listed in
    variables_of_interest to numeric, then prints the raw frequency table
    followed by the normalized (proportion) table for that column.
    """
    data = load_data()
    # items() works on Python 2 and 3; iteritems() is Python 2 only.
    for code, tuple_value in variables_of_interest.items():
        # Series.convert_objects() was removed from pandas; to_numeric with
        # errors='coerce' likewise turns unparseable entries into NaN, which
        # value_counts() leaves out by default.
        data[code] = pandas.to_numeric(data[code], errors='coerce')
        description = concat_description(tuple_value)
        print ('counts for ' + code + ' -- ' + description)
        counts = data[code].value_counts(sort=False)
        print (counts)
        print ('percentages for ' + code + ' -- ' + description)
        percentages = data[code].value_counts(sort=False, normalize=True)
        print (percentages)
# A module run as a script has __name__ set to '__main__' (with the double
# underscores); comparing against 'main' never matched, so nothing ran.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment