Skip to content

Instantly share code, notes, and snippets.

@rcquan
Created November 24, 2014 02:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rcquan/ce6a2f959366142a19aa to your computer and use it in GitHub Desktop.
Save rcquan/ce6a2f959366142a19aa to your computer and use it in GitHub Desktop.
***********************************
* Ryan Quan *
* Multigenerational Households *
* August 18, 2014 *
***********************************
/* Restrict the sample to households without children under 18, but only
among one- and two-generation households; households with three or more
generations are kept whether or not children are present.
The variable hhtype1 identifies the households to remove.
This file does not load any data: it operates on whatever dataset is
currently in memory and then overwrites the analysis file on disk.
household type (condensed) | Freq. Percent Cum.
----------------------------+-----------------------------------
married couple, no children | 9,127 27.92 27.92
single parent | 2,159 6.60 34.52
other fam., no children | 1,322 4.04 38.57
single adult | 7,857 24.03 62.60
cohab couple, no children | 705 2.16 64.76
non-family, no children | 778 2.38 67.14
unsure, no children | 444 1.36 68.49
married couple w children | 8,872 27.14 95.63
other family w children | 749 2.29 97.92
cohab couple w children | 373 1.14 99.06
non-family w children | 76 0.23 99.30
unsure w children | 230 0.70 100.00
----------------------------+-----------------------------------
Total | 32,692 100.00
*/
* Households with children, limited to 1- and 2-generation households
drop if (famgen_collapsed == 1 | famgen_collapsed == 2) ///
    & inlist(hhtype1, 2, 11, 13, 15, 16, 18)
*(12,459 observations)
* Household type recorded with the .n missing code
drop if hhtype1 == .n
*(138 observations)
save "Data/famgen_data_for_analysis.dta", replace
***********************************
* Ryan Quan *
* Multigenerational Households *
* August 18, 2014 *
***********************************
clear
use Data/famgen_data_for_analysis.dta
**************************************************
********* LaTeX Tables *************
**************************************************
/* Relabels the analysis variables for presentation, builds categorical
income and age variables, and writes three LaTeX tables with tabout
(table1.txt, table2.txt, table3.txt). tabout is a user-written command
and top.tex / bot.tex must be available in the working directory. */
#delimit ;
la var famgen_collapsed "No. of Generations Living in Household";
/* the label "fam" already exists in the saved dataset, hence modify */
la def fam 1 "1" 2 "2" 3 "3+", modify;
la val famgen_collapsed fam;
la var born "Imm. Status (%)";
la def imm 1 "Native Born" 2 "Immigrant";
la val born imm;
la var race "Race (%)";
la def rce 1 "White" 2 "Black";
la val race rce;
la var caedu "Education (%)";
/* caedu bands are <12, ==12, >12 & <16, and >=16 years of schooling,
so the labels must read 13-15 and 16+ to match the coding */
la def educa 0 "<12 Years" 1 "12 Years" 2 "13-15 Years" 3 "16+ Years";
la val caedu educa;
la var sex "Sex (%)";
la def mf 1 "Male" 2 "Female";
la val sex mf;
la var viq "Verbal IQ (%)";
la def lh 0 "Low" 1 "High";
la val viq lh;
/* NOTE(review): income code 12 is labelled ">$25,000"; confirm against the
codebook whether that category also includes exactly $25,000 */
recode income (1/11 = 1 "<$25,000")
(12 = 2 ">$25,000"), gen(caincome);
la var caincome "Income (%)";
recode age (18/24 = 1 "18-24") (25/34 = 2 "25-34")(35/44 = 3 "35-44")
(45/54 = 4 "45-54")(55/64 = 5 "55-64")(65/89 = 6 "65+"), gen(caage);
la var caage "Age (%)";
/*Descriptives by FAMGEN*/
tabout caage sex race born viq caincome caedu famgen_collapsed using table1.txt,
cells(col) format(1) clab(_) ptotal(none)
replace
style(tex) bt font(bold) cl1(2-5) npos(row) layout(rb) h3(nil)
topf(top.tex) botf(bot.tex) topstr(14cm) botstr(GSS-NDI);
/*Verbal IQ by Immigration Status*/
tabout viq born using table2.txt,
cells(col) format(1) clab(_) ptotal(none) layout(rb) h3(nil)
replace
style(tex) bt font(bold) cl1(2-5) npos(row)
topf(top.tex) botf(bot.tex) topstr(14cm) botstr(GSS-NDI);
/*Verbal IQ by Race*/
tabout viq race using table3.txt,
cells(col) format(1) clab(_) ptotal(none) layout(rb) h3(nil)
replace
style(tex) bt font(bold) cl1(2-5) npos(row)
topf(top.tex) botf(bot.tex) topstr(14cm) botstr(GSS-NDI);
#delimit cr
***********************************
* Ryan Quan *
* Multigenerational Households *
* August 18, 2014 *
***********************************
* Main analysis: Cox models of death on the number of household generations,
* first unadjusted and then adjusted, exported with outreg2 as hazard ratios.
* outreg2, sxpose and dataout are user-written commands.
use Data/famgen_data_for_analysis.dta, clear
stset studytime, failure(death)

* outreg2 options shared by both models
local or2_opts stats(coef ci) addstat(N, e(N_sub), Deaths, e(N_fail)) ///
    keep(i.famgen_collapsed) eform noobs nor2 bdec(2) dec(2)

* Model 1: unadjusted (starts a fresh output file)
xi: outreg2 using famgen_main_analysis, `or2_opts' replace: ///
    stcox i.famgen_collapsed

* Model 2: adjusted for age, income, race, sex and education (appended)
xi: outreg2 using famgen_main_analysis, append `or2_opts': ///
    stcox i.famgen_collapsed age income i.race i.sex i.caedu

* Transpose hazard tables to long format
* (this replaces the analysis data in memory with the exported table)
insheet using famgen_main_analysis.txt, nonames clear
sxpose, clear firstnames force
dataout, excel save(famgen_main_analysis) replace

/*Model Selection
xi: stcox i.famgen_collapsed age income i.race i.sex i.caedu hompop
/*hompop is not a significant predictor*/
xi: stcox i.famgen_collapsed age income i.race i.sex i.caedu i.caregion
/*caregion is not a significant predictor*/
xi: stcox i.famgen_collapsed age income i.race i.sex i.caedu i.famgen_collapsed*i.sex
/*the interaction term between famgen and sex is not significant*/
xi: stcox i.famgen_collapsed age income i.race i.sex i.caedu i.caregion hompop i.famgen_collapsed*i.sex
*/
***********************************
* Ryan Quan *
* Multigenerational Households *
* August 18, 2014 *
***********************************
/**********************************
*To run this file:*
Save this .do file to the folder where you wish to do your work.
Save the following .do files to a subfolder named "Programs"
famgen_prepare_data.do
famgen_exclude_children.do
famgen_summary_stats.do
famgen_main_analysis.do
famgen_sens_analysis.do
famgen_latex_tables.do
Save the following dataset to a subfolder named "Data"
GSSNDIFinalRelease.dta
You may download the above .dta file by following the link below:
http://publicdata.norc.org/GSS/DOCUMENTS/OTHR/GSSNDIFinalRelease_stata.zip
The data-preparation program writes famgen_data_for_analysis.dta to the
same "Data" subfolder.
Edit the local "switches" below to choose which programs to run. For example,
leaving the code as "local summary_stats_switch 1" will run the summary statistics
program. Changing the code to "local summary_stats_switch 0" will not run that program.
The full log file outputs to famgen_master.log.
**************************************/
* Start from a clean session and open the log
quietly capture log close
log using famgen_master.log, replace
quietly {
    set more off, permanently
    clear mata
    clear
    clear matrix
    set maxvar 8000
}
********************************
* SWITCHES
********************************
* 1 = run the step, anything else = skip it

* Exclude households w/ children under 18
* for 1st & 2nd gen households ONLY.
local exclude_children_switch 1
* Summary Statistics
local summary_stats_switch 1
* Main Analysis
local main_analysis_switch 1
* Sensitivity Analysis
local sens_analysis_switch 1
* Output Tables in LaTeX
local latex_tables_switch 0
**************************
* GLOBALS
**************************
* Variable lists read by the included programs
global desc_variables age year race income sex educ hhrace region hompop marital hhtype hhtype1
global key_variables famgen health yeardeath death agedeath
global sens_variables stress strsswrk strsshme wkstress happy hapmar wordsum born parborn granborn
global new_variables famgen_collapsed castrsshme cahappy viq caparborn caregion caedu hlth children
**************************
* PREPARE DATA
**************************
** This piece - Programs/famgen_prepare_data.do - does the following
***** 1. collapses and recodes variables as necessary
***** 2. creates any other variables needed for the analysis (e.g. categorical)
***** 3. prepares data for Cox models in downstream survival analyses
***** 4. saves a limited local dataset called "famgen_data_for_analysis.dta"
* This step always runs; it has no switch.
include Programs/famgen_prepare_data.do
*********SAMPLE SIZE IS N=28,350************
if `exclude_children_switch' == 1 {
    include Programs/famgen_exclude_children.do
    *********SAMPLE SIZE IS N=17,970************
}
***************************
* SUMMARY STATISTICS
****************************
if `summary_stats_switch' == 1 {
    include Programs/famgen_summary_stats.do
}
***************************
* MAIN ANALYSIS
****************************
if `main_analysis_switch' == 1 {
    include Programs/famgen_main_analysis.do
}
***************************
* SENSITIVITY ANALYSIS
****************************
if `sens_analysis_switch' == 1 {
    include Programs/famgen_sens_analysis.do
}
***************************
* LaTeX Tables
****************************
if `latex_tables_switch' == 1 {
    include Programs/famgen_latex_tables.do
}
* Leave the analysis dataset in memory when the run finishes
use Data/famgen_data_for_analysis.dta, clear
log close
***********************************
* Ryan Quan *
* Multigenerational Households *
* August 18, 2014 *
***********************************
************************************************
*GSS-NDI Final Release (Nov 2011)
************************************************
* Load the GSS-NDI release and keep only the variables used downstream.
* The three variable lists are globals that must be defined before this
* file runs.
use Data/GSSNDIFinalRelease.dta, clear
keep id $desc_variables $key_variables $sens_variables
*****PREP DATA FOR ANALYSIS*****************;
* Follow-up time for the survival analysis:
*   deaths    -> year of death minus interview year
*   survivors -> 2008 minus interview year
* Observations with any other value of death are left missing.
codebook yeardeath
capture drop studytime
gen studytime = yeardeath - year if death == 1
replace studytime = 2008 - year if death == 0
summarize studytime
codebook studytime
tabulate studytime, missing
/*Shift follow-up by one year so respondents who died
in the interview year are not recorded with a time of 0*/
replace studytime = studytime + 1
codebook studytime
*****DATA TRANSFORMATIONS*****************;
******************************
*FAMGEN
******************************
/*There are 7 levels for FAMGEN.
Freq. Numeric Label
15304 1 1 gen
11931 2 2 gens, children
183 3 2 gens, parents
203 4 2 gens, grandchldrn
512 5 3 gens, grandchldrn
205 6 3 gens, chld, par
12 7 4 gens
Because we are interested in the number of generations
living within a household, we collapse the variable into
three categories: 1 generation, 2 generation, and 3+ gens.
Any other value of famgen (including the .d, .i and .n missing
codes) is left missing in the collapsed variable.
*/
tabulate famgen, missing
capture drop famgen_collapsed
gen famgen_collapsed = 1 if famgen == 1
replace famgen_collapsed = 2 if inlist(famgen, 2, 3, 4)
* 4-generation households are folded into the 3+ category
replace famgen_collapsed = 3 if inlist(famgen, 5, 6, 7)
la var famgen_collapsed "Number of Family Generations in Household"
la def fam 1 "1 Gen" 2 "2 Gens" 3 "3+ Gens"
la val famgen_collapsed fam
tabulate famgen_collapsed, missing
/*
famgen_coll |
apsed | Freq. Percent Cum.
------------+-----------------------------------
1 | 15,304 53.98 53.98
2 | 12,317 43.45 97.43
3 | 729 2.57 100.00
------------+-----------------------------------
Total | 28,350 100.00
*/
******************************
*REGION
******************************
/*New England = Maine, Vermont, New Hampshire, Massachusetts, Connecticut, Rhode Island
Middle Atlantic = New York, New Jersey, Pennsylvania
East North Central = Wisconsin, Illinois, Indiana, Michigan, Ohio
West North Central = Minnesota, Iowa, Missouri, North Dakota, South Dakota, Nebraska, Kansas
South Atlantic = Delaware, Maryland, West Virginia, Virginia, North Carolina, South Carolina, Georgia, Florida, District of Columbia
East South Central = Kentucky, Tennessee, Alabama, Mississippi
West South Central = Arkansas, Oklahoma, Louisiana, Texas
Mountain = Montana, Idaho, Wyoming, Nevada, Utah, Colorado, Arizona, New Mexico
Pacific = Washington, Oregon, California, Alaska, Hawaii*/
* Two-level region indicator: codes 6 and 7 form the "Southerner" group,
* every other listed code the "Non-Southerner" group.
* NOTE(review): code 5 is placed in the non-Southern group; if it is the
* South Atlantic division, confirm that this is intended.
/*W. South Central and E. South Central*/
gen caregion = 0 if inlist(region, 6, 7)
/*Not Southern Region*/
replace caregion = 1 if inlist(region, 1, 2, 3, 4, 5, 8, 9)
la var caregion "Categorical Region"
la def southern 0 "Southerner" 1 "Non-Southerner"
la val caregion southern
******************************
*EDUCATION
******************************
/*caedu groups years of schooling (educ) into four bands:
0 = fewer than 12, 1 = exactly 12, 2 = 13 to 15, 3 = 16 or more.
Stata treats every missing value (., .a, .d, .i, .n, ...) as larger than
any number, so "educ >= 16" alone would put respondents with missing
education into the top band. The top band therefore excludes missing
values explicitly, and caedu stays missing for all of them.*/
gen caedu = .
replace caedu = 0 if educ < 12
replace caedu = 1 if educ == 12
replace caedu = 2 if educ > 12 & educ < 16
replace caedu = 3 if educ >= 16 & !missing(educ)
la var caedu "Education Level"
/*labels match the bands above (13-15 and 16+ do not overlap)*/
la def edl 0 "<12 Years" 1 "12 Years" 2 "13-15 Years" 3 "16+ Years"
la val caedu edl
******************************
*WORDSUM (Verbal IQ)
******************************
sum wordsum
/*60% of population scored 6 or below. We will use that as a cutoff between low and high VIQ.
viq = 0 (Low) for scores 1-6 and viq = 1 (High) for scores of 7 or more.
Stata treats missing values as larger than any number, so the High group
must exclude missing scores explicitly; otherwise every respondent without
a wordsum score would be coded High and the later "viq != ." filters would
never exclude anyone.
NOTE(review): a score of exactly 0 is left missing by the "> 0" condition;
confirm whether 0 should count as Low.*/
gen viq = .
replace viq = 0 if wordsum > 0 & wordsum <= 6
replace viq = 1 if wordsum >= 7 & !missing(wordsum)
label variable viq "Verbal IQ"
label define lowhigh 0 "Low" 1 "High"
label values viq lowhigh
******************************
*PARBORN (Parents born in the United States?)
******************************
/*Three-level parental nativity:
0 = parborn 0, 1 = parborn 1 or 2, 2 = parborn 8.
We exclude folks who do not know where at least one parent is born
because we cannot be sure how to categorize them (N = 129): parborn
codes 3-7 and .n are never assigned a category and so stay missing.*/
gen caparborn = 0 if parborn == 0
replace caparborn = 1 if inlist(parborn, 1, 2)
replace caparborn = 2 if parborn == 8
la def cpb 0 "Both Parents Born in US" 1 "One Parent Born in US" 2 "Neither Parents Born in US"
la val caparborn cpb
la var caparborn "Parents' Immigration Status"
******************************
*BORN (Respondent born in the United States?)
******************************
* born is used as-is; only value labels are attached
la def brn 1 "R Born in US" 2 "R Not Born in US"
la val born brn
******************************
*HEALTH
******************************
* Self-rated health as a binary: codes 1-2 become 1 (the healthy group),
* codes 3-4 become 0; all other values are carried over unchanged.
recode health (1/2 = 1) (3/4 = 0), generate(hlth)
******************************
*STRESS
******************************
* Stress at home as a binary: strsshme 1-2 -> 0, strsshme 3-5 -> 1;
* anything else stays missing.
* Reuses the value label lowhigh (0 "Low", 1 "High") defined in the
* verbal-IQ section of this file.
* NOTE(review): the original comment here read "low stress == 1", which
* disagrees with the label (1 = "High"); confirm the direction of strsshme.
gen castrsshme = 0 if inlist(strsshme, 1, 2)
replace castrsshme = 1 if inlist(strsshme, 3, 4, 5)
label variable castrsshme "Stress at Home"
label values castrsshme lowhigh
******************************
*HAPPINESS
******************************
* General happiness as a binary: happy 1-2 -> 0, happy 3 -> 1; every
* other value (including the .d, .i and .n missing codes) stays missing.
* Reuses the value label lowhigh (0 "Low", 1 "High") defined in the
* verbal-IQ section of this file.
* NOTE(review): confirm the direction of the happy scale; if 3 is the
* least happy response, the "High" label on cahappy == 1 is inverted.
gen cahappy = 0 if inlist(happy, 1, 2)
replace cahappy = 1 if happy == 3
label variable cahappy "General Happiness"
label values cahappy lowhigh
******************************
*CHILDREN
******************************
* Indicator for children under 18 in the household, derived from hhtype1:
* 1 for codes 2, 11, 13, 15, 16, 18; 0 for codes 1, 3, 4, 5, 6, 8;
* missing otherwise.
gen children = 1 if inlist(hhtype1, 2, 11, 13, 15, 16, 18)
replace children = 0 if inlist(hhtype1, 1, 3, 4, 5, 6, 8)
la var children "Children Under 18 Present in Household"
la def chld 1 "Yes" 0 "No"
la val children chld
*****DROPPING MISSING VALUES FROM CONTROL VARIABLES*****************;
* Each drop targets specific missing codes only; the counts below are the
* numbers reported when the file was originally run.
#delimit cr
drop if age == .n
*(49 observations deleted)
drop if income == .a
*(3125 observations deleted)
drop if sex == .
*(0 observations deleted)
* Dropping "other" category in race (code 3) along with system-missing race
drop if inlist(race, ., 3)
*(0 observations deleted)
drop if hompop == .n
*(1 observation deleted)
drop if caedu == .
*(39 observations deleted)
drop if inlist(born, .d, .n)
*(77 observations deleted)
******************************
* Write the analysis dataset read by the downstream programs
save "Data/famgen_data_for_analysis.dta", replace
***********************************
* Ryan Quan *
* Multigenerational Households *
* August 18, 2014 *
***********************************
* Sensitivity analyses: the adjusted Cox model re-estimated on subsamples
* and within strata, all exported to famgen_sens_analysis with outreg2.
* outreg2, sxpose and dataout are user-written commands.
use Data/famgen_data_for_analysis.dta, clear
stset studytime, failure(death)

* outreg2 options shared by every model
local or2_opts stats(coef ci) addstat(N, e(N_sub), Deaths, e(N_fail)) ///
    keep(i.famgen_collapsed) eform noobs nor2 bdec(2) dec(2)
* adjustment sets: with and without race
local adj_race   age income i.race i.sex i.caedu
local adj_norace age income i.sex i.caedu

/* ***********************************************
* FULL SAMPLE
* ***********************************************/
xi: outreg2 using famgen_sens_analysis, `or2_opts' replace: ///
    stcox i.famgen_collapsed `adj_race'

/* ***********************************************
* HEALTH
* ***********************************************/
/*including only respondents who have high self-rated health*/
xi: outreg2 using famgen_sens_analysis, append `or2_opts': ///
    stcox i.famgen_collapsed `adj_race' if hlth ==1

/* ***********************************************
* VIQ
* ***********************************************/
/*stratified by high/low VIQ*/
/*NOTE(review): this model omits i.race although it is not stratified by
race; confirm whether that is intended*/
xi: bysort viq: outreg2 using famgen_sens_analysis, append `or2_opts': ///
    stcox i.famgen_collapsed `adj_norace' if viq !=.

/* ***********************************************
* RACE
* ***********************************************/
/*stratified by white/black*/
xi: bysort race: outreg2 using famgen_sens_analysis, append `or2_opts': ///
    stcox i.famgen_collapsed `adj_norace' if race !=.
/*stratified by white/black, high/low VIQ*/
xi: bysort race viq: outreg2 using famgen_sens_analysis, append `or2_opts': ///
    stcox i.famgen_collapsed `adj_norace' if race !=. & viq !=.

/* ***********************************************
* IMMIGRATION STATUS
* ***********************************************/
/*stratified by born in the US/not born in the US*/
xi: bysort born: outreg2 using famgen_sens_analysis, append `or2_opts': ///
    stcox i.famgen_collapsed `adj_race' if born !=.
/*stratified by born in the US/not born in the US, high/low VIQ*/
xi: bysort born viq: outreg2 using famgen_sens_analysis, append `or2_opts': ///
    stcox i.famgen_collapsed `adj_race' if born !=. & viq !=.
/*stratified by born in the US/not born in the US for both respondent and his or her parents*/
xi: bysort born caparborn: outreg2 using famgen_sens_analysis, append `or2_opts': ///
    stcox i.famgen_collapsed `adj_race' if born !=. & caparborn !=.

/* ***********************************************
* STRESS
* ***********************************************/
/*stratified by low stress/high stress*/
xi: bysort castrsshme: outreg2 using famgen_sens_analysis, append `or2_opts': ///
    stcox i.famgen_collapsed `adj_race' if castrsshme !=.

/* ***********************************************
* HAPPINESS
* ***********************************************/
/*stratified by low happiness/high happiness*/
xi: bysort cahappy: outreg2 using famgen_sens_analysis, append `or2_opts': ///
    stcox i.famgen_collapsed `adj_race' if cahappy !=.

* Transpose hazard tables to long format
* (this replaces the analysis data in memory with the exported table)
insheet using famgen_sens_analysis.txt, nonames clear
sxpose, force firstnames clear
dataout, excel save(famgen_sens_analysis) replace
***********************************
* Ryan Quan *
* Multigenerational Households *
* August 18, 2014 *
***********************************
***********************************
*TWO-WAY TABULATIONS
***********************************
* Cross-tabulate every analysis variable against the generation categories.
* Works on the dataset currently in memory and expects the four
* variable-list globals to be defined already.
foreach v in $desc_variables $key_variables $sens_variables $new_variables {
    tabulate `v' famgen_collapsed, missing row col chi2
}
***********************************
*POWER ANALYSIS
***********************************
// SD of famgen (used for every power calculation below) and overall N
quietly summarize famgen_collapsed
scalar std_famgen = r(sd)
//OVERALL
scalar N_famgen = r(N)
// number of deaths in the whole sample
quietly count if death == 1
scalar N_death = r(N)
// overall probability of death
scalar failprob = scalar(N_death) / scalar(N_famgen)
stpower cox, n(`=N_famgen') power(0.8) alpha (0.05) sd(`=std_famgen') failprob(`=failprob') hr
//SUB-GROUPS
forvalues g = 1/3 {
    // N for this famgen group
    quietly count if famgen_collapsed == `g'
    scalar N_famgen = r(N)
    // number of deaths in this famgen group
    quietly count if famgen_collapsed == `g' & death == 1
    scalar N_death = r(N)
    // probability of death within the group
    scalar failprob = scalar(N_death) / scalar(N_famgen)
    stpower cox, n(`=N_famgen') power(0.8) alpha (0.05) sd(`=std_famgen') failprob(`=failprob') hr
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment