Skip to content

Instantly share code, notes, and snippets.

@renjiege
Created November 26, 2017 16:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save renjiege/b1565d8bb83ab854b57e75505c7bafbf to your computer and use it in GitHub Desktop.
Save renjiege/b1565d8bb83ab854b57e75505c7bafbf to your computer and use it in GitHub Desktop.
This file shows how I analyze data, generate summary statistics, and produce graphs and tables.
* sexselection_regressions_census1990.do (This file also produces graph results, see section 7)
* First Run sexselection_trimmed_census1990.do
* In this version, DT1-DT2 are added, P value of F-tests are dropped
* Difference from V3: changed the definition of D1 and DT1
* For how to get datasets sexselection_merge_culs.dta, check figures_education_quality_culs2001_robustness
* ---- Session setup ----
clear all
set more off
capture log on

* Move to the Census 1990 data folder. Both the Unix-style and the Windows path are tried;
* capture silences the attempt that fails.
capture cd ~/Documents/Data/China_Census1990
capture cd C:\Users\Renjie\Documents\Data\China_Census1990

* Running variable and the two cutoffs used to build D1/D2/T1/T2 below
global scale abyear
global c1 1949
global c2 1951

* Plot type passed to twoway in the graph section
local option scatter

* outreg2 option bundles: 1/2 report byear, 3/4 report abyear; 1 and 4 start a new workbook, 2 and 3 append
local outregsetup1 "excel replace se lab nocons nor2 keep( DT1 DT2 D1 D2 byear)"
local outregsetup4 "excel replace se lab nocons nor2 keep( DT1 DT2 D1 D2 abyear)"
local outregsetup2 "excel append se lab nocons nor2 keep( DT1 DT2 D1 D2 byear)"
local outregsetup3 "excel append se lab nocons nor2 keep( DT1 DT2 D1 D2 abyear)"
*************************************
********* Dataset Preparation ********
**************************************
/*
use sexselection_trimmed_census1990.dta,clear
gen sexcomp="1m" if chborn==1 & chbornm==1
replace sexcomp="1f" if chborn==1 & chbornf==1
replace sexcomp="2m" if chborn==2 & chbornm==2
replace sexcomp="2f" if chborn==2 & chbornf==2
replace sexcomp="1m1f" if chborn==2 & chbornf==1 & chbornm==1
replace sexcomp="3m" if chborn==3 & chbornm==3
replace sexcomp="3f" if chborn==3 & chbornf==3
replace sexcomp="2m1f" if chborn==3 & chbornf==1 & chbornm==2
replace sexcomp="1m2f" if chborn==3 & chbornf==2 & chbornm==1
replace sexcomp="4f" if chborn==4 & chbornf==4
replace sexcomp="4m" if chborn==4 & chbornm==4
replace sexcomp="5f" if chborn==5 & chbornf==5
replace sexcomp="5m" if chborn==5 & chbornm==5
replace sexcomp="6f" if chborn==6 & chbornf==6
replace sexcomp="6m" if chborn==6 & chbornm==6
replace sexcomp="other" if chborn>=4 & chborn!=.
gen morefemale=(chbornf>chbornm)
gen onlyfemale1=(sexcomp=="1f") // only girl was born
gen onlyfemale2=(sexcomp=="1f" | sexcomp=="2f" )
gen onlyfemale3=(sexcomp=="1f" | sexcomp=="2f" | sexcomp=="3f")
gen onlyfemale6=(sexcomp=="1f" | sexcomp=="2f" | sexcomp=="3f" | sexcomp=="4f" | sexcomp=="5f" | sexcomp=="6f")
*replace onlyfemale1=. if chborn==0
replace onlyfemale2=. if chborn==0
replace onlyfemale3=. if chborn==0
gen onlymale=(sexcomp=="1m" )
gen onlymale6=(sexcomp=="1m" | sexcomp=="2m" | sexcomp=="3m" | sexcomp=="4m" | sexcomp=="5m" | sexcomp=="6m")
*replace onlymale=. if chborn==0
gen onechild=(chborn==1)
replace onechild=. if chborn==0
gen twochild=(chborn>=2)
replace twochild=. if chborn==0
replace hhtype=. if hhtype==0
gen rural=(hhtype==1)
gen job=(occu>0)
gen highschool=(educ==4)
gen middleschool=(educ>=3)
/* Regressors */
* Linear regressors
gen D1=(${scale}<${c1})
gen D2=(${scale}>${c2})
gen T1=(${scale}-${c1})
gen T2=(${scale}-${c2})
gen DT1=D1*T1
gen DT2=D2*T2
* Quadratic Regressors
gen T1_sq=T1^2
gen T2_sq=T2^2
gen DT1_sq=D1*T1^2
gen DT2_sq=D2*T2^2
keep if ${scale}>1940 & ${scale}<1960
keep if rural==0
preserve
**************************************
** Robust Check for selection into college or other level of schools
keep if educ==4
*keep if educ ==4 | educ==5 // do not work, see 3. second stage results
*keep if educ ==4 | educ==6 // do not work
*keep if educ ==4 | educ==6 | educ ==7 // do not work
**************************************
save highschool_census1990_regressions.dta, replace
restore
preserve
keep if educ==3
save middleschool_census1990_regressions.dta, replace
restore
preserve
keep if educ==2
save primaryschool_census1990_regressions.dta, replace
restore
preserve
keep if educ<=2
save underprimaryschool_census1990_regressions.dta, replace
restore
preserve
keep if educ<=3
save undermiddleschool_census1990_regressions.dta, replace
/* Classify into two groups: first child is a boy; first child is a girl */
/* Method 1 // see evernote Renjie Ge's census question */
restore
keep if educ==4
preserve
gen insample=1 if ((relate=="spouse" | relate=="head" | relate=="child") & (chsurv==children)) | (onlymale6==1 | onlyfemale6==1) | (chsurv<=1)
replace insample=0 if insample==.
ttable2 occu race job byear married chborn, by(insample)
keep if insample==1
replace c1_sex=2 if onlyfemale6==1
replace c1_sex=1 if onlymale6==1
gen firstgirl=1 if c1_sex==2
replace firstgirl=0 if c1_sex==1
gen secboy=(c2_sex==1)
save tempfile1_highschool_census1990.dta, replace
/* Method 2 */
restore
gen insample=1 if ((relate=="spouse" | relate=="head" | relate=="child") & (chsurv==children))
replace insample=0 if insample==.
ttable2 occu race job byear married chborn, by(insample)
keep if insample==1
replace c1_sex=2 if onlyfemale6==1
replace c1_sex=1 if onlymale6==1
gen firstgirl=1 if c1_sex==2
replace firstgirl=0 if c1_sex==1
gen secboy=(c2_sex==1)
save tempfile2_highschool_census1990.dta, replace
xx
*/
************************************************* Summary Statistics *************************************************
*
* Summary statistics (means and counts) for the three analysis samples.

* (1) Census 1990 high-school file, birth cohorts 1945-1955.
use highschool_census1990_regressions.dta, clear
keep if byear>=1945 & byear<=1955
gen age=1990-byear
gen han=(race==1)
* First table keeps only observations with non-missing onlyfemale1; the second uses all observations.
tabstat onlyfemale1 age job married chborn onechild han if onlyfemale1!=. , col(stat) format(%9.3g) stat(mean count) // empstat>0: no job; empstat==0 has a job; =1 student; =2 housework; =3 entering school; =4 waiting for employment
tabstat onlyfemale1 age job married chborn onechild han, col(stat) format(%9.3g) stat(mean count) // empstat>0: no job; empstat==0 has a job; =1 student; =2 housework; =3 entering school; =4 waiting for employment

* (2) Family sample (tempfile1), same cohorts: overall, then split by sex of the first child.
use tempfile1_highschool_census1990.dta, clear
keep if byear>=1945 & byear<=1955
gen age=1990-byear
gen han=(race==1)
tabstat onlyfemale1 age job married chborn onechild firstgirl han if firstgirl!=., col(stat) format(%9.3g) stat(mean count)
tabstat twochild onlyfemale1 age job married chborn onechild firstgirl han if firstgirl==1, col(stat) format(%9.3g) stat(mean count)
tabstat twochild onlyfemale1 age job married chborn onechild firstgirl han if firstgirl==0, col(stat) format(%9.3g) stat(mean count)

* (3) CFPS+CULS sample, by gender.
* NOTE(review): this file is saved by the setup step in section 0 further down, so that step must have been run at least once.
* Fix: sib was listed twice in the variable list, which printed the same row twice.
use "~/Dropbox/workslot/mystata/projects/education_quality/sexselection_cfps_culs.dta", clear
tabstat yhi byear feduy meduy children sib height upperclass sentdown, format(%9.3g) stat(mean count) by(gender) nototal
****************************************************************************************************************************************************************
******************************************************************************************************************************************************
****************************************************************************************************************************************************************
// STEPS:
// 1. first-stage results: weighted sample vs non-weighted sample
// 2. full sample versus urban sample only
// 3. different specifications; controls; standard errors; by ethnicity; different measures
// 4. show other school levels do not decrease sex selection
// 5. Test continuity: regressions on number of children; marriage status; occupation; parents' age, education, etc.
// 6. histogram of sex selection, McCrary density test
// 7. channels and confounders(sent down)
// 8. Specification test: linear models vs non-linear;
// 9. show the calculated effect of years of schooling on sex selection
// 10. check whether the characteristics of the selected sample are similar to those without selection
****************************************************************************************************************************************************************
****************************************************************************************************************************************************************
****************************************************************************************************************************************************************
****************************************** 0. Setup for CFPS+CULS and Census1990 ***********************************
* NOTE(review): the bare "s" below is not a command, so a full run of the do-file stops here.
* It looks like a manual stop marker (sections below are then run by selection) - confirm.
s
/* a. Merge with CULS years of schooling data */
* Builds sexselection_cfps_culs.dta: cleaned CFPS 2010 records restricted by educ/hcomp/hukou3
* and birth year, with province weights, appended to the CULS extract, plus the
* cohort regressors D1/D2/T1/T2/DT1/DT2.
cap cd ~/Documents/Data/CFPS/2010
cap cd C:\Users\Renjie\Documents\Data\CFPS\2010\
use CFPSclean.dta, clear // for this data, search: clean_CFPS2010.do
* Negative codes are treated as invalid: such birth years are dropped, such yhi values set to missing.
drop if byear<0
replace yhi=. if yhi<0
gen abyear=byear+bmon/12 // accurate birth year
gen qbyear=byear+quater/4
* hukou codes outside 1..3 are set to missing
replace hukou3=. if hukou3>3 | hukou3<1
replace hukou12=. if hukou12>3 | hukou12<1
* Sample restriction (meaning of the codes is not shown here - presumably high-school completers
* with urban hukou; TODO confirm against clean_CFPS2010.do)
keep if (educ==10 & hcomp==1) & hukou3==3
keep if byear>1940 & byear<1960
* Province weights. Provinces whose provcd is not listed below keep a missing weight.
gen weight=19.62*1000000/(400*2.3) if provcd==11 // Beijing; weights = population in 2010 / number of sample
replace weight=12.99*1000000/(400*2.3) if provcd==12 // Tianjin
replace weight=71.94*1000000/(400*2.3) if provcd==13 // Hebei
replace weight=35.74*1000000/(400*2.3) if provcd==14 // Shanxi
replace weight=43.75*1000000/(1600*2.3) if provcd==21 // Liaoning
replace weight=27.47*1000000/(400*2.3) if provcd==22 // Jilin
replace weight=38.33*1000000/(400*2.3) if provcd==23 // Heilongjiang
replace weight=23.03*1000000/(1600*2.3) if provcd==31 // Shanghai
replace weight=44.62*1000000/(400*2.3) if provcd==36 // Jiangxi
replace weight=94.05*1000000/(1600*2.3) if provcd==41 // Henan
replace weight=57.28*1000000/(400*2.3) if provcd==42 // Hubei
replace weight=65.70*1000000/(400*2.3) if provcd==43 // Hunan
replace weight=104.41*1000000/(1600*2.3) if provcd==44 // Guangdong
replace weight=28.85*1000000/(400*2.3) if provcd==50 // Chongqing
replace weight=34.79*1000000/(400*2.3) if provcd==52 // Guizhou
replace weight=37.35*1000000/(400*2.3) if provcd==61 // Shaanxi
replace weight=25.60*1000000/(1600*2.3) if provcd==62 // Gansu
* Fill missing parental years of schooling (feduy/meduy) from the category variables (feduc/meduc).
* NOTE(review): "of varlist" requires fedu and medu to resolve to existing variables - confirm.
foreach var of varlist fedu medu {
replace `var'y= 0 if `var'c==1 & `var'y==.
replace `var'y=6 if `var'c==2 & `var'y==.
replace `var'y=9 if `var'c==3 & `var'y==.
replace `var'y=12 if `var'c==4 & `var'y==.
replace `var'y=15 if `var'c==5 & `var'y==.
replace `var'y=16 if `var'c==6 & `var'y==.
}
rename sibno sib
rename ymid ymi
gen upperclass=(qa6==1 | qa6==3 | qa6==5 | qa6>16)
gen hnormal= (yhi==3 | yhi==4)
keep yhi hnormal ymi byear gender weight abyear qbyear children meduy feduy fbyear mbyear sentdown sib height upperclass
cap cd ~/Dropbox/workslot/mystata/projects/education_quality
append using sexselection_merge_culs.dta
* After the append, any still-missing parental schooling is set to 0 before taking the row maximum.
replace feduy=0 if feduy==.
replace meduy=0 if meduy==.
egen peduy= rowmax(feduy meduy)
/* b. Linear regressions for 1st stage results */
* capture: each gen is skipped silently if the variable already exists (e.g. brought in by the append).
cap gen D1=(byear<${c1})
cap gen D2=(byear>${c2})
cap gen T1=(byear-${c1})
cap gen T2=(byear-${c2})
cap gen DT1=D1*T1
cap gen DT2=D2*T2
save sexselection_cfps_culs.dta, replace
*********************************** 1. Test continuity/covariates using CFPS+CULS and Census1990 *******************************
* For each covariate, regress it on DT1 DT2 D1 D2 and the cohort variable, run the joint
* test of DT1 and DT2, and export the column plus the test p-value with outreg2.
/* CFPS+CULS */
#delimit ;
use "~/Dropbox/workslot/mystata/projects/education_quality/sexselection_cfps_culs.dta", clear;
keep if byear>=1945 & byear<=1955;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
local covariates= "height sib meduy feduy peduy upperclass sentdown ymi"; local replace replace;
replace sib=. if sib<0;
/* The first pass uses replace to start the workbook and later passes leave the macro empty */
foreach var of varlist `covariates' {;
reg `var' DT1 DT2 D1 D2 byear if gender==0 , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage_covariates, excel `replace' se lab nocons nor2 keep( DT1 DT2 D1 D2 byear) ctitle("`var'") addstat(F-test, `F1');
local replace;
};
/* Census1990 */
/* NOTE(review): outregsetup4 contains replace and targets the same workbook as the loop above,
   so the CFPS+CULS columns are discarded at this point - confirm this is intended */
#delimit ;
use "~/Documents/Data/China_Census1990/highschool_census1990_regressions.dta", clear;
keep if byear>=1945 & byear<=1955;
reg onlymale DT1 DT2 D1 D2 ${scale} i.occu i.empstat children, ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage_covariates, `outregsetup4' ctitle("Have only a boy") addstat(F-test, `F1');
reg onechild DT1 DT2 D1 D2 ${scale} , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage_covariates, `outregsetup3' ctitle("One Child") addstat(F-test, `F1');
reg children DT1 DT2 D1 D2 ${scale} , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage_covariates, `outregsetup3' ctitle("Number of Children") addstat(F-test, `F1');
reg married DT1 DT2 D1 D2 ${scale} , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage_covariates, `outregsetup3' ctitle("Marrital Status") addstat(F-test, `F1');
reg job DT1 DT2 D1 D2 ${scale} , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage_covariates, `outregsetup3' ctitle("Job Status") addstat(F-test, `F1') see;
/* Fix: the three statements below end in a semicolon, so they must run before the delimiter
   is switched back to carriage return. Previously the switch came first and the trailing
   semicolon was parsed as part of the option list, which aborted the do-file.
   NOTE(review): the same regression appears again in section 3a, where the workbook
   sexselection_regression is started afresh - this copy may be redundant */
xi: reg onlyfemale1 DT1 DT2 D1 D2 ${scale} i.occu i.empstat children, ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression, excel append se keep( DT1 DT2 D1 D2 abyear children) lab ctitle("With Other Controls") nocons nor2 addstat(F-test, `F1');
#delimit cr
************************************************** 2. First Stage Results *****************************************
* First-stage regressions on the CFPS+CULS sample (gender==0, cohorts 1945-1955):
* outcomes yhi and hnormal, with and without family controls, exported to
* sexselection_regression_1st_stage.
#delimit ;
use "~/Dropbox/workslot/mystata/projects/education_quality/sexselection_cfps_culs.dta", clear;
/* Recode missing parental schooling to 99 so it enters i.feduy and i.meduy as its own level */
replace feduy=99 if feduy==.; replace meduy=99 if meduy==.;
// a. baseline
keep if byear>=1945 & byear<=1955;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
/* NOTE(review): these regressions use abyear while outregsetup1 and outregsetup2 keep byear,
   so the cohort-trend coefficient is not part of the exported columns - confirm */
reg yhi DT1 DT2 D1 D2 abyear if gender==0 , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage, `outregsetup1' ctitle("Year of Schooling: Female") addstat(F-test, `F1');
reg yhi DT1 DT2 D1 D2 abyear i.feduy i.meduy children sib if gender==0 , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage, `outregsetup2' ctitle("Year of Schooling: Female with controls") addstat(F-test, `F1') ;
reg hnormal DT1 DT2 D1 D2 abyear if gender==0 , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage, `outregsetup2' ctitle("Pr(education affected): Female") addstat(F-test, `F1');
reg hnormal DT1 DT2 D1 D2 abyear i.feduy i.meduy children sib if gender==0 , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage, `outregsetup2' ctitle("Pr(education affected): Female with controls") addstat(F-test, `F1') see;
// b. adding fbyear & mbyear separately; results do not change
/* Invalid or missing parental birth years are pooled into the single code 9 */
replace mbyear=9 if mbyear<0 | mbyear==.; replace fbyear=9 if fbyear<0 | fbyear==.;
/* Fix: the added statistic is the lincom estimate of DT1-DT2 and its label now says so
   (it previously read DT-DT2) */
reg yhi DT1 DT2 D1 D2 byear i.mbyear if gender==0 , ro;
lincom DT1-DT2; local tstat=r(estimate)/r(se); local pval=tprob(r(df), abs(`tstat'));
outreg2 using sexselection_regression_1st_stage, `outregsetup2' ctitle("Year of Schooling with controls") adds(DT1-DT2, r(estimate), P-value, `pval');
reg yhi DT1 DT2 D1 D2 byear i.fbyear if gender==0 , ro;
lincom DT1-DT2; local tstat=r(estimate)/r(se); local pval=tprob(r(df), abs(`tstat'));
outreg2 using sexselection_regression_1st_stage,`outregsetup2' ctitle("Year of Schooling: Female with controls") adds(DT1-DT2, r(estimate), P-value, `pval');
#delimit cr
********************************************* 3. Second Stage Results *****************************************
* Second-stage regressions of onlyfemale1 on the cohort regressors, Census 1990,
* cohorts 1945-1955: (a) high-school graduates, (b) other school levels.
/* a. Regressions on high school graduates */
#delimit ;
use "~/Documents/Data/China_Census1990/highschool_census1990_regressions.dta", clear;
keep if byear>=1945 & byear<=1955;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
reg onlyfemale1 DT1 DT2 D1 D2 ${scale} , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression, `outregsetup4' ctitle("Without Controls") addstat(F-test, `F1');
reg onlyfemale1 DT1 DT2 D1 D2 ${scale} children, ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression, excel append se keep( DT1 DT2 D1 D2 abyear children) lab ctitle("With Children Controls") nocons nor2 addstat(F-test, `F1');
xi: reg onlyfemale1 DT1 DT2 D1 D2 ${scale} i.occu i.empstat children, ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression, excel append se keep( DT1 DT2 D1 D2 abyear children) lab ctitle("With Other Controls") nocons nor2 addstat(F-test, `F1');
xi: reg onlyfemale1 DT1 DT2 D1 D2 ${scale} i.occu i.empstat children if race==1, ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression, excel append se keep( DT1 DT2 D1 D2 abyear children) lab ctitle("Han Only") nocons nor2 addstat(F-test, `F1') see;
/* b. Regressions on other school levels */
/* NOTE(review): the Middle School and College exports below use replace on the same workbook
   as part a, so earlier columns in sexselection_regression are overwritten - confirm intended */
use "~/Documents/Data/China_Census1990/middleschool_census1990_regressions.dta", clear;
keep if byear<=1955 & byear>=1945;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
xi: reg onlyfemale1 DT1 DT2 D1 D2 ${scale} i.occu i.empstat children, ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression, excel replace se keep( DT1 DT2 D1 D2 ${scale} children) lab ctitle("Middle School") nocons nor2 addstat(F-test, `F1');
use "~/Documents/Data/China_Census1990/primaryschool_census1990_regressions.dta", clear;
keep if byear<=1955 & byear>=1945;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
xi: reg onlyfemale1 DT1 DT2 D1 D2 ${scale} i.occu i.empstat children, ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression, excel append se keep( DT1 DT2 D1 D2 ${scale} children) lab ctitle("Primary School") nocons nor2 addstat(F-test, `F1');
/* college */
/* Fix: this heading used to be a star comment. With the semicolon delimiter a star comment
   runs to the next semicolon, so it swallowed the use statement below and the College
   column was estimated on the primary-school data still in memory.
   NOTE(review): college_census1990_regressions.dta is not saved by the preparation code
   visible in this file - make sure it exists */
use "~/Documents/Data/China_Census1990/college_census1990_regressions.dta", clear;
keep if byear<=1955 & byear>=1945;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
xi: reg onlyfemale1 DT1 DT2 D1 D2 ${scale} , ro;
test DT1-DT2=-DT2=0; local F1=r(p); lincom DT1-DT2; local tstat=r(estimate)/r(se); local pval=tprob(r(df), abs(`tstat'));
outreg2 using sexselection_regression, excel replace se keep( DT1 DT2 D1 D2 ${scale} children)lab ctitle("College") nocons nor2 addstat(F-test, `F1', DT1-DT2, r(estimate), P-value, `pval') ;
**************************************** 4. Robustness: Alternative bandwidth for 1st and 2nd stage ***********************************
#delimit ;
/* Both stages are re-estimated on three shrinking cohort windows:
   1942-1958 (17 cohorts), 1943-1957 (15) and 1944-1956 (13).
   All columns go to sexselection_regression_robust_bandwidth. */

/* a. First stage: years of schooling, CFPS+CULS */
use "~/Dropbox/workslot/mystata/projects/education_quality/sexselection_cfps_culs.dta", clear;
replace meduy=99 if meduy==.;
replace feduy=99 if feduy==.;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
local shrink=0;
local replace replace;
forval year=1942/1944 {;
local last=`year'+16+`shrink';
local Bandwidth=17+`shrink';
xi: reg yhi DT1 DT2 D1 D2 byear i.feduy i.meduy children sib if gender==0 & byear>=`year' & byear<=`last' , ro;
test DT1 DT2;
local F1=r(p);
outreg2 using sexselection_regression_robust_bandwidth, excel `replace' nocons se keep( DT1 DT2) addstat(F-test, `F1') ctitle("Year of Schooling: Bandwidth==`Bandwidth'");
local replace;
local shrink=`shrink'-2;
};

/* b. Second stage: onlyfemale1, Census 1990 high-school sample */
use "~/Documents/Data/China_Census1990/highschool_census1990_regressions.dta", clear;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
local shrink=0;
forval year=1942/1944 {;
local last=`year'+16+`shrink';
local Bandwidth=17+`shrink';
xi: reg onlyfemale1 DT1 DT2 D1 D2 ${scale} i.occu i.empstat children if byear>=`year' & byear<=`last', ro;
test DT1 DT2 ;
local F1=r(p);
outreg2 using sexselection_regression_robust_bandwidth, excel append nocons se keep( DT1 DT2) addstat(F-test, `F1') ctitle("Onlyfemale: Bandwidth==`Bandwidth'");
local shrink=`shrink'-2;
};
#delimit cr
************************************************ 5. Robustness: Using different measure *********************************************
* (a) alternative outcomes on the Census 1990 family sample; (b) first stage re-run with pweights.
#delimit ;
/* a. Alternative definition and using Census1990 family sample */
/* Columns, in order: twochild given first child is a boy (firstgirl==0), twochild given first
   child is a girl (firstgirl==1), then firstgirl itself. All three share the column title
   "Alternative measure" in the workbook. */
use "~/Documents/Data/China_Census1990/tempfile1_highschool_census1990.dta", clear;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
keep if byear>=1945 & byear<=1955;
reg twochild DT1 DT2 D1 D2 abyear children i.occu i.empstat if firstgirl==0 , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_robust_other_definition, excel replace se lab keep(DT1 DT2) ctitle("Alternative measure") nocons nor2 addstat(F-test, `F1');
reg twochild DT1 DT2 D1 D2 abyear children i.occu i.empstat if firstgirl==1 , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_robust_other_definition, excel append se lab keep(DT1 DT2) ctitle("Alternative measure") nocons nor2 addstat(F-test, `F1');
reg firstgirl DT1 DT2 D1 D2 abyear children i.occu i.empstat , ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_robust_other_definition, excel append se lab keep(DT1 DT2) ctitle("Alternative measure") nocons nor2 addstat(F-test, `F1');
/* b. Weighted Regression for 1st stage result */
/* Same four specifications as section 2a, but with byear as the trend variable and
   pweight=weight. Observations with a missing weight are not used by the weighted fits. */
use "~/Dropbox/workslot/mystata/projects/education_quality/sexselection_cfps_culs.dta", clear;
replace feduy=99 if feduy==.; replace meduy=99 if meduy==.;
// a. baseline
keep if byear>=1945 & byear<=1955;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
reg yhi DT1 DT2 D1 D2 byear if gender==0 [pweight=weight], ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage_weighted, `outregsetup1' ctitle("Year of Schooling: Female") addstat(F-test, `F1');
reg yhi DT1 DT2 D1 D2 byear i.feduy i.meduy children sib if gender==0 [pweight=weight], ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage_weighted, `outregsetup2' ctitle("Year of Schooling: Female with controls") addstat(F-test, `F1') ;
reg hnormal DT1 DT2 D1 D2 byear if gender==0 [pweight=weight], ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage_weighted, `outregsetup2' ctitle("Pr(education affected): Female") addstat(F-test, `F1');
reg hnormal DT1 DT2 D1 D2 byear i.feduy i.meduy children sib if gender==0 [pweight=weight], ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_1st_stage_weighted, `outregsetup2' ctitle("Pr(education affected): Female with controls") addstat(F-test, `F1') see;
/* c. Other school levels with different bandwidths*/
#delimit ;
/* onlyfemale1 regressions for the middle-school and under-primary-school samples on four
   shrinking cohort windows (17, 15, 13 and 11 cohorts). Each column reports the lincom
   estimate of DT1-DT2 and its p-value. */
use "~/Documents/Data/China_Census1990/middleschool_census1990_regressions.dta", clear;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
/* Fix: the replace macro was set here but the export hard-coded append, so the workbook was
   never reset and collected columns from every earlier run. The first pass now replaces the
   file and every later pass appends explicitly. */
local replace replace; local j=0;
forval year=1941/1944 {;
xi: reg onlyfemale1 DT1 DT2 D1 D2 ${scale} i.occu i.empstat children if byear<`year'+18+`j' & byear>`year', ro;
lincom DT1-DT2; local tstat=r(estimate)/r(se); local pval=tprob(r(df), abs(`tstat'));local Bandwidth=17+`j';
outreg2 using sexselection_regression_robust_otherschool_bandwidth, excel `replace' nocons nor2 se keep( DT1 DT2) addstat(DT1-DT2, r(estimate), P-value, `pval') ctitle("Onlyfemale: Bandwidth==`Bandwidth'");
local replace append; local j=`j'-2;
};
/* NOTE(review): the bare 1 below joins the following use statement under the semicolon
   delimiter and is not a valid command, so a full run stops here. It looks like a manual
   stop marker - confirm, and delete it if the under-primary loop should run in batch. */
1
use "~/Documents/Data/China_Census1990/underprimaryschool_census1990_regressions.dta", clear;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
local j=0;
forval year=1941/1944 {;
xi: reg onlyfemale1 DT1 DT2 D1 D2 ${scale} i.occu i.empstat children if byear<`year'+18+`j' & byear>`year', ro;
lincom DT1-DT2; local tstat=r(estimate)/r(se); local pval=tprob(r(df), abs(`tstat'));local Bandwidth=17+`j';
outreg2 using sexselection_regression_robust_otherschool_bandwidth, excel append nocons nor2 se keep( DT1 DT2) addstat(DT1-DT2, r(estimate), P-value, `pval') ctitle("Onlyfemale: Bandwidth==`Bandwidth'");
local j=`j'-2;
};
#delimit cr
********************************* 6. Channels and other explanations: CHIPS1988/ ************************************
// test whether income, wealth, consumption change,
// see sexselection_chips_clean.do
#delimit ;
use "~/Dropbox/workslot/mystata/projects/education_quality/sexselection_chips1988.dta", clear;
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection;
keep if byear>=1945 & byear<=1955;
keep if gender==2;
local covariates = "rooms area pubh pork beef poutry fish totalwage1 totalwage2 wage";
local replace replace;
foreach var of varlist `covariates' {;
reg `var' DT1 DT2 D1 D2 byear, ro;
test DT1 DT2; local F1=r(p);
outreg2 using sexselection_regression_incomechannel, excel `replace' nocons nor2 se ctitle("`var'") addstat(F-test, `F1') keep( DT1 DT2);
local replace;
};
#delimit cr
****************************************************** 7 Graph results ****************************************************************
/* 0. 1st Stage: years of schooling */
* Four cohort-mean plots (yhi and hnormal, female sample and full sample), each with
* separate linear fits up to 1949, between 1949 and 1951, and from 1951 on.
* Every plot works on a collapsed copy of the data, so each collapse is wrapped in preserve/restore.
use "~/Dropbox/workslot/mystata/projects/education_quality/sexselection_cfps_culs.dta", clear
keep if byear<1960 & byear>1940
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
preserve
collapse (mean) yhi if gender==0 [pweight=weight], by (byear)
twoway (`option' yhi byear) (lfit yhi byear if byear>1940 & byear<=1949) ///
(lfit yhi byear if byear>=1951 & byear<1960) (lfit yhi byear if byear>=1949 & byear<=1951), ///
xline(1949 1951) xlabel(1940 1949 1951 1960, labsize(large)) name(yhifemale) title(Female Sample, size(vlarge)) ///
xtitle(,size(vlarge)) ytitle("Years of Study in High School",size(vlarge)) scheme(s1mono) legend(off)
graph export sexselection_female_yhi.eps, replace
restore
preserve
collapse (mean) yhi [pweight=weight], by (byear)
twoway (`option' yhi byear) (lfit yhi byear if byear>1940 & byear<=1949) ///
(lfit yhi byear if byear>=1951 & byear<1960) (lfit yhi byear if byear>=1949 & byear<=1951), ///
xline(1949 1951) xlabel(1940 1949 1951 1960, labsize(large)) name(yhi) title(Full Sample, size(vlarge)) ///
xtitle(,size(vlarge)) ytitle("Years of Study in High School",size(vlarge)) scheme(s1mono) legend(off)
graph export sexselection_yhi.eps, replace
restore
preserve
collapse (mean) hnormal if gender==0 [pweight=weight], by (byear)
twoway (`option' hnormal byear) (lfit hnormal byear if byear>1940 & byear<=1949) ///
(lfit hnormal byear if byear>=1951 & byear<1960) (lfit hnormal byear if byear>=1949 & byear<=1951), ///
xline(1949 1951) xlabel(1940 1949 1951 1960, labsize(large)) name(hnormal_female) scheme(s1mono) ///
title(Female Sample, size(vlarge)) ytitle("Normal Years of Schooling",size(vlarge)) xtitle(,size(vlarge)) legend(off)
graph export sexselection_female_hnormal.eps, replace
restore
preserve
collapse (mean) hnormal [pweight=weight], by (byear)
twoway (`option' hnormal byear) (lfit hnormal byear if byear>1940 & byear<=1949) ///
(lfit hnormal byear if byear>=1951 & byear<1960) (lfit hnormal byear if byear>=1949 & byear<=1951), ///
xline(1949 1951) xlabel(1940 1949 1951 1960, labsize(large)) name(hnormal) scheme(s1mono) ///
title(Full Sample, size(vlarge)) ytitle("Normal Years of Schooling",size(vlarge)) xtitle(,size(vlarge)) legend(off)
graph export sexselection_hnormal.eps, replace
* Fix: close the last preserve. Without this the data stay preserved and the next
* preserve in the do-file fails with "already preserved".
restore
/* 1. Discontinuity Test: CFPS + CULS */
* Cohort-mean plots of each covariate, first for the female sample (gender==0), then for
* the full sample. Variables are relabelled after each collapse because the plots take
* their y-axis titles from the variable labels.
use "~/Dropbox/workslot/mystata/projects/education_quality/sexselection_cfps_culs.dta", clear
keep if byear<1960 & byear>1940
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
local covariates= "yhi height sib meduy feduy peduy upperclass sentdown"
replace sib=. if sib<0
preserve
collapse (mean) `covariates' if gender==0, by (byear)
la var yhi "Years of Study in High School"
la var height "Height"
la var sib "Number of Siblings"
la var meduy "Mother's Years of Schooling"
la var feduy "Father's Years of Schooling"
* Fix: the label misspelled the variable name feduy as "fedy"
la var peduy "Max(feduy, meduy)"
la var upperclass "Upper Social Economic Status"
la var sentdown "Sent-down Experience"
foreach var of varlist `covariates' {
twoway (`option' `var' byear), xline(1949 1951) xlabel(1940 1949 1951 1960, labsize(large)) name(`var'female) scheme(s1mono) legend(off) title(Female Sample,size(vlarge)) xtitle(,size(vlarge)) ytitle(,size(vlarge))
graph export sexselection_female_continuitytest_`var'.eps, replace
}
restore
* Full sample: no preserve needed, the data are reloaded by the next section
collapse (mean) `covariates' , by (byear)
la var yhi "Years of Study in High School"
la var height "Height"
la var sib "Number of Siblings"
la var meduy "Mother's Years of Schooling"
la var feduy "Father's Years of Schooling"
la var peduy "Max(feduy, meduy)"
la var upperclass "Upper Social Economic Status"
la var sentdown "Sent-down Experience"
foreach var of varlist `covariates' {
twoway `option' `var' byear, xline(1949 1951) xlabel(1940 1949 1951 1960, labsize(large)) name(`var') scheme(s1mono) legend(off) title(Full Sample,size(vlarge)) xtitle(,size(vlarge)) ytitle(,size(vlarge))
graph export sexselection_continuitytest_`var'.eps, replace
}
/* 2. Discontinuity Test: Census1990 */
* Cohort-mean plots for the Census 1990 high-school sample (rural==0 & educ==4):
* share with one child, number of children, share married, share with a job.
* Fix: in the first two plots the data are collapsed by ${scale}, so the label is now applied
* to ${scale} as well. The hard-coded abyear only worked while ${scale} was abyear.
use "~/Documents/Data/China_Census1990/highschool_census1990_regressions.dta", clear
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
collapse (mean) onechild chborn if rural==0 & educ==4, by (${scale}) // high school
la var ${scale} "Birth Year"
twoway (lfit onechild ${scale} if ${scale}>1945 & ${scale}<=1949) ///
(lfit onechild ${scale} if ${scale}>=1949 & ${scale}<=1951) ///
(lfit onechild ${scale} if ${scale}>=1951 & ${scale}<1955) ///
(scatter onechild ${scale} if ${scale}>1945 & ${scale}<1955), ///
xlabel(1945(2)1955) name(onechild) scheme(s1mono) xline(1949 1951) legend(off) ///
ytitle("Having only one child", size(vlarge)) xlabel(1945 1949 1951 1955, labsize(large)) xtitle(,size(vlarge))
graph export census1990_highschool_urban_onechild.eps, replace
use "~/Documents/Data/China_Census1990/highschool_census1990_regressions.dta", clear
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
collapse (mean) onechild chborn if rural==0 & educ==4, by (${scale}) // high school Number of Children
la var ${scale} "Birth Year"
twoway (scatter chborn ${scale} if ${scale}>1945 & ${scale}<1955), ///
xlabel(1945(2)1955) name(numberofchildren) scheme(s1mono) xline(1949 1951) legend(off) ///
ytitle("Number of Children", size(vlarge)) xlabel(1945 1949 1951 1955, labsize(large)) xtitle(,size(vlarge))
graph export census1990_highschool_urban_chborn.eps, replace
* The remaining two plots are written directly in terms of abyear
use "~/Documents/Data/China_Census1990/highschool_census1990_regressions.dta", clear
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
collapse (mean) married byear if rural==0 & educ==4, by (abyear) // high school married
la var abyear "Birth Year"
twoway (scatter married abyear if abyear>1945 & abyear<1955), ///
xlabel(1945(2)1955) name(married) scheme(s1mono) xline(1949 1951) legend(off) ///
ytitle("Married", size(vlarge)) xlabel(1945 1949 1951 1955, labsize(large)) xtitle(,size(vlarge))
graph export census1990_marriage_highschool_urban.eps, replace
use "~/Documents/Data/China_Census1990/highschool_census1990_regressions.dta", clear
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
collapse (mean) job byear if rural==0 & educ==4, by (abyear) // high school job
la var abyear "Birth Year"
twoway (scatter job abyear if abyear>1945 & abyear<1955), ///
xlabel(1945(2)1955) name(job) scheme(s1mono) xline(1949 1951) legend(off) ///
ytitle("Employed", size(vlarge)) xlabel(1945 1949 1951 1955, labsize(large)) xtitle(,size(vlarge))
graph export census1990_job_highschool_urban.eps, replace
xx
*/
/* 3. 2nd Stage and Other School Stages */
// high school
use "~/Documents/Data/China_Census1990/highschool_census1990_regressions.dta", clear
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
collapse (mean) onlyfemale1, by (${scale}) // high school
la var abyear "Birth Year"
twoway (lfit onlyfemale1 ${scale} if ${scale}>1945 & ${scale}<=1949) ///
(lfit onlyfemale1 ${scale} if ${scale}>=1949 & ${scale}<=1951) ///
(lfit onlyfemale1 ${scale} if ${scale}>=1951 & ${scale}<1955) ///
(scatter onlyfemale1 ${scale} if ${scale}>1945 & ${scale}<1955, msymbol(cmcircle) ), ///
xlabel(1945(2)1955) name(chborn) scheme(s1mono) xline(1949 1951) legend(off) ///
ytitle("Having only girls")
graph export census1990_highschool_urban.eps, replace
// middle school
use "~/Documents/Data/China_Census1990/middleschool_census1990_regressions.dta", clear
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
la var abyear "Birth Year"
collapse (mean) onlyfemale1, by (abyear) // middle school
twoway (scatter onlyfemale1 abyear if abyear>1945 & abyear<1955, msymbol(cmcircle) ), ///
xlabel(1945(2)1955) name(middleschool) scheme(s1mono) xline(1949 1951) legend(off) ///
ytitle("The only child is a girl")
graph export census1990_middleschool_urban.eps, replace
// under primary school
* Same plot as the other schooling groups, built from the under-primary-school
* sample: cell means of onlyfemale1 by abyear, scattered for (1945, 1955).
use "~/Documents/Data/China_Census1990/underprimaryschool_census1990_regressions.dta", clear
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
collapse (mean) onlyfemale1, by(abyear)
label variable abyear "Birth Year"
twoway scatter onlyfemale1 abyear if abyear>1945 & abyear<1955, msymbol(cmcircle) ///
    xlabel(1945(2)1955) xline(1949 1951) ///
    ytitle("The only child is a girl") legend(off) ///
    scheme(s1mono) name(underprimaryschool)
graph export census1990_underprimaryschool_urban.eps, replace
/* 4. Alternative Measure: given firstgirl, Prob(Twochildren) */
* Restrict to observations with firstgirl==1, take the mean of twochild per
* value of the running variable, and plot it for (1945, 1955) with piecewise
* linear fits around the 1949 and 1951 cutoffs.
use "~/Documents/Data/China_Census1990/tempfile1_highschool_census1990.dta", clear
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
collapse (mean) twochild if firstgirl==1, by (${scale}) // share with twochild among firstgirl==1
la var ${scale} "Birth Year"
* The in-memory graph gets its own name: reusing c1_sexratio here collides with
* the first-child sex-ratio graph drawn later in the same run, and Stata refuses
* to create a second graph under an existing name unless replace is given.
twoway (lfit twochild ${scale} if ${scale}>1945 & ${scale}<=1949) ///
(lfit twochild ${scale} if ${scale}>=1949 & ${scale}<=1951) ///
(lfit twochild ${scale} if ${scale}>=1951 & ${scale}<1955) ///
(scatter twochild ${scale} if ${scale}>1945 & ${scale}<1955, msymbol(cmcircle) ), ///
xlabel(1945(2)1955) name(twochild, replace) scheme(s1mono) xline(1949 1951) legend(off) ///
ytitle("Two Children")
graph export census1990_highschool_urban_twochildren.eps, replace
/* Alternative Measure: firstgirl */
* Cell means of firstgirl, c1_sex and c2_sex by the running variable, converted
* into a first-child sex ratio and plotted for (1945, 1955) with piecewise
* linear fits around the 1949 and 1951 cutoffs.
use "~/Documents/Data/China_Census1990/tempfile1_highschool_census1990.dta", clear
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
// NOTE(review): in Stata a missing chborn also satisfies chborn>0; confirm chborn has no missing values here.
collapse (mean) firstgirl c1_sex c2_sex if chborn>0, by (${scale}) // The sex of the first child (sex selection)
la var ${scale} "Birth Year"
* With p = mean(c1_sex) - 1, this is (1-p)/p.
* NOTE(review): presumably c1_sex is coded 1/2 so that p is the share coded 2 - confirm the coding.
gen c1_sexratio=1/(c1_sex-1)-1
* replace: a graph called c1_sexratio may already be in memory from the
* preceding section; without replace the twoway call would abort.
twoway (lfit c1_sexratio ${scale} if ${scale}>1945 & ${scale}<=1949) ///
(lfit c1_sexratio ${scale} if ${scale}>=1949 & ${scale}<=1951) ///
(lfit c1_sexratio ${scale} if ${scale}>=1951 & ${scale}<1955) ///
(scatter c1_sexratio ${scale} if ${scale}>1945 & ${scale}<1955, msymbol(cmcircle) ), ///
xlabel(1945(2)1955) name(c1_sexratio, replace) scheme(s1mono) xline(1949 1951) legend(off) ///
ytitle("Sex Ratio of the First child")
graph export census1990_highschool_urban_firstchildsex.eps, replace
/* 5. Mccrary Test */
* Density checks on the running variable abyear for the high-school sample:
*   (a) histogram of abyear over (1940, 1960), exported to eps;
*   (b) DCdensity estimate of the density discontinuity at the breakpoint;
*   (c) a polynomial regression on quarter-year cell counts as a cross-check.
* Locals left behind for later use: breakpoint, bandwidth, theta, se_theta, t,
* m_1d, m_1d_se.
use "~/Documents/Data/China_Census1990/highschool_census1990_regressions.dta", clear
cd ~/Dropbox/workslot/mylyx/Projects/education_sexselection
keep if abyear>1940 & abyear<1960
keep abyear
la var abyear "Birth Year"
hist abyear, xline(1949 1951, lpattern(dash) lwidth(.5)) xlabel(1940 1949 1951 1960, labsize(large)) scheme(s1mono) bin(80) name(hist) ytitle(Frequency, size(large)) xtitle(Birth Year, size(large))
graph export sexselection_census1990_mccrarytest_hist.eps, replace
local breakpoint 1949 // breakpoint:1949/1951
* DCdensity adds Xj Yj r0 fhat se_fhat to the data in memory; the density plot
* after this section reads them, so they must survive the cross-check below.
DCdensity abyear, breakpoint(`breakpoint') generate(Xj Yj r0 fhat se_fhat) h(2) b(0.25) nograph
local bandwidth=r(bandwidth)
* Estimate and standard error rounded to three decimals for the plot annotation.
local se_theta=round(`r(se)'*1000)/1000
local theta=round(`r(theta)'*1000)/1000
local t=`theta'/`se_theta'
* 1951 does not work, as there seems to be a sudden increase of people
* Cross-check: count observations in 0.25-year cells over [1945, 1951) and
* regress the counts on a quartic in the cell value plus a slope shift after
* 1949. The collapse replaces the data in memory, so the whole check runs
* between preserve and restore to keep the DCdensity variables available.
preserve
egen hqw2=cut(abyear), at(1945(0.25)1951)
g compteur=1
collapse (sum) compteur, by(hqw2)
g t=hqw2>1949
g h2=hqw2^2
g h3=hqw2^3
g h4=hqw2^4
g interact1=hqw2*t
reg compteur hqw2 interact1 h2 h3 h4
* Refer to the regressor by its full name instead of the abbreviation interact.
local m_1d=round(_b[interact1]*100)/100
local m_1d_se=round(_se[interact1]*100)/100
restore
* McCrary density figure built from the variables created by DCdensity:
*   Xj / Yj        cell midpoints and cell values (scatter),
*   r0 / fhat      evaluation points and smoothed cell values (thick lines),
*   se_fhat        standard error of fhat, used for fhat +/- 1.96*se bands.
* Every line is drawn separately on each side of the breakpoint so that nothing
* is connected across it.
tempvar upper lower
quietly generate `upper' = fhat + 1.96*se_fhat
quietly generate `lower' = fhat - 1.96*se_fhat
label variable Xj "Birth Year"
label variable r0 "Birth Year"
twoway ///
    (scatter Yj Xj if Xj<=1960 & Xj>1940, msymbol(cmcircle) msize(medlarge)) ///
    (line fhat r0 if r0 < `breakpoint', lcolor(black) lwidth(medthick)) ///
    (line fhat r0 if r0 > `breakpoint', lcolor(black) lwidth(medthick)) ///
    (line `upper' r0 if r0 < `breakpoint', lcolor(black) lwidth(vthin)) ///
    (line `lower' r0 if r0 < `breakpoint', lcolor(black) lwidth(vthin)) ///
    (line `upper' r0 if r0 > `breakpoint', lcolor(black) lwidth(vthin)) ///
    (line `lower' r0 if r0 > `breakpoint', lcolor(black) lwidth(vthin)), ///
    xline(`breakpoint', lpattern(dash) lwidth(.5)) ///
    xlabel(1940 `breakpoint' 1960,labsize(vlarge)) ///
    legend(off) scheme(s1mono) name(density) ///
    ttext( 0.15 1945 "McCrary Tests:" "Discontinuity est.= `theta'(`se_theta')", just(left) size(medsmall)) ///
    xtitle(Birth Year, size(vlarge)) ytitle(Density Function, size(vlarge))
graph export sexselection_census1990_mccrarytest_density_`breakpoint'.eps, replace
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment