Aaronmoralesshildrick/SS154 HW 2

## SS154 HW 2
/* Q1 */

/* Data cleaning */

/* Flag the people whose personid is 1 through 15 and keep only their rows. */
egen P = anymatch(personid), values(1/15)
keep if P /* leave the 15 people only */
drop P

/* Keep a single row per person: the first one in the current data order.
   A plain (by)sort breaks ties within personid arbitrarily, so the row that
   ends up first could differ between runs; a stable sort preserves the
   existing within-person order and makes the selection reproducible. */
sort personid, stable
by personid: keep if _n == 1

/* Variable lists: x1 and x2 are the two regressor groups, y is the outcome. */
global x1 educ potexper ability
global x2 mothered fathered siblings
global y logwage

/* a: regress y on x1 only */
regress $y $x1

/* b: regress y on x1 and x2 */
regress $y $x1 $x2


/* c: R^2 with and without a constant term.
   For the no-constant model R^2 is computed as 1 - (SSE/SST), where SST is
   the total sum of squares taken from the model that includes the constant
   (model SS + residual SS). */
regress $y $x1 $x2
scalar q1_sst = e(mss) + e(rss)
scalar q1_n   = e(N)
regress $y $x1 $x2, noconstant
scalar q1_r2 = 1 - e(rss)/scalar(q1_sst)
display "No-constant model, 1 - SSE/SST = " q1_r2

/* d: adjusted R^2 with and without a constant term.
   Adjusted R^2 = 1 - (SSE/(N-K)) / (SST/(N-1)); e(df_r) holds N-K. */
regress $y $x1 $x2
regress $y $x1 $x2, noconstant
scalar q1_r2_adj = 1 - (e(rss)/e(df_r)) / (scalar(q1_sst)/(scalar(q1_n) - 1))
display "No-constant model, 1 - (SSE/(N-K))/(SST/(N-1)) = " q1_r2_adj


/* e */
/*
MR 1 - Variables explain y
MR 2 - assumption 2 leads to believe errors are distributed around 0

*/

/* MR 5 */
corr $y $x1 /* get the correlations between variables, comment on high values */
corr $y $x1 $x2

/* MR 6: histograms of the residuals with a fitted normal density overlaid,
   to judge how close the residuals are to normal. */
regress $y $x1
predict r, resid
hist r, freq normal

regress $y $x1 $x2
predict r2, resid
hist r2, freq normal

/* Q2 */

/* Quantity measure used as the dependent variable:
   gasoline expenditure divided by (gasoline price * population). */
gen quant_pc = gasexp/(gasp*pop)


/* a */
regress quant_pc income puc ps ppt pnc pn pd gasp year


/* b
test of hypothesis: equal coefficients on pnc and puc */
/* NOTE(review): this test is run on a regression of gasexp on pnc and puc
   only, not on the model from part (a) - confirm that is intended. */
regress gasexp pnc puc
test _b[pnc] = _b[puc] /* Fail to reject null --> not stat sig diff from each other */


/* c: elasticities (d ln y / d ln x) from the linear model */
/* NOTE(review): at(year=2004) sets year to 2004 for every observation and
   leaves the other covariates as observed; to evaluate at the 2004 data
   point itself, restrict the sample with "if year == 2004" - confirm which
   one is wanted. */
quietly regress quant_pc income puc ps ppt pnc pn pd gasp year
margins, eyex(gasp) at(year=2004)
margins, eyex(income) at(year=2004)
margins, eyex(ppt) at(year=2004)


/* d: log-log model; creates ln<var> for the outcome and each regressor */
foreach v of varlist quant_pc income puc ps ppt pnc pn pd gasp {
    gen ln`v' = ln(`v')
}

regress lnquant_pc lnincome lnpuc lnps lnppt lnpnc lnpn lnpd lngasp year
/* compare values of margins to the log log model */

/* e: correlations among the price variables, in levels and in logs */
corr puc ps ppt pnc pn pd gasp
corr lnpuc lnps lnppt lnpnc lnpn lnpd lngasp

/* f */
/* Normalize each price index so that it equals 100 in 2004.
   The base value is looked up by year rather than by observation number,
   so the result does not depend on how the data happen to be sorted. */
quietly count if year == 2004
assert r(N) == 1 /* exactly one base-year row is required */
foreach v of varlist gasp pnc puc ppt pd pn ps {
    quietly summarize `v' if year == 2004, meanonly
    gen n`v' = `v' * 100 / r(mean)
}

regress quant_pc ngasp npnc npuc nppt npd npn nps income year

/* Do the same for the logs */
foreach v in gasp pnc puc ppt pd pn ps {
    gen lnn`v' = ln(n`v')
}

regress lnquant_pc lnngasp lnnpnc lnnpuc lnnppt lnnpd lnnpn lnnps lnincome year
	/* Q1 */
	/* NOTE(review): from here on the file repeats the Q1/Q2 script that
	   appears earlier in this file; confirm whether this second copy is
	   still needed. */

	/* Data cleaning */

	/* Flag the people whose personid is 1 through 15 and keep only their rows. */
	egen P = anymatch(personid), values(1/15)
	keep if P /* leave the 15 people only */
	drop P

	/* Keep a single row per person: the first one in the current data order.
	   A plain (by)sort breaks ties within personid arbitrarily, so the row
	   that ends up first could differ between runs; a stable sort preserves
	   the existing within-person order and makes the selection reproducible. */
	sort personid, stable
	by personid: keep if _n == 1

	/* Variable lists: x1 and x2 are the two regressor groups, y is the outcome. */
	global x1 educ potexper ability
	global x2 mothered fathered siblings
	global y logwage

	/* a: regress y on x1 only */
	regress $y $x1

	/* b: regress y on x1 and x2 */
	regress $y $x1 $x2


	/* c: R^2 with and without a constant term.
	   For the no-constant model R^2 is computed as 1 - (SSE/SST), where SST
	   is the total sum of squares taken from the model that includes the
	   constant (model SS + residual SS). */
	regress $y $x1 $x2
	scalar q1_sst = e(mss) + e(rss)
	scalar q1_n   = e(N)
	regress $y $x1 $x2, noconstant
	scalar q1_r2 = 1 - e(rss)/scalar(q1_sst)
	display "No-constant model, 1 - SSE/SST = " q1_r2

	/* d: adjusted R^2 with and without a constant term.
	   Adjusted R^2 = 1 - (SSE/(N-K)) / (SST/(N-1)); e(df_r) holds N-K. */
	regress $y $x1 $x2
	regress $y $x1 $x2, noconstant
	scalar q1_r2_adj = 1 - (e(rss)/e(df_r)) / (scalar(q1_sst)/(scalar(q1_n) - 1))
	display "No-constant model, 1 - (SSE/(N-K))/(SST/(N-1)) = " q1_r2_adj


	/* e */
	/*
	MR 1 - Variables explain y
	MR 2 - assumption 2 leads to believe errors are distributed around 0

	*/

	/* MR 5 */
	corr $y $x1 /* get the correlations between variables, comment on high values */
	corr $y $x1 $x2

	/* MR 6: histograms of the residuals with a fitted normal density
	   overlaid, to judge how close the residuals are to normal. */
	/* NOTE(review): predict creates new variables r and r2 and will stop
	   with an error if variables with those names already exist in memory. */
	regress $y $x1
	predict r, resid
	hist r, freq normal

	regress $y $x1 $x2
	predict r2, resid
	hist r2, freq normal

	/* Q2 */

	/* Quantity measure used as the dependent variable:
	   gasoline expenditure divided by (gasoline price * population). */
	gen quant_pc = gasexp/(gasp*pop)


	/* a */
	regress quant_pc income puc ps ppt pnc pn pd gasp year


	/* b
	test of hypothesis: equal coefficients on pnc and puc */
	/* NOTE(review): this test is run on a regression of gasexp on pnc and
	   puc only, not on the model from part (a) - confirm that is intended. */
	regress gasexp pnc puc
	test _b[pnc] = _b[puc] /* Fail to reject null --> not stat sig diff from each other */


	/* c: elasticities (d ln y / d ln x) from the linear model */
	/* NOTE(review): at(year=2004) sets year to 2004 for every observation
	   and leaves the other covariates as observed; to evaluate at the 2004
	   data point itself, restrict the sample with "if year == 2004" -
	   confirm which one is wanted. */
	quietly regress quant_pc income puc ps ppt pnc pn pd gasp year
	margins, eyex(gasp) at(year=2004)
	margins, eyex(income) at(year=2004)
	margins, eyex(ppt) at(year=2004)


	/* d: log-log model; creates ln<var> for the outcome and each regressor */
	foreach v of varlist quant_pc income puc ps ppt pnc pn pd gasp {
		gen ln`v' = ln(`v')
	}

	regress lnquant_pc lnincome lnpuc lnps lnppt lnpnc lnpn lnpd lngasp year
	/* compare values of margins to the log log model */

	/* e: correlations among the price variables, in levels and in logs */
	corr puc ps ppt pnc pn pd gasp
	corr lnpuc lnps lnppt lnpnc lnpn lnpd lngasp

	/* f */
	/* Normalize each price index so that it equals 100 in 2004.
	   The base value is looked up by year rather than by observation
	   number, so the result does not depend on how the data are sorted. */
	quietly count if year == 2004
	assert r(N) == 1 /* exactly one base-year row is required */
	foreach v of varlist gasp pnc puc ppt pd pn ps {
		quietly summarize `v' if year == 2004, meanonly
		gen n`v' = `v' * 100 / r(mean)
	}

	regress quant_pc ngasp npnc npuc nppt npd npn nps income year

	/* Do the same for the logs */
	foreach v in gasp pnc puc ppt pd pn ps {
		gen lnn`v' = ln(n`v')
	}

	regress lnquant_pc lnngasp lnnpnc lnnpuc lnnppt lnnpd lnnpn lnnps lnincome year