** Intro to Panel Data
****************************************
** Panel data estimates in Stata
****************************************
** FE (within) estimator
** Loads the wagepan panel (individual id = nr, time = year) and fits the
** fixed-effects model with the built-in within estimator.
ssc install bcuse
bcuse wagepan
xtset nr year
xtreg lwage exper expersq, fe

** Manually demeaning the data
** The within transformation subtracts each individual's OWN time average,
** so the means must be computed by panel unit (nr), not over the full sample.
** Point estimates from the regression below match xtreg, fe; the reported
** standard errors differ because reg does not adjust the degrees of freedom
** for the absorbed individual means.
foreach x of varlist lwage exper expersq {
    * individual-specific mean of `x'
    bysort nr: egen mean_`x' = mean(`x')
    * plain arithmetic needs gen: egen only accepts egen functions
    gen demean_`x' = `x' - mean_`x'
}
reg demean_lwage demean_exper demean_expersq

** First differencing
** note: the "d." operator only works with identical time gaps (1 in this case)
** and relies on the xtset declaration above
reg d.lwage d.exper d.expersq, noconstant
** Basics of DD
****************************************
** Differences-in-Differences in Stata
****************************************
insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear

** Looking at the data
** Outcome: share of the adult population that is uninsured.
gen perc_unins=uninsured/adult_pop
** Keep states that expanded in 2014 plus never-expanders (common treatment date).
keep if expand_year=="2014" | expand_year=="NA"
drop if expand_ever=="NA"

** Plot group means by year.
** collapse replaces the data in memory with group-by-year averages, so it is
** wrapped in preserve/restore to bring the state-level data back for the regression.
preserve
collapse (mean) perc_unins, by(year expand_ever)
graph twoway (connected perc_unins year if expand_ever=="FALSE", color(black) lpattern(solid)) ///
(connected perc_unins year if expand_ever=="TRUE", color(black) lpattern(dash)), ///
xline(2013.5) ///
ytitle("Fraction Uninsured") xtitle("Year") legend(off) text(0.15 2017 "Non-expansion", place(e)) text(0.08 2017 "Expansion", place(e))
restore

** Simple 2x2 DD
gen post=(year>=2014)
gen treat=(expand_ever=="TRUE")
** Build the interaction from treat and post directly so that it cannot
** silently resolve to another variable through Stata's name abbreviation.
gen treat_post=treat*post
reg perc_unins treat post treat_post
** note - Stata 17 has didregress that will do a lot of this automatically
** TWFE and Event Studies
****************************************
** Event Studies in Stata
****************************************
** reghdfe depends on the ftools package
ssc install ftools
ssc install reghdfe

** Common treatment timing
insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
gen perc_unins=uninsured/adult_pop
** Keep 2014 expanders and never-expanders so every treated state shares one event date.
keep if expand_year=="2014" | expand_year=="NA"
drop if expand_ever=="NA"
gen treat=(expand_ever=="TRUE")

** Treatment-by-year interactions with 2013 as the omitted (reference) year,
** absorbing state fixed effects.
reghdfe perc_unins treat##ib2013.year, absorb(state)

** Copy each year's interaction coefficient and standard error onto the rows
** for that year.
gen coef = .
gen se = .
forvalues i = 2012(1)2018 {
    replace coef = _b[1.treat#`i'.year] if year == `i'
    replace se = _se[1.treat#`i'.year] if year == `i'
}

* Make confidence intervals
gen ci_top = coef+1.96*se
gen ci_bottom = coef - 1.96*se

* Limit ourselves to one observation per year
keep year coef se ci_*
duplicates drop

* Create connected scatterplot of coefficients
* with CIs included with rcap
* and a line at 0 from function
* (sort first: connect(line) joins points in data order)
sort year
twoway (sc coef year, connect(line)) (rcap ci_top ci_bottom year) ///
(function y = 0, range(2012 2018)), xtitle("Year") ///
caption("Estimates and 95% CI from Event Study")
** Differential treatment timing
** Event study in event time (years relative to each state's expansion year).
insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
generate perc_unins = uninsured/adult_pop
drop if expand_ever=="NA"

* Turn the string expansion year into a number ("NA" becomes missing)
replace expand_year="." if expand_year=="NA"
destring expand_year, replace

* Years since expansion; rows without an expansion year are assigned the
* reference period (-1), so all of their event dummies below are zero
generate event_time = year - expand_year
replace event_time = -1 if event_time==.

* Lead dummies: one and two years before; three or more years before are binned
forvalues k = 1(1)2 {
    generate F`k'event = (event_time==-`k')
}
generate F3event = (event_time<=-3)

* Lag dummies: expansion year through four years after
forvalues k = 0(1)4 {
    generate L`k'event = (event_time==`k')
}

* F1event (event time -1) is the omitted reference category
reghdfe perc_unins F3event F2event L0event L1event L2event L3event L4event, absorb(state year) cluster(state)

* Attach each dummy's estimate and standard error to the rows it flags;
* the reference period is set to zero by construction
generate coef = .
generate se = .
replace coef = 0 if F1event==1
replace se = 0 if F1event==1
foreach dummy of varlist F2event F3event L0event L1event L2event L3event L4event {
    replace coef = _b[`dummy'] if `dummy'==1
    replace se = _se[`dummy'] if `dummy'==1
}

* 95% confidence bounds
generate ci_top = coef+1.96*se
generate ci_bottom = coef - 1.96*se

* Reduce to one row per event-time period inside the plotted window
keep if inrange(event_time, -3, 4)
keep event_time coef se ci_*
duplicates drop

* Connected scatter of the estimates, capped CI bars, and a zero reference line
sort event_time
twoway (scatter coef event_time, connect(line)) ///
(rcap ci_top ci_bottom event_time) ///
(function y = 0, range(-3 4)), ///
xtitle("Time") ///
caption("Estimates and 95% CI from Event Study") xlabel(-3(1)4)
** Recent DD Advancements
****************************************
** Recent DD Estimators in Stata
****************************************
** event_plot draws the event-study graphs for all estimators in this section
ssc install event_plot

** Callaway and Sant'Anna
ssc install csdid
ssc install drdid
insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
gen perc_unins=uninsured/adult_pop
** csdid needs a numeric panel identifier
egen stategroup=group(state)
drop if expand_ever=="NA"
** csdid expects gvar = 0 for units that are never treated
replace expand_year="0" if expand_year=="NA"
destring expand_year, replace
** notyet: use not-yet-treated units (in addition to never-treated) as controls
csdid perc_unins, ivar(stategroup) time(year) gvar(expand_year) notyet
** Aggregate to event time and store the estimates under the name "cs"
estat event, estore(cs)
** Current csdid releases name the event-study coefficients Tm# (leads) and
** Tp# (lags); legacy releases used T-# and T+#. If the plot comes back empty,
** check the coefficient names in the estat event output and adjust the stubs.
event_plot cs, default_look graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") xlabel(-6(1)4) title("Callaway and Sant'Anna (2020)")) stub_lag(Tp#) stub_lead(Tm#) together
** Sun and Abraham
** Interaction-weighted event study, with states lacking an expansion year
** as the control cohort.
ssc install eventstudyinteract
ssc install avar
insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
generate perc_unins = uninsured/adult_pop
drop if expand_ever=="NA"
* Numeric state identifier for the fixed effects and clustering
egen stategroup = group(state)

* Numeric expansion year; "NA" becomes missing
replace expand_year="." if expand_year=="NA"
destring expand_year, replace

* Event time is missing for states without an expansion year; those rows
* form the control cohort
generate event_time = year - expand_year
generate nevertreated = (event_time==.)

* Relative-time indicators: lags 0 through 4, leads 1 and 2, and a bin for
* three or more years before expansion (missing event_time yields 0 in all)
forvalues k = 0(1)4 {
    generate L`k'event = (event_time==`k')
}
forvalues k = 1(1)2 {
    generate F`k'event = (event_time==-`k')
}
generate F3event = (event_time<=-3)

* F1event is left out of the regressor list as the reference period
eventstudyinteract perc_unins F3event F2event L0event L1event L2event L3event L4event, ///
    vce(cluster stategroup) absorb(stategroup year) ///
    cohort(expand_year) control_cohort(nevertreated)

* Plot the interaction-weighted estimates with capped confidence intervals
event_plot e(b_iw)#e(V_iw), default_look ///
    graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") xlabel(-3(1)4) title("Sun and Abraham (2020)")) ///
    stub_lag(L#event) stub_lead(F#event) plottype(scatter) ciplottype(rcap) together
** de Chaisemartin and D'Haultfoeuille
** Dynamic-effects estimator with placebo tests and bootstrapped standard errors.
ssc install did_multiplegt
insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
generate perc_unins = uninsured/adult_pop
drop if expand_ever=="NA"
* Numeric state identifier used as the group variable and for clustering
egen stategroup = group(state)

* Numeric expansion year; "NA" becomes missing
replace expand_year="." if expand_year=="NA"
destring expand_year, replace

* Event time is missing for states without an expansion year
generate event_time = year - expand_year
generate nevertreated = (event_time==.)

* Treatment status: 1 from the expansion year onward, 0 otherwise
* (missing event_time is excluded explicitly because missing compares as large)
generate treat = (event_time>=0 & event_time!=.)

* Arguments are outcome, group, time, treatment; 4 dynamic effects,
* 3 placebos, 100 bootstrap replications clustered by state
did_multiplegt perc_unins stategroup year treat, ///
    robust_dynamic dynamic(4) placebo(3) breps(100) cluster(stategroup)

* Plot the dynamic effects and placebo estimates together
event_plot e(estimates)#e(variances), default_look ///
    graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") ///
    title("de Chaisemartin and D'Haultfoeuille (2020)") xlabel(-3(1)4)) ///
    stub_lag(Effect_#) stub_lead(Placebo_#) together