Intro to Panel Data

****************************************
** Panel data estimates in Stata
****************************************

** FE (within) estimator
ssc install bcuse
bcuse wagepan
xtset nr year
xtreg lwage exper expersq, fe


** Manually demeaning the data
foreach x of varlist lwage exper expersq {
  egen mean_`x'=mean(`x')
  egen demean_`x'=`x'-mean_`x'
}
reg demean_lwage demean_exper demean_expersq


** First differencing
** note: the "d." operator only works with identical time gaps (1 in this case)
reg d.lwage d.exper d.expersq, noconstant

Basics of DD

****************************************
** Differences-in-Differences in Stata
****************************************
insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear


** Looking at the data
gen perc_unins=uninsured/adult_pop
keep if expand_year=="2014" | expand_year=="NA"
drop if expand_ever=="NA"
collapse (mean) perc_unins, by(year expand_ever)
graph twoway (connected perc_unins year if expand_ever=="FALSE", color(black) lpattern(solid)) ///
  (connected perc_unins year if expand_ever=="TRUE", color(black) lpattern(dash)), ///
  xline(2013.5) ///
    ytitle("Fraction Uninsured") xtitle("Year") legend(off) text(0.15 2017 "Non-expansion", place(e)) text(0.08 2017 "Expansion", place(e))


** Simple 2x2 DD
gen post=(year>=2014)
gen treat=(expand_ever=="TRUE")
gen treat_post=(expand=="TRUE")

reg perc_unins treat post treat_post

** note - Stata 17 has didregress that will do a lot of this automatically

TWFE and Event Studies

****************************************
** Event Studies in Stata
****************************************
ssc install reghdfe

** Common treatment timing
insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
gen perc_unins=uninsured/adult_pop
keep if expand_year=="2014" | expand_year=="NA"
drop if expand_ever=="NA"
gen post=(year>=2014)
gen treat=(expand_ever=="TRUE")
gen treat_post=(expand=="TRUE")

reghdfe perc_unins treat##ib2013.year, absorb(state)
gen coef = .
gen se = .
forvalues i = 2012(1)2018 {
    replace coef = _b[1.treat#`i'.year] if year == `i'
    replace se = _se[1.treat#`i'.year] if year == `i'
}

* Make confidence intervals
gen ci_top = coef+1.96*se
gen ci_bottom = coef - 1.96*se

* Limit ourselves to one observation per year
keep year coef se ci_*
duplicates drop

* Create connected scatterplot of coefficients
* with CIs included with rcap 
* and a line at 0 from function
twoway (sc coef year, connect(line)) (rcap ci_top ci_bottom year) ///
    (function y = 0, range(2012 2018)), xtitle("Year") ///
    caption("Estimates and 95% CI from Event Study")



** Differential treatment timing
insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
gen perc_unins=uninsured/adult_pop
drop if expand_ever=="NA"
replace expand_year="." if expand_year=="NA"
destring expand_year, replace
gen event_time=year-expand_year
replace event_time=-1 if event_time==.

forvalues l = 0/4 {
    gen L`l'event = (event_time==`l')
}
forvalues l = 1/2 {
    gen F`l'event = (event_time==-`l')
}
gen F3event=(event_time<=-3)

reghdfe perc_unins F3event F2event L0event L1event L2event L3event L4event, absorb(state year) cluster(state)
gen coef = .
gen se = .
forvalues i = 2(1)3 {
    replace coef = _b[F`i'event] if F`i'event==1
    replace se = _se[F`i'event] if F`i'event==1
}
forvalues i = 0(1)4 {
    replace coef = _b[L`i'event] if L`i'event==1
    replace se = _se[L`i'event] if L`i'event==1
}
replace coef = 0 if F1event==1
replace se=0 if F1event==1

* Make confidence intervals
gen ci_top = coef+1.96*se
gen ci_bottom = coef - 1.96*se

* Limit ourselves to one observation per year
keep if event_time>=-3 & event_time<=4
keep event_time coef se ci_*
duplicates drop

* Create connected scatterplot of coefficients
* with CIs included with rcap 
* and a line at 0 from function
sort event_time
twoway (sc coef event_time, connect(line)) (rcap ci_top ci_bottom event_time) ///
    (function y = 0, range(-3 4)), xtitle("Time") ///
    caption("Estimates and 95% CI from Event Study") xlabel(-3(1)4)

Recent DD Advancements

****************************************
** Recent DD Estimators in Stata
****************************************
ssc install event_plot


** Callaway and Sant'Anna
ssc install csdid
ssc install drdid

insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
gen perc_unins=uninsured/adult_pop
egen stategroup=group(state)
drop if expand_ever=="NA"
replace expand_year="0" if expand_year=="NA"
destring expand_year, replace

csdid perc_unins, ivar(stategroup) time(year) gvar(expand_year) notyet
estat event, estore(cs)
event_plot cs, default_look graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") xlabel(-6(1)4) title("Callaway and Sant'Anna (2020)")) stub_lag(T+#) stub_lead(T-#) together



** Callaway and Sant'Anna
ssc install csdid
ssc install drdid

insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
gen perc_unins=uninsured/adult_pop
egen stategroup=group(state)
drop if expand_ever=="NA"
replace expand_year="0" if expand_year=="NA"
destring expand_year, replace

csdid perc_unins, ivar(stategroup) time(year) gvar(expand_year) notyet
estat event, estore(cs)
event_plot cs, default_look graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") xlabel(-6(1)4) title("Callaway and Sant'Anna (2020)")) stub_lag(T+#) stub_lead(T-#) together



** Sun and Abraham
ssc install eventstudyinteract
ssc install avar

insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
gen perc_unins=uninsured/adult_pop
drop if expand_ever=="NA"
egen stategroup=group(state)
replace expand_year="." if expand_year=="NA"
destring expand_year, replace
gen event_time=year-expand_year
gen nevertreated=(event_time==.)

forvalues l = 0/4 {
    gen L`l'event = (event_time==`l')
}
forvalues l = 1/2 {
    gen F`l'event = (event_time==-`l')
}
gen F3event=(event_time<=-3)
eventstudyinteract perc_unins F3event F2event L0event L1event L2event L3event L4event, vce(cluster stategroup) absorb(stategroup year) cohort(expand_year) control_cohort(nevertreated)

event_plot e(b_iw)#e(V_iw), default_look graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") xlabel(-3(1)4) title("Sun and Abraham (2020)")) stub_lag(L#event) stub_lead(F#event) plottype(scatter) ciplottype(rcap) together



** de Chaisemartin and D'Haultfoeuille
ssc install did_multiplegt

insheet using "https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt", clear
gen perc_unins=uninsured/adult_pop
drop if expand_ever=="NA"
egen stategroup=group(state)
replace expand_year="." if expand_year=="NA"
destring expand_year, replace
gen event_time=year-expand_year
gen nevertreated=(event_time==.)
gen treat=(event_time>=0 & event_time!=.)

did_multiplegt perc_unins stategroup year treat, robust_dynamic dynamic(4) placebo(3) breps(100) cluster(stategroup) 
event_plot e(estimates)#e(variances), default_look graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") ///
title("de Chaisemartin and D'Haultfoeuille (2020)") xlabel(-3(1)4)) stub_lag(Effect_#) stub_lead(Placebo_#) together