Intro to Panel Data
########################################
## Panel data estimates in R
########################################
## FE (within) estimator
library(readstata13)
library(fixest)
library(tidyverse)
wagepan <- read.dta13("http://fmwww.bc.edu/ec-p/data/wooldridge/wagepan.dta")
feols(lwage~exper + expersq | nr, data=wagepan)
## Manually demeaning the data
wagepan <- wagepan %>%
group_by(nr) %>%
mutate(demean_lwage=lwage - mean(lwage),
demean_exper=exper - mean(exper),
demean_expersq=expersq - mean(expersq))
summary(lm(demean_lwage~demean_exper + demean_expersq, data=wagepan))
## First differencing
wagepan <- wagepan %>%
group_by(nr) %>%
arrange(year) %>%
mutate(fd_lwage=lwage - lag(lwage),
fd_exper=exper - lag(exper),
fd_expersq=expersq - lag(expersq)) %>%
na.omit()
summary(lm(fd_lwage~0 + fd_exper + fd_expersq, data=wagepan))
Basics of DD
########################################
## Difference-in-Differences in R
########################################
library(tidyverse)
library(modelsummary)
mcaid.data <- read_tsv("https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt")
## Looking at the data
ins.plot.dat <- mcaid.data %>% filter(expand_year==2014 | is.na(expand_year), !is.na(expand_ever)) %>%
mutate(perc_unins=uninsured/adult_pop) %>%
group_by(expand_ever, year) %>% summarize(mean=mean(perc_unins))
ins.plot <- ggplot(data=ins.plot.dat, aes(x=year,y=mean,group=expand_ever,linetype=expand_ever)) +
geom_line() + geom_point() + theme_bw() +
geom_vline(xintercept=2013.5, color="red") +
geom_text(data = ins.plot.dat %>% filter(year == 2016),
aes(label = c("Non-expansion","Expansion"),
x = year + 1,
y = mean)) +
guides(linetype=FALSE) +
labs(
x="Year",
y="Fraction Uninsured",
title="Share of Uninsured over Time"
)
## Simple 2x2 DD
reg.dat <- mcaid.data %>% filter(expand_year==2014 | is.na(expand_year), !is.na(expand_ever)) %>%
mutate(perc_unins=uninsured/adult_pop,
post = (year>=2014),
treat=post*expand_ever)
dd.ins.reg <- lm(perc_unins ~ post + expand_ever + post*expand_ever, data=reg.dat)
msummary(dd.ins.reg)
TWFE and Event Studies
########################################
## Event Studies in R
########################################
library(tidyverse)
library(modelsummary)
library(data.table)
library(fixest)
mcaid.data <- read_tsv("https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt")
## Common treatment timing
reg.dat <- as.data.table(mcaid.data) %>%
filter(expand_year==2014 | is.na(expand_year), !is.na(expand_ever)) %>%
mutate(perc_unins=uninsured/adult_pop,
post = (year>=2014),
treat=post*expand_ever)
mod.twfe <- feols(perc_unins~i(year, expand_ever, ref=2013) | State + year,
cluster=~State,
data=reg.dat)
iplot(mod.twfe,
xlab = 'Time to treatment',
main = 'Event study')
## Differential treatment timing
reg.dat <- as.data.table(mcaid.data) %>%
filter(!is.na(expand_ever)) %>%
mutate(perc_unins=uninsured/adult_pop)
reg.dat[, time_to_treat := ifelse(expand_ever==TRUE, year-expand_year, -1)]
reg.dat <- reg.dat %>%
mutate(time_to_treat=ifelse(time_to_treat<=-3,-3,time_to_treat))
mod.twfe <- feols(perc_unins~i(time_to_treat, expand_ever, ref=-1) | State + year,
cluster=~State,
data=reg.dat)
iplot(mod.twfe,
xlab = 'Time to treatment',
main = 'Event study')
Recent DD Advancements
########################################
## Recent DD Estimators in R
########################################
library(tidyverse)
library(modelsummary)
library(data.table)
library(fixest)
mcaid.data <- read_tsv("https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt")
## Callaway and Sant'Anna
library(did)
library(DRDID)
reg.dat <- mcaid.data %>%
filter(!is.na(expand_ever)) %>%
mutate(perc_unins=uninsured/adult_pop,
post = (year>=2014),
treat=post*expand_ever,
expand_year=ifelse(is.na(expand_year),0,expand_year)) %>%
filter(!is.na(perc_unins)) %>%
group_by(State) %>%
mutate(stategroup=cur_group_id()) %>% ungroup()
mod.cs <- att_gt(yname="perc_unins", tname="year", idname="stategroup",
gname="expand_year",
data=reg.dat, panel=TRUE, est_method="dr",
allow_unbalanced_panel=TRUE)
mod.cs.event <- aggte(mod.cs, type="dynamic")
ggdid(mod.cs.event)
## Sun and Abraham
reg.dat <- mcaid.data %>%
filter(!is.na(expand_ever)) %>%
mutate(perc_unins=uninsured/adult_pop,
post = (year>=2014),
treat=post*expand_ever,
expand_year = ifelse(expand_ever==FALSE, 10000, expand_year),
time_to_treat = ifelse(expand_ever==FALSE, -1, year-expand_year),
time_to_treat = ifelse(time_to_treat < -3, -3, time_to_treat))
mod.sa <- feols(perc_unins~sunab(expand_year, time_to_treat) | State + year,
cluster=~State,
data=reg.dat)
iplot(mod.sa,
xlab = 'Time to treatment',
main = 'Event study')
## de Chaisemartin and D'Haultfoeuille
library(DIDmultiplegt)
reg.dat <- mcaid.data %>%
filter(!is.na(expand_ever)) %>%
mutate(perc_unins=uninsured/adult_pop,
treat=case_when(
expand_ever==FALSE ~ 0,
expand_ever==TRUE & expand_year<year ~ 0,
expand_ever==TRUE & expand_year>=year ~ 1))
mod.ch <- did_multiplegt(df=reg.dat, Y="perc_unins", G="State", T="year", D="treat",
placebo=3, dynamic=4, brep=50, cluster="State")
mod.ch