Intro to Panel Data

########################################
## Panel data estimates in R
########################################
  
## FE (within) estimator
library(readstata13)
library(fixest)
library(tidyverse)
wagepan <- read.dta13("http://fmwww.bc.edu/ec-p/data/wooldridge/wagepan.dta")
feols(lwage~exper + expersq | nr, data=wagepan)


## Manually demeaning the data
wagepan <- wagepan %>%
  group_by(nr) %>%
  mutate(demean_lwage=lwage - mean(lwage),
         demean_exper=exper - mean(exper),
         demean_expersq=expersq - mean(expersq))
summary(lm(demean_lwage~demean_exper + demean_expersq, data=wagepan))


## First differencing
wagepan <- wagepan %>%
  group_by(nr) %>%
  arrange(year) %>%
  mutate(fd_lwage=lwage - lag(lwage),
         fd_exper=exper - lag(exper),
         fd_expersq=expersq - lag(expersq)) %>%
  na.omit()
summary(lm(fd_lwage~0 + fd_exper + fd_expersq, data=wagepan))

Basics of DD

########################################
## Difference-in-Differences in R
########################################
library(tidyverse)  
library(modelsummary)
mcaid.data <- read_tsv("https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt")

## Looking at the data
ins.plot.dat <- mcaid.data %>% filter(expand_year==2014 | is.na(expand_year), !is.na(expand_ever)) %>%
  mutate(perc_unins=uninsured/adult_pop) %>%
  group_by(expand_ever, year) %>% summarize(mean=mean(perc_unins))

ins.plot <- ggplot(data=ins.plot.dat, aes(x=year,y=mean,group=expand_ever,linetype=expand_ever)) + 
  geom_line() + geom_point() + theme_bw() +
  geom_vline(xintercept=2013.5, color="red") +
  geom_text(data = ins.plot.dat %>% filter(year == 2016), 
            aes(label = c("Non-expansion","Expansion"),
                x = year + 1,
                y = mean)) +
  guides(linetype=FALSE) +
  labs(
    x="Year",
    y="Fraction Uninsured",
    title="Share of Uninsured over Time"
  )


## Simple 2x2 DD
reg.dat <- mcaid.data %>% filter(expand_year==2014 | is.na(expand_year), !is.na(expand_ever)) %>%
  mutate(perc_unins=uninsured/adult_pop,
         post = (year>=2014), 
         treat=post*expand_ever)

dd.ins.reg <- lm(perc_unins ~ post + expand_ever + post*expand_ever, data=reg.dat)
msummary(dd.ins.reg)

TWFE and Event Studies

########################################
## Event Studies in R
########################################
library(tidyverse)
library(modelsummary)
library(data.table)
library(fixest)
mcaid.data <- read_tsv("https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt")

## Common treatment timing
reg.dat <- as.data.table(mcaid.data) %>% 
  filter(expand_year==2014 | is.na(expand_year), !is.na(expand_ever)) %>%
  mutate(perc_unins=uninsured/adult_pop,
         post = (year>=2014), 
         treat=post*expand_ever)

mod.twfe <- feols(perc_unins~i(year, expand_ever, ref=2013) | State + year,
                  cluster=~State,
                  data=reg.dat)
iplot(mod.twfe, 
      xlab = 'Time to treatment',
      main = 'Event study')


## Differential treatment timing
reg.dat <- as.data.table(mcaid.data) %>% 
  filter(!is.na(expand_ever)) %>%
  mutate(perc_unins=uninsured/adult_pop)
reg.dat[, time_to_treat := ifelse(expand_ever==TRUE, year-expand_year, -1)]

reg.dat <- reg.dat %>%
  mutate(time_to_treat=ifelse(time_to_treat<=-3,-3,time_to_treat))

mod.twfe <- feols(perc_unins~i(time_to_treat, expand_ever, ref=-1) | State + year,
                  cluster=~State,
                  data=reg.dat)
iplot(mod.twfe, 
      xlab = 'Time to treatment',
      main = 'Event study')

Recent DD Advancements

########################################
## Recent DD Estimators in R
########################################
library(tidyverse)
library(modelsummary)
library(data.table)
library(fixest)
mcaid.data <- read_tsv("https://raw.githubusercontent.com/imccart/empirical-methods/main/data/medicaid-expansion/mcaid-expand-data.txt")


## Callaway and Sant'Anna
library(did)
library(DRDID)

reg.dat <- mcaid.data %>% 
  filter(!is.na(expand_ever)) %>%
  mutate(perc_unins=uninsured/adult_pop,
         post = (year>=2014), 
         treat=post*expand_ever,
         expand_year=ifelse(is.na(expand_year),0,expand_year)) %>%
  filter(!is.na(perc_unins)) %>%
  group_by(State) %>%
  mutate(stategroup=cur_group_id()) %>% ungroup()

mod.cs <- att_gt(yname="perc_unins", tname="year", idname="stategroup",
                 gname="expand_year",
                 data=reg.dat, panel=TRUE, est_method="dr",
                 allow_unbalanced_panel=TRUE)
mod.cs.event <- aggte(mod.cs, type="dynamic")
ggdid(mod.cs.event)


## Sun and Abraham
reg.dat <- mcaid.data %>% 
  filter(!is.na(expand_ever)) %>%
  mutate(perc_unins=uninsured/adult_pop,
         post = (year>=2014), 
         treat=post*expand_ever,
         expand_year = ifelse(expand_ever==FALSE, 10000, expand_year),
         time_to_treat = ifelse(expand_ever==FALSE, -1, year-expand_year),
         time_to_treat = ifelse(time_to_treat < -3, -3, time_to_treat))

mod.sa <- feols(perc_unins~sunab(expand_year, time_to_treat) | State + year,
                cluster=~State,
                data=reg.dat)
iplot(mod.sa,
      xlab = 'Time to treatment',
      main = 'Event study')


## de Chaisemartin and D'Haultfoeuille
library(DIDmultiplegt)
reg.dat <- mcaid.data %>% 
  filter(!is.na(expand_ever)) %>%
  mutate(perc_unins=uninsured/adult_pop,
         treat=case_when(
           expand_ever==FALSE ~ 0,
           expand_ever==TRUE & expand_year<year ~ 0,
           expand_ever==TRUE & expand_year>=year ~ 1))

mod.ch <- did_multiplegt(df=reg.dat, Y="perc_unins", G="State", T="year", D="treat",
                         placebo=3, dynamic=4, brep=50, cluster="State")
mod.ch