D = 0, or after treatment and D = 1. If we control for time, we’re effectively controlling for treatment.
#Create our data
# Simulate 10,000 observations with a year drawn uniformly from 2002-2010.
# Treatment D switches on for every observation from 2007 onward, so D is a
# deterministic function of year. The outcome Y has a treatment effect of 2,
# a time trend of 0.5 per year, and standard-normal noise.
diddata <- tibble(year = sample(2002:2010, 10000, replace = TRUE)) %>%
  mutate(D = year >= 2007) %>%
  # n() keeps the noise vector the same length as the data
  mutate(Y = 2 * D + .5 * year + rnorm(n()))
# Now, control for year: subtract each year's mean from D and from Y, leaving
# only the within-year variation in D.r and Y.r.
# ungroup() afterwards so the by-year grouping does not linger on diddata.
diddata <- diddata %>%
  group_by(year) %>%
  mutate(D.r = D - mean(D), Y.r = Y - mean(Y)) %>%
  ungroup()
# Raw comparison: mean of Y for untreated (D = FALSE) vs. treated (D = TRUE)
# observations, with nothing controlled for.
diddata %>%
  group_by(D) %>%
  summarize(Y = mean(Y))
## # A tibble: 2 x 2
## D Y
## <lgl> <dbl>
## 1 FALSE 1002.
## 2 TRUE 1006.
# Same comparison after controlling for year: group on the year-demeaned
# treatment D.r and average the year-demeaned outcome Y.r.
# D is constant within each year, so D.r is 0 on every row and only one
# group remains.
diddata %>%
  group_by(D.r) %>%
  summarize(Y = mean(Y.r))
## # A tibble: 1 x 2
## D.r Y
## <dbl> <dbl>
## 1 0 1.84e-15
# Create our data: 10,000 observations, each with a random year (2002-2010)
# and a random assignment to the treated or untreated group.
# The outcome Y has a treatment effect of 2, a time trend of 0.5 per year,
# and standard-normal noise.
diddata <- tibble(
  year = sample(2002:2010, 10000, replace = TRUE),
  group = sample(c('TreatedGroup', 'UntreatedGroup'), 10000, replace = TRUE)
) %>%
  # after marks the post-treatment period (2007 onward)
  mutate(after = (year >= 2007)) %>%
  # Only let the treatment be applied to the treated group
  mutate(D = after * (group == 'TreatedGroup')) %>%
  # n() keeps the noise vector the same length as the data
  mutate(Y = 2 * D + .5 * year + rnorm(n()))
# Mean outcome in each group-by-period cell (treated/untreated x before/after);
# these four means are the inputs to the before-after differences.
# The result is left grouped by `group`, as in the original.
means <- diddata %>%
  group_by(group, after) %>%
  summarize(Y = mean(Y))
# Before-after change in the untreated group's mean outcome.
# No treatment is applied to this group, so the change is the time effect only.
bef.aft.untreated <-
  (means %>% filter(group == 'UntreatedGroup', after == 1) %>% pull(Y)) -
  (means %>% filter(group == 'UntreatedGroup', after == 0) %>% pull(Y))
#Before-after for treated, has time and treatment effect
bef.aft.treated <- filter(means,group