D = 0, or after treatment and D = 1. If we control for time, we’re effectively controlling for treatment.
#Create our data
#Treatment D switches on for everyone from 2007 onward, so D is fully determined by year
#Y gets a treatment effect of 2, a time trend of .5 per year, and standard normal noise
diddata <- tibble(year = sample(2002:2010, 10000, replace = TRUE)) %>%
  mutate(D = year >= 2007) %>%
  #n() keeps the number of noise draws tied to the number of rows
  mutate(Y = 2*D + .5*year + rnorm(n()))
#Now, control for year: subtract each year's mean from D and from Y
diddata <- diddata %>%
  group_by(year) %>%
  mutate(D.r = D - mean(D), Y.r = Y - mean(Y)) %>%
  #Drop the year grouping so it doesn't leak into later steps
  ungroup()
#What's the difference with and without treatment?
diddata %>% group_by(D) %>% summarize(Y = mean(Y))
## # A tibble: 2 x 2
## D Y
## <lgl> <dbl>
## 1 FALSE 1002.
## 2 TRUE 1006.
#And after controlling for year? D never varies within a year, so D.r is always 0
#and there is no treated/untreated contrast left to compare
diddata %>% group_by(D.r) %>% summarize(Y = mean(Y.r))
## # A tibble: 1 x 2
## D.r Y
## <dbl> <dbl>
## 1 0 -1.44e-14
#Create our data: two groups observed over 2002-2010
diddata <- tibble(year = sample(2002:2010, 10000, replace = TRUE),
                  group = sample(c('TreatedGroup','UntreatedGroup'), 10000, replace = TRUE)) %>%
  mutate(after = (year >= 2007)) %>%
  #Only let the treatment be applied to the treated group
  mutate(D = after*(group=='TreatedGroup')) %>%
  #Treatment effect of 2 plus a time trend shared by both groups;
  #n() keeps the number of noise draws tied to the number of rows
  mutate(Y = 2*D + .5*year + rnorm(n()))
#Now, get before-after differences for both groups
means <- diddata %>%
  group_by(group, after) %>%
  summarize(Y = mean(Y)) %>%
  #Drop the leftover grouping so the lookups below work on a plain table
  ungroup()
#Before-after difference for untreated, has time effect only
bef.aft.untreated <- (means %>% filter(group == 'UntreatedGroup', after) %>% pull(Y)) -
  (means %>% filter(group == 'UntreatedGroup', !after) %>% pull(Y))
#Before-after for treated, has time and treatment effect
bef.aft.treated <- (means %>% filter(group == 'TreatedGroup', after) %>% pull(Y)) -
  (means %>% filter(group == 'TreatedGroup', !after) %>% pull(Y))
#Difference-in-Difference! Take the Time + Treatment effect, and remove the Time effect
DID <- bef.aft.treated - bef.aft.untreated
DID
## [1] 2.002812
#Create our data: as before, but now the two groups also differ at baseline
diddata <- tibble(year = sample(2002:2010, 10000, replace = TRUE),
                  group = sample(c('TreatedGroup','UntreatedGroup'), 10000, replace = TRUE)) %>%
  mutate(after = (year >= 2007)) %>%
  #Only let the treatment be applied to the treated group
  mutate(D = after*(group=='TreatedGroup')) %>%
  #The (group == 'TreatedGroup') term gives the treated group a fixed gap of 1 in every year;
  #n() keeps the number of noise draws tied to the number of rows
  mutate(Y = 2*D + .5*year + (group == 'TreatedGroup') + rnorm(n()))
#Now, get before-after differences for both groups
means <- diddata %>%
  group_by(group, after) %>%
  summarize(Y = mean(Y)) %>%
  #Drop the leftover grouping so the lookups below work on a plain table
  ungroup()
#Before-after difference for untreated, has time effect only
bef.aft.untreated <- (means %>% filter(group == 'UntreatedGroup', after) %>% pull(Y)) -
  (means %>% filter(group == 'UntreatedGroup', !after) %>% pull(Y))
#Before-after for treated, has time and treatment effect
bef.aft.treated <- (means %>% filter(group == 'TreatedGroup', after) %>% pull(Y)) -
  (means %>% filter(group == 'TreatedGroup', !after) %>% pull(Y))
#Difference-in-Difference! Take the Time + Treatment effect, and remove the Time effect
DID <- bef.aft.treated - bef.aft.untreated
DID
## [1] 2.0551
#Bring the saved objects from mariel.RData into the workspace; the code below uses df from here on
load('mariel.RData')
#Build the analysis variables: log hourly wage, a post-period flag (year 81 onward),
#and a treated-group flag (smsarank equal to 26)
df <- df %>%
  mutate(
    lwage = log(hourwage),
    after = year >= 81,
    miami = smsarank == 26
  )
#Difference in difference starts from the four cell means:
#before/after crossed with miami/not-miami, for log wage and for unemp
means <- df %>%
  group_by(after, miami) %>%
  summarize(
    lwage = mean(lwage),
    unemp = mean(unemp)
  )
means
## # A tibble: 4 x 4
## # Groups: after [2]
## after miami lwage unemp
## <lgl> <lgl> <dbl> <dbl>
## 1 FALSE FALSE 1.88 0.0619
## 2 FALSE TRUE 1.74 0.0547
## 3 TRUE FALSE 1.84 0.0794
## 4 TRUE TRUE 1.72 0.0854
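Before-after change in log wage, Miami: `means$lwage[4] - means$lwage[2]` = -0.019. Uh oh!
Before-after change in log wage, not Miami: `means$lwage[3] - means$lwage[1]` = -0.046
Before-after change in unemployment, Miami: `means$unemp[4] - means$unemp[2]` = 0.031
Before-after change in unemployment, not Miami: `means$unemp[3] - means$unemp[1]` = 0.018
Load the data with `read_csv('http://nickchk.com/eitc.csv')`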
Create `after` for years 1994+, and `treated` if they have any children.
Average `work` within `year` and `treated`. `ggplot()` average `work` (`y`) separately against `year` (`x`) for treated and untreated (`color`), then `geom_vline(aes(xintercept= 1994))` to add a vertical line at treatment. Any concerns they’re already trending together/apart in 1991-1993?
Then calculate the difference-in-difference estimate for `work`.
#Load the EITC data and flag the post-period (1994 onward) and the treated group (any children)
df <- read_csv('http://nickchk.com/eitc.csv') %>%
  mutate(after = year >= 1994,
         treated = children > 0)
#Average work by group and year, to check the pre-1994 trends visually
plotdata <- df %>%
  group_by(treated, year) %>%
  summarize(work = mean(work))
ggplot(plotdata, aes(x = year, y = work, color = treated)) +
  geom_line() +
  geom_vline(aes(xintercept = 1994))
# They don't appear to be trending away or towards each other before 1994. Good!
#Now DID:
did <- df %>%
  group_by(treated, after) %>%
  summarize(work = mean(work)) %>%
  #Drop the leftover grouping so the lookups below work on a plain table
  ungroup()
#Pick each cell by its labels instead of its row position, so the result
#does not depend on how summarize() happens to order the rows
#Before-after difference for untreated, has time effect only
untreat.diff <- (did %>% filter(!treated, after) %>% pull(work)) -
  (did %>% filter(!treated, !after) %>% pull(work))
#Before-after for treated, has time and treatment effect
treat.diff <- (did %>% filter(treated, after) %>% pull(work)) -
  (did %>% filter(treated, !after) %>% pull(work))
did.estimate <- treat.diff - untreat.diff
#Show the estimate
did.estimate
## [1] 0.04687313