GATE <- Above <- Test -> earn
back door
Test
in the normal way?
Test
back door, we have effectively random assignment, like an experiment!
Test
back door, we’re closing all back doors
# Simulated data: test scores are uniform on [0, 100]; a score of 75 or more
# puts you in GATE, which adds 10 to earnings (earnings also rise with test / 2)
rdd.data <- tibble(test = runif(1000) * 100) %>%
  mutate(
    GATE = test >= 75,
    earn = runif(1000) * 40 + 10 * GATE + test / 2
  )

# The "bandwidth" sets how far either side of the cutoff we look
# (the value is arbitrary in this example).
# With a cutoff of 75, a bandwidth of 2 keeps scores between 75 - 2 and 75 + 2
bandwidth <- 2

rdd <- rdd.data %>%
  # Keep only the observations inside the bandwidth
  filter(abs(75 - test) < bandwidth) %>%
  # Flag which side of the cutoff each observation falls on
  mutate(above = test >= 75) %>%
  # Average outcome just below the cutoff vs. just above it
  group_by(above) %>%
  summarize(earn = mean(earn))
rdd

# The gap between the two means is our estimated effect (the true value is 10)
rdd$earn[2] - rdd$earn[1]
## # A tibble: 2 x 2
## above earn
## <lgl> <dbl>
## 1 FALSE 55.2
## 2 TRUE 66.0
## [1] 10.80055
above
should have no relationship with any back door variable after focusing around the cutoff
# Simulated data with a back door variable: it is related to test, it pushes
# people over the GATE threshold, and it also raises earnings directly
rdd.data <- tibble(test = runif(500) * 100) %>%
  mutate(
    backdoor = rnorm(500) + test / 50,
    GATE = test + backdoor >= 75,
    earn = runif(500) * 40 + 10 * GATE + 5 * backdoor + test / 2
  )

bandwidth <- 2

rdd <- rdd.data %>%
  # Keep only the observations inside the bandwidth
  filter(abs(75 - test) < bandwidth) %>%
  # Flag which side of the cutoff each observation falls on
  mutate(above = test >= 75) %>%
  # This time compare the back door variable itself just below vs. just above
  group_by(above) %>%
  summarize(backdoor = mean(backdoor))
rdd
## # A tibble: 2 x 2
## above backdoor
## <lgl> <dbl>
## 1 FALSE 1.22
## 2 TRUE 1.57
# Not a lot of difference!
rdd$backdoor[2] - rdd$backdoor[1]
## [1] 0.3516092
politicaldata
package, and load data(house_results)
hr76
and hr16
with only 1976 and 2016
repadv76
equal to rep
vote minus dem
for 1976, and filter only to those with !is.na(repadv76)
repwins16
equal to rep > dem
for 2016, and filter !is.na(repwins16)
select()
only district
,repadv76
, repwins16
, and inner_join()
the two data sets
repwins16
mean above and below repadv76=0
with a bandwidth of .04
# install.packages("politicaldata")
library(politicaldata)
data(house_results)

# 1976 results: Republican vote advantage (rep minus dem) per district,
# dropping districts where it is missing
hr76 <- house_results %>%
  filter(year == 1976) %>%
  mutate(repadv76 = rep - dem) %>%
  filter(!is.na(repadv76)) %>%
  select(district, repadv76)

# 2016 results: did the Republican out-poll the Democrat in each district,
# dropping districts where that can't be determined
hr16 <- house_results %>%
  filter(year == 2016) %>%
  mutate(repwins16 = rep > dem) %>%
  filter(!is.na(repwins16)) %>%
  select(district, repwins16)

# Keep districts present in both years. district is the only shared column;
# naming it explicitly avoids the "Joining with by = ..." message and keeps the
# join from silently changing if either table ever gains another common column
fulldata <- inner_join(hr76, hr16, by = "district")

# How far either side of the cutoff (repadv76 = 0) we look
bandwidth <- .04

fulldata %>%
  # Use the bandwidth variable rather than repeating the literal .04
  filter(abs(repadv76 - 0) <= bandwidth) %>%
  # Flag districts where the Republican was ahead in 1976
  mutate(above = repadv76 > 0) %>%
  # Share of 2016 Republican wins just below vs. just above the cutoff
  group_by(above) %>%
  summarize(repwins16 = mean(repwins16))
## # A tibble: 2 x 2
## above repwins16
## <lgl> <dbl>
## 1 FALSE 0.737
## 2 TRUE 0.889