# Slide-text fragments that were fused onto the first line of this example.
# Each appears to have lost its leading words in extraction, so they are kept
# verbatim rather than reconstructed:
#   GATE <- Above <- Test -> earn back door
#   Test in the normal way?
#   Test back door, we have effectively random assignment, like an experiment!
#   Test back door, we’re closing all back doors

# Simulate a sharp regression discontinuity: test is uniform on [0, 100],
# GATE switches on at test >= cutoff, and GATE adds 10 to earn.
cutoff <- 75
rdd.data <- tibble(test = runif(1000) * 100) %>%
  mutate(GATE = test >= cutoff) %>%
  mutate(earn = runif(1000) * 40 + 10 * GATE + test / 2)

# Choose a "bandwidth" of how wide around the cutoff to look
# (arbitrary in our example).
# Bandwidth of 2 with a cutoff of 75 means we look from 75-2 to 75+2
bandwidth <- 2

rdd <- rdd.data %>%
  # Just look within the bandwidth
  filter(abs(cutoff - test) < bandwidth) %>%
  # Create a variable indicating we're above the cutoff
  mutate(above = test >= cutoff) %>%
  # And compare our outcome just below the cutoff to just above
  group_by(above) %>%
  summarize(earn = mean(earn))
rdd
## # A tibble: 2 x 2
## above earn
## <lgl> <dbl>
## 1 FALSE 55.2
## 2 TRUE 66.0

# Our effect looks just about right (10 is the truth).
# Row 1 is above == FALSE and row 2 is above == TRUE, so this is above - below.
rdd$earn[2] - rdd$earn[1]
## [1] 10.80055
# above should have no relationship with any back door variable after
# focusing around the cutoff

# Simulate data where a back door variable feeds into both GATE and earn.
cutoff <- 75
rdd.data <- tibble(test = runif(500) * 100) %>%
  mutate(backdoor = rnorm(500) + test / 50) %>%
  mutate(GATE = test + backdoor >= cutoff) %>%
  mutate(earn = runif(500) * 40 + 10 * GATE + 5 * backdoor + test / 2)

bandwidth <- 2

rdd <- rdd.data %>%
  # Just look within the bandwidth
  filter(abs(cutoff - test) < bandwidth) %>%
  # Create a variable indicating we're above the cutoff
  mutate(above = test >= cutoff) %>%
  # And compare the back door variable (not the outcome) just below the
  # cutoff to just above
  group_by(above) %>%
  summarize(backdoor = mean(backdoor))
rdd
## # A tibble: 2 x 2
## above backdoor
## <lgl> <dbl>
## 1 FALSE 1.22
## 2 TRUE 1.57

#Not a lot of difference!
rdd$backdoor[2] - rdd$backdoor[1]
## [1] 0.3516092
# - Install the politicaldata package, and load data(house_results)
# - Create hr76 and hr16 with only 1976 and 2016
# - Create repadv76 equal to rep vote minus dem for 1976, and filter only to
#   those with !is.na(repadv76)
# - Create repwins16 equal to rep > dem for 2016, and filter !is.na(repwins16)
# - select() only district, repadv76, repwins16, and inner_join() the two
#   data sets
# - Compare repwins16 mean above and below repadv76=0 with a bandwidth of .04
#install.packages('politicaldata')
library(politicaldata)
data(house_results)

# 1976 races: rep minus dem is the running variable; drop rows where it is
# missing and keep only the join key plus the new column.
hr76 <- house_results %>%
  filter(year == 1976) %>%
  mutate(repadv76 = rep - dem) %>%
  filter(!is.na(repadv76)) %>%
  select(district, repadv76)

# 2016 races: the outcome is whether rep > dem; same clean-up as above.
hr16 <- house_results %>%
  filter(year == 2016) %>%
  mutate(repwins16 = rep > dem) %>%
  filter(!is.na(repwins16)) %>%
  select(district, repwins16)

# district is the only column the two tables share; naming it explicitly
# documents the key and avoids the "Joining with by = ..." message.
fulldata <- inner_join(hr76, hr16, by = "district")

bandwidth <- .04

fulldata %>%
  # Keep races within the bandwidth of the cutoff (repadv76 = 0)
  filter(abs(repadv76 - 0) <= bandwidth) %>%
  # Create a variable indicating we're above the cutoff
  mutate(above = repadv76 > 0) %>%
  # Compare the mean of repwins16 just below the cutoff to just above
  group_by(above) %>%
  summarize(repwins16 = mean(repwins16))
## # A tibble: 2 x 2
## above repwins16
## <lgl> <dbl>
## 1 FALSE 0.737
## 2 TRUE 0.889