library(SortedEffects)
# Data on being denied for a mortgage.
data(mortgage)
# Save the formula to reuse later
fm <- deny ~ black + p_irat + hse_inc + ccred + mcred + pubrec + ltv_med +
ltv_high + denpmi + selfemp + single + hischl
# spe() for "Sorted Partial Effects"
m <- spe(fm, data = mortgage,
var = 'black', # black is the treatment variable
method = 'logit',
us = c(2:98)/100, # Get the distribution from the 2nd to 98th percentile
b = 500, bc = TRUE) # Use bootstrapped SEs and bias-correction
t <- c("deny", "p_irat", "black", "hse_inc", "ccred", "mcred", "pubrec",
"denpmi", "selfemp", "single", "hischl", "ltv_med", "ltv_high")
classify <- ca(fm, t = t, data = mortgage, var = 'black', method = 'logit',
cl = 'both', # Get WHO the most and least are, not how different they are
b = 500, bc = TRUE)
## Using 1 CPUs now.
denpmi
) had smallest effects of black
, those who were single
had the biggestGroup | Most | Least | Ratio |
---|---|---|---|
denpmi | 0.0061150 | 0.0476881 | 0.1282283 |
hischl | 0.9321339 | 0.9961003 | 0.9357831 |
hse_inc | 0.2805148 | 0.2103088 | 1.3338234 |
mcred | 2.0154780 | 1.3658700 | 1.4756001 |
p_irat | 0.3892355 | 0.2494585 | 1.5603213 |
ccred | 4.7913548 | 1.2826253 | 3.7355843 |
selfemp | 0.1734995 | 0.0422644 | 4.1051013 |
deny | 0.4480543 | 0.0987269 | 4.5383183 |
black | 0.3863966 | 0.0638143 | 6.0550127 |
single | 0.6120107 | 0.0953380 | 6.4193784 |
ltv_high | 0.1164659 | 0.0134757 | 8.6426423 |
pubrec | 0.4616331 | 0.0510643 | 9.0402392 |
ltv_med | 0.5977683 | 0.0539089 | 11.0884909 |
\[ Y = \beta_0 + \beta_1X + \varepsilon \] \[ \beta_0 = \gamma_{00} + \nu_{00} \] \[ \beta_1 = \gamma_{10} + \gamma_{11}W + \nu_{01} \]
## Linear mixed model fit by REML ['lmerMod']
## Formula: deny ~ p_irat + hse_inc + ccred + mcred + pubrec + ltv_med +
## ltv_high + denpmi + selfemp + single + hischl + (single +
## hischl | black)
## Data: mortgage
##
## REML criterion at convergence: 722.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4138 -0.4242 -0.1776 -0.0076 3.9268
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## black (Intercept) 3.803e-03 0.0616722
## single 6.488e-07 0.0008055 -1.00
## hischl 6.975e-05 0.0083517 -1.00 1.00
## Residual 7.726e-02 0.2779482
## Number of obs: 2380, groups: black, 2
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.011466 0.069643 -0.165
## p_irat 0.449568 0.086586 5.192
## hse_inc -0.072378 0.095866 -0.755
## ccred 0.031008 0.003673 8.441
## mcred 0.015003 0.011154 1.345
## pubrec 0.198644 0.023095 8.601
## ltv_med 0.032981 0.012356 2.669
## ltv_high 0.191246 0.033116 5.775
## denpmi 0.704414 0.041051 17.160
## selfemp 0.060240 0.017941 3.358
## single 0.034120 0.011944 2.857
## hischl -0.134258 0.045705 -2.937
##
## Correlation of Fixed Effects:
## (Intr) p_irat hse_nc ccred mcred pubrec ltv_md ltv_hg denpmi selfmp
## p_irat -0.155
## hse_inc -0.042 -0.781
## ccred -0.090 -0.073 0.077
## mcred -0.267 0.073 -0.106 -0.127
## pubrec 0.019 -0.054 0.012 -0.252 0.003
## ltv_med -0.015 -0.063 0.020 -0.020 -0.150 -0.043
## ltv_high 0.031 -0.048 0.003 -0.019 -0.074 -0.044 0.180
## denpmi 0.017 -0.030 0.020 -0.006 -0.018 -0.058 -0.076 -0.073
## selfemp -0.032 -0.078 0.064 0.012 0.040 -0.039 0.075 0.001 0.009
## single -0.018 0.018 -0.043 0.022 -0.154 0.016 0.025 -0.018 -0.005 0.009
## hischl -0.746 0.017 0.051 0.013 0.045 -0.001 -0.007 -0.033 -0.023 -0.002
## single
## p_irat
## hse_inc
## ccred
## mcred
## pubrec
## ltv_med
## ltv_high
## denpmi
## selfemp
## single
## hischl -0.058
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
library(grf)
mortgage <- mortgage %>% mutate(holdout = runif(n()) > .5)
holdout <- mortgage %>% filter(holdout)
training <- mortgage %>% filter(!holdout)
W = training %>% pull(black) %>% as.matrix()
X = training %>% select(p_irat, hse_inc, ccred, mcred, pubrec,
denpmi, selfemp, single, hischl, ltv_med, ltv_high) %>% as.matrix()
Y = training %>% pull(deny) %>% as.matrix()
m <- causal_forest(X, Y, W, tune.parameters = 'alpha')
name | Bottom | Top | Ratio |
---|---|---|---|
selfemp | 0.2000000 | 0.1166667 | 0.5833333 |
single | 0.4000000 | 0.3166667 | 0.7916667 |
hischl | 0.9833333 | 0.9666667 | 0.9830508 |
hse_inc | 0.2026083 | 0.2374417 | 1.1719245 |
mcred | 1.5666667 | 1.9333333 | 1.2340426 |
p_irat | 0.2522483 | 0.3322183 | 1.3170289 |
black | 0.1333333 | 0.2333333 | 1.7500000 |
ltv_high | 0.0333333 | 0.0833333 | 2.5000000 |
ccred | 1.9500000 | 5.3333333 | 2.7350427 |
pubrec | 0.0500000 | 0.2166667 | 4.3333333 |
denpmi | 0.0000000 | 0.0333333 | Inf |
ltv_med | 0.0000000 | 0.3333333 | Inf |
Laws
time
is a confounder, but you can’t control for itdiddata <- tibble(Group=c(rep("C",2500),rep("T",2500)),
Time=rep(c(rep("Before",1250),rep("After",1250)),2)) %>%
mutate(Treated = (Group == "T") & Time == "After") %>%
mutate(Y = 2*(Group == "T") + 1.5*(Time == "After") + 3*Treated + rnorm(5000))
did <- diddata %>% group_by(Group,Time) %>% summarize(Y = mean(Y))
before.after.control <- did$Y[1] - did$Y[2]
before.after.treated <- did$Y[3] - did$Y[4]
did.effect <- before.after.treated - before.after.control
did.effect
## [1] 2.936178
## TreatedTRUE
## 2.936178
D
that is assigned based on a cutoff in a running variable, we can use regression discontinuityY
and treatment shouldn’t jump at cutoff - they should be balancedZ
. No back doors in that variation! We have a causal effect