This is probably not worthwhile further investigating. It’s quite niche.
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 4.0.1 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.2.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelsummary)
library(here)
here() starts at /Users/stijnmasschelein/Dropbox/github/site
N <- 100
nsim <- 1000
ntotal <- N * nsim
Residuals
sim_data <-
tibble(
sim = rep(1:nsim, each = N),
x1 = rnorm(ntotal),
x2 = rnorm(ntotal),
z1 = rnorm(ntotal)
) %>%
mutate(
z = rnorm(ntotal, z1, 5),
y = rnorm(ntotal, x1 + (z - z1), 3)
) %>%
nest(.by = sim)
test_data <- pull(sim_data, data)[[1]]
two_step <- function(data){
lm1 <- lm(z ~ z1, data = data)
resid_z <- residuals(lm1)
lm2 <- lm(y ~ x1 + x2 + resid_z, data = data)
coefs <- summary(lm2)$coefficients
return(coefs[, 4])
}
two_step(test_data)
(Intercept) x1 x2 resid_z
8.725596e-01 1.580489e-03 8.085737e-01 1.849745e-23
pvalues <- sim_data %>%
mutate(pvalues = map(.x = data, .f = ~ two_step(.))) %>%
unnest_wider(pvalues) %>%
pivot_longer(cols = c(x1, x2, resid_z),
values_to = "pvalue", names_to = "variable") %>%
select(sim, pvalue, variable)
pvalues %>%
mutate(is_sign = if_else(pvalue < 0.05, 1, 0)) %>%
summarise(proportion = mean(is_sign), .by = variable)
# A tibble: 3 × 2
variable proportion
<chr> <dbl>
1 x1 0.89
2 x2 0.0390
3 resid_z 1
Absolute Residuals
sim_data <-
tibble(
sim = rep(1:nsim, each = N),
x1 = rnorm(ntotal),
x2 = rnorm(ntotal),
z1 = rnorm(ntotal)
) %>%
mutate(
z = rnorm(ntotal, z1, 5),
y = rnorm(ntotal, x1 + abs(z - z1), 3)
) %>%
nest(.by = sim)
test_data <- pull(sim_data, data)[[1]]
two_step_abs <- function(data){
lm1 <- lm(z ~ z1, data = data)
abs_resid_z <- abs(residuals(lm1))
lm2 <- lm(y ~ x1 + x2 + abs_resid_z, data = data)
coefs <- summary(lm2)$coefficients
return(coefs[, 4])
}
two_step_abs(test_data)
(Intercept) x1 x2 abs_resid_z
1.650355e-01 1.403305e-05 1.818971e-01 7.174093e-14
pvalues <- sim_data %>%
mutate(pvalues = map(.x = data, .f = ~ two_step_abs(.))) %>%
unnest_wider(pvalues) %>%
pivot_longer(cols = c(x1, x2, abs_resid_z),
values_to = "pvalue", names_to = "variable") %>%
select(sim, pvalue, variable)
pvalues %>%
mutate(is_sign = if_else(pvalue < 0.05, 1, 0)) %>%
summarise(proportion = mean(is_sign), .by = variable)
# A tibble: 3 × 2
variable proportion
<chr> <dbl>
1 x1 0.874
2 x2 0.046
3 abs_resid_z 1