Generated Independent

Author

Stijn Masschelein

This is probably not worthwhile further investigating. It’s quite niche.

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.2.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(modelsummary)
library(here)

here() starts at /Users/stijnmasschelein/Dropbox/github/site

N <- 100
nsim <- 1000
ntotal <- N * nsim

Residuals

sim_data <-
  tibble(
    sim = rep(1:nsim, each = N),
    x1 = rnorm(ntotal),
    x2 = rnorm(ntotal),
    z1 = rnorm(ntotal)
  ) %>%
  mutate(
    z = rnorm(ntotal, z1, 5),
    y = rnorm(ntotal, x1 + (z - z1), 3)
  ) %>%
  nest(.by = sim)
test_data <- pull(sim_data, data)[[1]]

two_step <- function(data){
  lm1 <- lm(z ~ z1, data = data)
  resid_z <- residuals(lm1)
  lm2 <- lm(y ~ x1 + x2 + resid_z, data = data)
  coefs <- summary(lm2)$coefficients
  return(coefs[, 4])
}
two_step(test_data)

 (Intercept)           x1           x2      resid_z 
8.725596e-01 1.580489e-03 8.085737e-01 1.849745e-23

pvalues <- sim_data %>%
  mutate(pvalues = map(.x = data, .f = ~ two_step(.))) %>%
  unnest_wider(pvalues) %>%
  pivot_longer(cols = c(x1, x2, resid_z),
               values_to = "pvalue", names_to = "variable") %>%
  select(sim, pvalue, variable)

pvalues %>%
  mutate(is_sign = if_else(pvalue < 0.05, 1, 0)) %>%
  summarise(proportion = mean(is_sign), .by = variable)

# A tibble: 3 × 2
  variable proportion
  <chr>         <dbl>
1 x1           0.89  
2 x2           0.0390
3 resid_z      1

Absolute Residuals

sim_data <-
  tibble(
    sim = rep(1:nsim, each = N),
    x1 = rnorm(ntotal),
    x2 = rnorm(ntotal),
    z1 = rnorm(ntotal)
  ) %>%
  mutate(
    z = rnorm(ntotal, z1, 5),
    y = rnorm(ntotal, x1 + abs(z - z1), 3)
  ) %>%
  nest(.by = sim)
test_data <- pull(sim_data, data)[[1]]

two_step_abs <- function(data){
  lm1 <- lm(z ~ z1, data = data)
  abs_resid_z <- abs(residuals(lm1))
  lm2 <- lm(y ~ x1 + x2 + abs_resid_z, data = data)
  coefs <- summary(lm2)$coefficients
  return(coefs[, 4])
}
two_step_abs(test_data)

 (Intercept)           x1           x2  abs_resid_z 
1.650355e-01 1.403305e-05 1.818971e-01 7.174093e-14

pvalues <- sim_data %>%
  mutate(pvalues = map(.x = data, .f = ~ two_step_abs(.))) %>%
  unnest_wider(pvalues) %>%
  pivot_longer(cols = c(x1, x2, abs_resid_z),
               values_to = "pvalue", names_to = "variable") %>%
  select(sim, pvalue, variable)

pvalues %>%
  mutate(is_sign = if_else(pvalue < 0.05, 1, 0)) %>%
  summarise(proportion = mean(is_sign), .by = variable)

# A tibble: 3 × 2
  variable    proportion
  <chr>            <dbl>
1 x1               0.874
2 x2               0.046
3 abs_resid_z      1