Method Package - Control Variables

	(1)	(2)
\* p < 0.1, \*\* p < 0.05, \*\*\* p < 0.01
(Intercept)	2.976***	0.036
	(0.408)	(0.310)
CG	0.081	0.130**
	(0.073)	(0.052)
TI		10.433***
		(0.344)
Num.Obs.	1000	1000
R2	0.001	0.480

Causal Graph

Simulation

# Confounder simulation: TI shifts both CG (mean .5 - TI) and Performance
# (mean TI), while the direct effect of CG on Performance is fixed at zero
# through the `0 * CG` term.
N <- 1000
ds <- tibble(TI = rbinom(N, 1, .25))
ds <- mutate(ds, CG = rnorm(N, .5 - TI, .2))
ds <- mutate(ds, Performance = rnorm(N, TI + 0 * CG, 1))

# Model 1 leaves the confounder out; model 2 controls for it.
lm1 <- lm(Performance ~ CG, data = ds)
lm2 <- lm(Performance ~ CG + TI, data = ds)

msummary(
  list(lm1, lm2),
  stars = stars,
  gof_omit = gof_omit,
  output = "markdown"
)

	(1)	(2)
\* p < 0.1, \*\* p < 0.05, \*\*\* p < 0.01
(Intercept)	0.386***	-0.216**
	(0.038)	(0.085)
CG	-0.790***	0.296*
	(0.071)	(0.154)
TI		1.334***
		(0.169)
Num.Obs.	1000	1000
R2	0.111	0.164

Fixed effects as a special case

Definition

Effects that are the same for every industry, year, firm, or individual can be adjusted for by using fixed effects.

Benefits

We do not need to measure the specific variables and can just use indicator variables for each category (e.g. one for each industry).

See more in chapter 16 of Huntington-Klein (2021)

Fixed effects (for industry)

# Industry-level simulation: every industry gets its own shift in CG and its
# own shift in Performance; the two shifts are drawn independently.
Nind <- 20
N <- 5000

# One row per industry.
di <- tibble(ind_number = seq_len(Nind)) %>%
  mutate(ind_CG = rnorm(Nind, 0, 1),
         ind_performance = rnorm(Nind, 0, 1))

# One row per firm: draw an industry, attach its shifts, then simulate CG and
# Performance. The `0 * CG` term keeps the true effect of CG at zero.
ds <- tibble(ind_number = sample(seq_len(Nind), N, replace = TRUE))
ds <- left_join(ds, di, by = "ind_number")
ds <- mutate(ds,
             CG = rnorm(N, .5 + ind_CG, .2),
             Performance = rnorm(N, 0 * CG + ind_performance, 1))

# Preview the industry-level table (one row per industry).
glimpse(di, width = 50)

Rows: 20
Columns: 3
$ ind_number      <int> 1, 2, 3, 4, 5, 6, 7, 8, …
$ ind_CG          <dbl> 0.9489205, 1.0251399, -0…
$ ind_performance <dbl> -0.72158433, -0.15900596…

# Preview the firm-level sample after the industry shifts have been joined on.
glimpse(ds, width = 50)

Rows: 5,000
Columns: 5
$ ind_number      <int> 2, 12, 2, 1, 13, 12, 10,…
$ ind_CG          <dbl> 1.0251399, 0.6569699, 1.…
$ ind_performance <dbl> -0.15900596, 0.06778212,…
$ CG              <dbl> 1.71052843, 0.93799977, …
$ Performance     <dbl> 0.673275578, -0.00905015…

# (1) Pooled OLS that ignores industry.
lm1 <- lm(Performance ~ CG, data = ds)
# (2) OLS with one explicit indicator per industry.
lm2 <- lm(Performance ~ CG + factor(ind_number), data = ds)
# (3) The same specification with ind_number declared as a fixed effect after
#     the `|`, so the industry indicators are not reported as coefficients.
library(fixest)
fe <- feols(Performance ~ CG | ind_number, data = ds)

msummary(
  list(lm1, lm2, fe),
  stars = stars,
  gof_omit = gof_omit,
  output = "markdown"
)

	(1)	(2)	(3)
\* p < 0.1, \*\* p < 0.05, \*\*\* p < 0.01
(Intercept)	-0.536***	-0.709***
	(0.021)	(0.119)
CG	0.377***	-0.019	-0.019
	(0.018)	(0.070)	(0.079)
factor(ind_number)2		0.626***
		(0.090)
factor(ind_number)3		-0.923***
		(0.117)
factor(ind_number)4		-0.200**
		(0.092)
factor(ind_number)5		-0.118
		(0.137)
factor(ind_number)6		-1.163***
		(0.277)
factor(ind_number)7		0.353**
		(0.172)
factor(ind_number)8		-0.218
		(0.157)
factor(ind_number)9		1.484***
		(0.086)
factor(ind_number)10		1.708***
		(0.142)
factor(ind_number)11		0.909***
		(0.136)
factor(ind_number)12		0.815***
		(0.091)
factor(ind_number)13		0.868***
		(0.090)
factor(ind_number)14		2.120***
		(0.100)
factor(ind_number)15		-2.404***
		(0.150)
factor(ind_number)16		2.314***
		(0.150)
factor(ind_number)17		-1.187***
		(0.201)
factor(ind_number)18		0.315***
		(0.087)
factor(ind_number)19		0.161
		(0.189)
factor(ind_number)20		-0.839***
		(0.229)
Num.Obs.	5000	5000	5000
R2	0.083	0.578	0.578
R2 Within			0.000
Std.Errors			by: ind_number
FE: ind_number			X

# Industry-level simulation in which the industry shift in Performance is
# correlated with the industry shift in CG.
Nind <- 20
N <- 5000
correl <- -0.5

# One row per industry. ind_performance mixes a fresh standard-normal draw
# with ind_CG; the sqrt(1 - correl^2) weight keeps its variance at 1 and gives
# it correlation `correl` with ind_CG.
di <- tibble(
    ind_number = 1:Nind,
    ind_CG = rnorm(Nind, 0, 1)) %>%
  mutate(
    ind_performance = sqrt(1 - correl^2) * rnorm(Nind, 0, 1) + correl * ind_CG)

# One row per firm: draw an industry, attach its shifts, then simulate CG and
# Performance. The `0 * CG` term keeps the true effect of CG at zero.
ds <- tibble(
    ind_number = sample(1:Nind, N, replace = TRUE)) %>%
  left_join(
    di, by = "ind_number") %>%
  mutate(
    CG = rnorm(N, .5 + ind_CG, .2),
    Performance = rnorm(N, 0 * CG + ind_performance, 1)
  )

# Preview the industry-level table (one row per industry).
glimpse(di, width = 50)

Rows: 20
Columns: 3
$ ind_number      <int> 1, 2, 3, 4, 5, 6, 7, 8, …
$ ind_CG          <dbl> -0.8936526, 0.8608516, -…
$ ind_performance <dbl> 1.7328501, -1.7181229, -…

# Preview the firm-level sample after the industry shifts have been joined on.
glimpse(ds, width = 50)

Rows: 5,000
Columns: 5
$ ind_number      <int> 17, 2, 14, 3, 11, 8, 9, …
$ ind_CG          <dbl> -1.2216953, 0.8608516, -…
$ ind_performance <dbl> 1.9136760, -1.7181229, -…
$ CG              <dbl> -0.7540503, 1.5890434, 0…
$ Performance     <dbl> 1.6361572, -2.1426585, -…

# Pooled OLS versus the industry fixed-effects estimator on the same sample.
lm1 <- lm(Performance ~ CG, data = ds)
fe <- feols(Performance ~ CG | ind_number, data = ds)

msummary(
  list(lm1, fe),
  stars = stars,
  gof_omit = gof_omit
)

	(1)	(2)
\* p < 0.1, \*\* p < 0.05, \*\*\* p < 0.01
(Intercept)	0.425***
	(0.019)
CG	-0.703***	0.181**
	(0.016)	(0.084)
Num.Obs.	5000	5000
R2	0.278	0.536
R2 Within		0.001
Std.Errors		by: ind_number
FE: ind_number		X

What do fixed effects do?

Code

# Raw scatter of Performance against CG with all industries pooled.
fe_plot <- ggplot(ds, aes(x = CG, y = Performance)) +
  geom_point()
plot(fe_plot)

Code

# Same scatter, coloured by industry; the 20-entry legend is hidden.
fe_colour <- ggplot(
  ds,
  aes(x = CG, y = Performance, colour = factor(ind_number))
) +
  geom_point() +
  theme(legend.position = "none")
plot(fe_colour)

Code

# Subtract each industry's own mean from Performance and CG, then plot the
# within-industry deviations, coloured by industry with the legend hidden.
fe_demean <- ds %>%
  group_by(ind_number) %>%
  mutate(
    Performance2 = Performance - mean(Performance),
    CG2 = CG - mean(CG)
  ) %>%
  ggplot(aes(x = CG2, y = Performance2, colour = factor(ind_number))) +
  geom_point() +
  theme(legend.position = "none")
plot(fe_demean)

Speedboat Racing Example (Booth and Yamamura 2017)

Mixed-sex and single-sex races determined by lottery (Randomisation)
7 race courses
Multiple races in the same month and location

Results of Speedboat Races

Code

# Load the race data; the .Rdata file is expected to provide an object named
# `table` (it is used immediately below).
load(here("data", "booth_yamamura.Rdata"))

# Keep only the columns used in the analysis.
table <- select(
  as_tibble(table),
  p_id, women_dat, time, ltime, mix_ra, course, race_id, yrmt_locid
)

# Drop rows with a missing value in any kept column, then drop `time` and
# reorder the remaining columns.
table_clean <- table %>%
  filter(complete.cases(table)) %>%
  select(ltime, women_dat, mix_ra, course, p_id, race_id, yrmt_locid)

# Regress ltime on the mixed-race indicator and its interaction with the
# women indicator, with fixed effects for course, p_id and yrmt_locid.
# Standard errors are clustered by race.
# NOTE(review): ltime is presumably log race time -- confirm against the data.
ltime_reg <- feols(
  ltime ~ women_dat:mix_ra + mix_ra | course + p_id + yrmt_locid,
  cluster = "race_id",
  data = table_clean
)
msummary(ltime_reg, stars = stars, gof_omit = gof_omit)

	(1)
\* p < 0.1, \*\* p < 0.05, \*\*\* p < 0.01
mix_ra	-0.002***
	(0.000)
women_dat × mix_ra	0.007***
	(0.001)
Num.Obs.	142346
R2	0.361
R2 Within	0.001
Std.Errors	by: race_id
FE: course	X
FE: p_id	X
FE: yrmt_locid	X

This requires an explanation of interactions. Luckily, it’s relatively simple with two discrete variables.

| ltime | man    | woman  |
|-------|:------:|:------:|
| same  | 0      | 0      |
| mixed | -0.002 | 0.005  |

	(1)	(2)
\* p < 0.1, \*\* p < 0.05, \*\*\* p < 0.01
(Intercept)	-0.074*	-0.013
	(0.043)	(0.030)
corp_gov	1.048***	-0.476***
	(0.042)	(0.036)
market_return		0.503***
		(0.007)
Num.Obs.	5000	5000
R2	0.110	0.562

	(1)	(2)
\* p < 0.1, \*\* p < 0.05, \*\*\* p < 0.01
(Intercept)	3.437***	-0.659***
	(0.125)	(0.043)
corp_gov	0.004	0.621***
	(0.100)	(0.045)
survival		4.096***
		(0.142)
corp_gov × survival		-0.616***
		(0.118)
Num.Obs.	820	5000
R2	0.000	0.266

Control Variables

Causal Graphs

An Example of a Causal Graph

Difference with equilibrium models

Assignment: CSR report

Measurement error and control variables

Causal Graph

Simulation

Confounders and control variables

Causal Graph

Simulation

Fixed effects as a special case

Fixed effects (for industry)

What do fixed effects do?

Speedboat Racing Example (Booth and Yamamura 2017)

Results of Speedboat Races

Colliders and bad controls

Bad Controls, Survival Bias, Selection Bias, Self-Selection Bias

Example in the assignment

Simulation Bad Control

Survival Bias

Visualisation of Colliders (and Interactions)

Pitching Template

Pitching Format

Pitching Format

References