Setup

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lubridate)
library(here)

here() starts at /Users/stijnmasschelein/Library/CloudStorage/Dropbox/Teaching/lecturenotes/method_package

library(fixest)
library(modelsummary)

`modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
  backend. Learn more at: https://vincentarelbundock.github.io/tinytable/

Revert to `kableExtra` for one session:

  options(modelsummary_factory_default = 'kableExtra')
  options(modelsummary_factory_latex = 'kableExtra')
  options(modelsummary_factory_html = 'kableExtra')

Silence this message forever:

  config_modelsummary(startup_message = FALSE)

# Regular expression passed as `gof_omit` to the msummary() calls below.
gof_omit <- "Adj|RMS|IC"
# Declare where this file sits in the project; the here() calls below build
# their data paths with the here package.
i_am("freaky_friday/regressions.qmd")

here() starts at /Users/stijnmasschelein/Library/CloudStorage/Dropbox/Teaching/lecturenotes/method_package

Data

# Build the analysis sample: a Friday indicator, calendar variables, the sign
# of the earnings surprise, and an 11-level surprise quantile
# (1-5 = negative surprises, 6 = zero surprise, 7-11 = positive surprises).
main <- here("data", "freaky_friday", "main.RDS") %>%
  readRDS() %>%
  mutate(
    friday = if_else(weekday == "Fri", 1, 0),
    year = year(anndat),
    month = month(anndat),
    quarter = (month - 1) %/% 3 + 1,
    sign = case_when(
      surprise == 0 ~ "zero",
      surprise < 0 ~ "negative",
      surprise > 0 ~ "positive"
    )
  ) %>%
  # Quintiles are formed separately within each sign-by-year cell.
  mutate(quintile = ntile(surprise, 5), .by = c(sign, year)) %>%
  # Stack the within-sign quintiles into a single ordered scale.
  mutate(
    quantile = case_when(
      sign == "zero" ~ 6,
      sign == "negative" ~ quintile,
      sign == "positive" ~ 6 + quintile
    )
  ) %>%
  glimpse()

Rows: 130,358
Columns: 28
$ ticker       <chr> "A2", "A2", "A2", "A2", "A2", "A2", "A2", "AA0A", "AA0A",…
$ actual       <dbl> -0.0400, -0.1100, -0.1100, -0.0500, -0.0700, -0.1000, -0.…
$ pdf          <chr> "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D…
$ anndats_act  <date> 2004-10-21, 2005-01-26, 2005-04-27, 2005-07-27, 2005-10-…
$ gvkey        <chr> "001081", "001081", "001081", "001081", "001081", "001081…
$ permno       <dbl> 10560, 10560, 10560, 10560, 10560, 10560, 10560, 10656, 1…
$ cusip        <chr> "00392410", "00392410", "00392410", "00392410", "00392410…
$ rdq          <date> 2004-10-21, 2005-01-26, 2005-04-27, 2005-07-27, 2005-10-…
$ anndat       <date> 2004-10-21, 2005-01-26, 2005-04-27, 2005-07-27, 2005-10-…
$ N            <int> 1, 2, 3, 2, 4, 5, 4, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 3, 2, …
$ median       <dbl> -0.09000, -0.08965, -0.05740, -0.03700, -0.10610, -0.0783…
$ mean         <dbl> -0.0900000, -0.0896500, -0.0680000, -0.0370000, -0.100550…
$ mean_days    <dbl> 21.000000, 8.000000, 6.666667, 13.500000, 7.000000, 9.200…
$ car_short    <dbl> 0.097290562, 0.036462040, -0.063605028, -0.004177245, -0.…
$ car_long     <dbl> 0.01028628, -0.20959780, 0.05661647, -0.18368201, 0.20741…
$ date_minus5  <date> 2004-10-16, 2005-01-21, 2005-04-22, 2005-07-22, 2005-10-…
$ date         <date> 2004-10-15, 2005-01-21, 2005-04-22, 2005-07-22, 2005-10-…
$ prc          <dbl> 5.63, 5.89, 4.53, 5.00, 3.26, 4.02, 4.28, 16.75, 15.66, 1…
$ market_value <dbl> 2478185.2, 2592630.8, 1993992.8, 2200875.0, 1434970.5, 17…
$ surprise     <dbl> 0.0088809947, -0.0034550085, -0.0116114790, -0.0026000000…
$ weekday      <ord> Thu, Wed, Wed, Wed, Wed, Wed, Wed, Thu, Thu, Wed, Thu, Th…
$ friday       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, …
$ year         <dbl> 2004, 2005, 2005, 2005, 2005, 2006, 2006, 2003, 2003, 200…
$ month        <dbl> 10, 1, 4, 7, 10, 2, 4, 5, 11, 2, 2, 5, 9, 11, 3, 5, 11, 3…
$ quarter      <dbl> 4, 1, 2, 3, 4, 1, 2, 2, 4, 1, 1, 2, 3, 4, 1, 2, 4, 1, 2, …
$ sign         <chr> "positive", "negative", "negative", "negative", "positive…
$ quintile     <int> 5, 2, 1, 3, 5, 2, 3, 3, 3, 4, 2, 1, 3, 2, 5, 2, 4, 5, 5, …
$ quantile     <dbl> 11, 2, 1, 3, 11, 2, 3, 9, 9, 4, 2, 6, 9, 2, 11, 2, 10, 11…

The main regression table only compares the bottom quintile to the top quintile. DellaVigna and Pollet (2009) also specify a number of indicator variables based on year, month, and the size of the firm (adjusted for the average of the logarithm of the size).

DellaVigna and Pollet (2009) also control for the volatility of earnings surprises, where volatility is measured by the standard deviation of the surprises in the 4 years before the earnings announcement. It turns out that this is quite a computationally expensive exercise. Below, I first write a function to select the necessary earnings surprises and then calculate the standard deviation. Then, I use the same function with the dtplyr package, which makes use of the faster data.table package to do the data cleaning. The only extra step we need to take is to make the main data compatible with the package through the lazy_dt function and use the new compatible main_dt in our function. You can see that the two functions give the same result on a test firm. In the original run, the dtplyr version decreased the run time of the function by a factor of 2; see the warning below for why the timings shown here differ.

Warning

It turns out that, on my Mac Mini, the speed advantage goes to the tidyverse implementation. This could be because I have installed a more recent version of the tidyverse on my Mac Mini. I am going to investigate this further at some point. Nevertheless, it shows the advantage of timing the different implementations, so I am going to leave both of them here.

#' Volatility of a firm's past earnings surprises
#'
#' Computes the standard deviation of the non-missing `surprise` values of
#' firm `gvkey_in` whose announcement date lies in the half-open window
#' `[anndat_in - (lag * 365 + 1) days, anndat_in)`.
#'
#' @param gvkey_in Firm identifier, matched against the `gvkey` column.
#' @param anndat_in Announcement date; only strictly earlier announcements
#'   are used.
#' @param lag Length of the look-back window in years. Each year counts as
#'   365 days and one extra day is added (presumably to allow for a leap
#'   day -- confirm).
#' @param data Data frame with the columns `gvkey`, `anndat` and `surprise`.
#'   Defaults to the global `main`, so existing calls are unaffected.
#' @return The standard deviation of the selected surprises, or `NA_real_`
#'   when fewer than four surprises fall in the window.
get_volatility <- function(gvkey_in, anndat_in, lag = 4, data = main) {
  anndat_min_lag <- anndat_in - lag * 365 - 1
  surprises <- data %>%
    filter(
      gvkey == gvkey_in,
      anndat < anndat_in,
      anndat >= anndat_min_lag,
      !is.na(surprise)
    ) %>%
    pull(surprise)
  # The condition is a single logical, so a plain if/else is the right tool;
  # if_else() is meant for vectors and always evaluates both branches.
  if (length(surprises) >= 4) sd(surprises) else NA_real_
}
library(dtplyr)
# Wrap `main` with lazy_dt() so that the dplyr verbs in get_volatility_dt()
# are handled by dtplyr.
main_dt <- lazy_dt(main)
#' Volatility of a firm's past earnings surprises (dtplyr version)
#'
#' Same computation as `get_volatility()`, but run against a dtplyr lazy
#' table: the standard deviation of the non-missing `surprise` values of firm
#' `gvkey_in` announced in the half-open window
#' `[anndat_in - (lag * 365 + 1) days, anndat_in)`.
#'
#' @param gvkey_in Firm identifier, matched against the `gvkey` column.
#' @param anndat_in Announcement date; only strictly earlier announcements
#'   are used.
#' @param lag Length of the look-back window in years. Each year counts as
#'   365 days and one extra day is added (presumably to allow for a leap
#'   day -- confirm).
#' @param data Lazy table created with `lazy_dt()` that holds the columns
#'   `gvkey`, `anndat` and `surprise`. Defaults to the global `main_dt`, so
#'   existing calls are unaffected.
#' @return The standard deviation of the selected surprises, or `NA_real_`
#'   when fewer than four surprises fall in the window.
get_volatility_dt <- function(gvkey_in, anndat_in, lag = 4, data = main_dt) {
  anndat_min_lag <- anndat_in - lag * 365 - 1
  surprises <- data %>%
    filter(
      gvkey == gvkey_in,
      anndat < anndat_in,
      anndat >= anndat_min_lag,
      !is.na(surprise)
    ) %>%
    pull(surprise)
  # The condition is a single logical, so a plain if/else is the right tool;
  # if_else() is meant for vectors and always evaluates both branches.
  if (length(surprises) >= 4) sd(surprises) else NA_real_
}
# Check the function on a single firm and announcement date. The arguments
# are named in full: `gvkey =` only worked through partial argument matching.
test_key <- "001081"
test_anndat <- ymd("2006-02-15")
get_volatility(gvkey_in = test_key, anndat_in = test_anndat)

[1] 0.008746968

# Same check for the dtplyr version, with the argument name spelled out
# instead of relying on partial matching of `gvkey`.
get_volatility_dt(gvkey_in = test_key, anndat_in = test_anndat)

[1] 0.008746968

# Time both implementations on the same firm-date (20 runs each). Argument
# names are spelled out instead of relying on partial matching of `gvkey`.
microbenchmark::microbenchmark(
  vol = get_volatility(gvkey_in = test_key, anndat_in = test_anndat),
  vol_dt = get_volatility_dt(gvkey_in = test_key, anndat_in = test_anndat),
  times = 20
)

Unit: milliseconds
   expr      min       lq     mean   median       uq      max neval
    vol 4.667570 4.904584 6.097149 5.253576 5.722228 15.40677    20
 vol_dt 6.277119 6.860226 7.460642 7.185351 7.599177 13.49717    20

We can now use the faster function to calculate the volatility measures with the other control variables.

# Restrict the sample to the two extreme surprise quantiles (1 and 11) and
# add the control variables used in the regressions.
subset <- main %>%
  filter(quantile %in% c(1, 11)) %>%
  mutate(top = if_else(quantile == 11, 1, 0),
         log_size = log(market_value),
         # One get_volatility() call per row; this is the slow step.
         volatility = pmap_dbl(list(gvkey, anndat),
                               ~ get_volatility(..1, ..2),
                               .progress = TRUE)) %>%
  # Demean log size within each year-quarter.
  mutate(log_size_adj = log_size - mean(log_size, na.rm = TRUE),
         .by = c(quarter, year)) %>%
  # Size deciles are formed over the whole subset, volatility deciles by year.
  mutate(size_decile = ntile(log_size_adj, 10)) %>%
  mutate(vol_decile = ntile(volatility, 10),
         .by = year)
# Store the result so the regressions below can reload it from disk.
saveRDS(subset, here("data", "freaky_friday", "subset.RDS"))

Regressions

Table 2

The tables do not really replicate, which is interesting to me for a number of reasons.

The results are more consistent. I wonder whether I got rid of more outliers earlier. Remember that I ended up with fewer observations. One interpretation is that I have cleaned the data better; the other is that I got rid of important, influential observations by being too strict when cleaning the data.
The results for the short-term CAR are consistent with the figure. Friday market reactions to bottom-quantile surprises are more positive than non-Friday market reactions, and the sign flips for top-quantile surprises.
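I also lose substantially more observations due to the inclusion of the volatility measures. I do not know exactly why that is the case. One likely contributor is that get_volatility returns NA whenever a firm has fewer than four non-missing earnings surprises in the four years before the announcement; those observations have a missing vol_decile and are dropped from the regressions that include it.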

Panel A: Short Term CAR

# Reload the stored subset so this section does not depend on rerunning the
# slow volatility step.
subset <- readRDS(here("data", "freaky_friday", "subset.RDS"))

# (1) No controls.
model1a <- feols(
  car_short ~ friday * top,
  data = subset,
  cluster = "anndat"
)

# (2) Year, month and size-decile fixed effects, each with its own slope
# on `top`.
model2a <- feols(
  car_short ~ friday * top | (year[top] + month[top] + size_decile[top]),
  data = subset,
  cluster = "anndat"
)

The variable 'top' has been removed because of collinearity (see $collin.var).

# (3) Adds volatility-decile fixed effects (again with a slope on `top`) to
# the fixed effects of model (2).
model3a <- feols(
  car_short ~ friday * top | (year[top] + month[top] + size_decile[top] + vol_decile[top]),
  data = subset,
  cluster = "anndat"
)

NOTE: 6,818 observations removed because of NA values (Fixed-effects: 6,818).
The variable 'top' has been removed because of collinearity (see $collin.var).

# Panel A table; msummary() is a shortcut for modelsummary().
modelsummary(
  list(model1a, model2a, model3a),
  stars = TRUE,
  gof_omit = gof_omit
)

	(1)	(2)	(3)
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept)	-0.036***
	(0.001)
friday	0.014***	0.012***	0.013**
	(0.003)	(0.003)	(0.004)
top	0.061***
	(0.002)
friday × top	-0.023***	-0.020***	-0.021***
	(0.004)	(0.004)	(0.005)
Num.Obs.	22417	22417	15599
R2	0.086	0.095	0.110
R2 Within		0.001	0.001
Std.Errors	by: anndat	by: anndat	by: anndat
FE: size_decile		X	X
FE: year		X	X
FE: month		X	X
FE: vol_decile			X

Panel B: Long Term CAR

# (1) No controls.
model1b <- feols(
  car_long ~ friday * top,
  data = subset,
  cluster = "anndat"
)

# (2) Year, month and size-decile fixed effects, each with its own slope
# on `top`.
model2b <- feols(
  car_long ~ friday * top | (year[top] + month[top] + size_decile[top]),
  data = subset,
  cluster = "anndat"
)

The variable 'top' has been removed because of collinearity (see $collin.var).

# (3) Adds volatility-decile fixed effects (again with a slope on `top`) to
# the fixed effects of model (2).
model3b <- feols(
  car_long ~ friday * top | (year[top] + month[top] + size_decile[top] + vol_decile[top]),
  data = subset,
  cluster = "anndat"
)

NOTE: 6,818 observations removed because of NA values (Fixed-effects: 6,818).
The variable 'top' has been removed because of collinearity (see $collin.var).

# Panel B table; msummary() is a shortcut for modelsummary().
modelsummary(
  list(model1b, model2b, model3b),
  stars = TRUE,
  gof_omit = gof_omit
)

	(1)	(2)	(3)
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept)	-0.023***
	(0.005)
friday	-0.010	-0.011	-0.021
	(0.014)	(0.013)	(0.015)
top	0.038***
	(0.004)
friday × top	0.040**	0.042**	0.051**
	(0.015)	(0.014)	(0.017)
Num.Obs.	22417	22417	15599
R2	0.006	0.036	0.041
R2 Within		0.001	0.001
Std.Errors	by: anndat	by: anndat	by: anndat
FE: size_decile		X	X
FE: year		X	X
FE: month		X	X
FE: vol_decile			X

Table 3

# Full sample with the size controls: log market value, demeaned within each
# year-quarter, and deciles of the demeaned value over the whole sample.
main_extra <- main %>%
  mutate(log_size = log(market_value)) %>%
  mutate(log_size_adj = log_size - mean(log_size, na.rm = TRUE),
         .by = c(quarter, year)) %>%
  mutate(size_decile = ntile(log_size_adj, 10))

# Short-term CAR on the full 11-level quantile scale, without controls.
model1 <- feols(
  car_short ~ friday * quantile,
  data = main_extra,
  cluster = "anndat"
)

# Short-term CAR with year, month and size-decile fixed effects, each with
# its own slope on `quantile`; the formula has no separate `quantile` term.
model2 <- feols(
  car_short ~ friday + friday : quantile
  | (year[quantile] + month[quantile] + size_decile[quantile]),
  data = main_extra,
  cluster = "anndat"
)

# Long-term CAR, without controls.
model3 <- feols(
  car_long ~ friday * quantile,
  data = main_extra,
  cluster = "anndat"
)

# Long-term CAR with the same fixed effects as model2.
model4 <- feols(
  car_long ~ friday  + friday : quantile
  | (year[quantile] + month[quantile] + size_decile[quantile]),
  data = main_extra,
  cluster = "anndat"
)

# Table 3; msummary() is a shortcut for modelsummary().
modelsummary(
  list(model1, model2, model3, model4),
  stars = TRUE,
  gof_omit = gof_omit
)

	(1)	(2)	(3)	(4)
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept)	-0.041***		-0.015***
	(0.001)		(0.002)
friday	0.016***	0.014***	-0.016*	-0.015*
	(0.002)	(0.002)	(0.008)	(0.007)
quantile	0.006***		0.003***
	(0.000)		(0.000)
friday × quantile	-0.002***	-0.002***	0.003**	0.003***
	(0.000)	(0.000)	(0.001)	(0.001)
Num.Obs.	130358	130358	130358	130358
R2	0.054	0.057	0.002	0.015
R2 Within		0.000		0.000
Std.Errors	by: anndat	by: anndat	by: anndat	by: anndat
FE: size_decile		X		X
FE: year		X		X
FE: month		X		X