Regressions

Author

Stijn Masschelein

Setup

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.1     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(here)
here() starts at /Users/stijnmasschelein/Dropbox/Teaching/lecturenotes/method_package
library(fixest)
library(modelsummary)
# Regular expression selecting the goodness-of-fit rows to leave out of the
# regression tables; it is passed to `msummary(gof_omit = ...)` further down.
gof_omit <- "Adj|RMS|IC"
# Declare where this file sits relative to the project root so that `here()`
# resolves the data paths used below.
i_am("freaky_friday/regressions.qmd")
here() starts at /Users/stijnmasschelein/Dropbox/Teaching/lecturenotes/method_package

Data

# Load the announcement data and add the calendar, sign and surprise-quantile
# variables used by the regressions below.
main <- readRDS(here("data", "freaky_friday", "main.RDS")) %>%
  # Ungrouped variables: Friday indicator, calendar fields, sign of surprise.
  mutate(
    friday = as.numeric(weekday == "Fri"),
    year = year(anndat),
    month = month(anndat),
    quarter = (month - 1) %/% 3 + 1,
    sign = case_when(
      surprise > 0 ~ "positive",
      surprise < 0 ~ "negative",
      surprise == 0 ~ "zero"
    )
  ) %>%
  # Quintiles of the surprise are formed within each sign-year cell.
  mutate(quintile = ntile(surprise, 5), .by = c(sign, year)) %>%
  # Stack the groups on one 1-11 scale: negative quintiles are 1-5, zero
  # surprises are 6 and positive quintiles are 7-11.
  mutate(
    quantile = case_when(
      sign == "negative" ~ quintile,
      sign == "zero" ~ 6,
      sign == "positive" ~ 6 + quintile
    )
  ) %>%
  glimpse()
Rows: 130,759
Columns: 28
$ ticker       <chr> "A2", "A2", "A2", "A2", "A2", "A2", "AA0G", "AA0H", "AA0H…
$ actual       <dbl> -0.11, -0.11, -0.05, -0.07, -0.10, -0.04, -0.45, 0.01, 0.…
$ pdf          <chr> "D", "D", "D", "D", "D", "D", "P", "D", "D", "D", "D", "D…
$ anndats_act  <date> 2005-01-26, 2005-04-27, 2005-07-27, 2005-10-26, 2006-02-…
$ gvkey        <chr> "001081", "001081", "001081", "001081", "001081", "001081…
$ permno       <dbl> 10560, 10560, 10560, 10560, 10560, 10560, 88784, 10574, 1…
$ cusip        <chr> "00392410", "00392410", "00392410", "00392410", "00392410…
$ rdq          <date> 2005-01-26, 2005-04-27, 2005-07-27, 2005-10-26, 2006-02-…
$ anndat       <date> 2005-01-26, 2005-04-27, 2005-07-27, 2005-10-26, 2006-02-…
$ N            <int> 2, 3, 2, 4, 5, 1, 2, 1, 1, 1, 1, 2, 1, 5, 3, 2, 1, 4, 2, …
$ median       <dbl> -0.08965, -0.05740, -0.03700, -0.10610, -0.07830, -0.0900…
$ mean         <dbl> -0.0896500, -0.0680000, -0.0370000, -0.1005500, -0.076580…
$ mean_days    <dbl> 8.000000, 6.666667, 13.500000, 7.000000, 9.200000, 21.000…
$ car_short    <dbl> 0.036461999, -0.063605082, -0.004176757, -0.014869448, -0…
$ car_long     <dbl> -0.20959886, 0.05661849, -0.18368085, 0.20741889, 0.10569…
$ date_minus5  <date> 2005-01-21, 2005-04-22, 2005-07-22, 2005-10-21, 2006-01-…
$ date         <date> 2005-01-21, 2005-04-22, 2005-07-22, 2005-10-21, 2006-01-…
$ prc          <dbl> 5.89, 4.53, 5.00, 3.26, 4.02, 5.63, 14.50, 11.47, 10.85, …
$ market_value <dbl> 2592630.7, 1993992.8, 2200875.0, 1434970.5, 1769503.5, 24…
$ surprise     <dbl> -0.0034550086, -0.0116114785, -0.0026000000, 0.0110736197…
$ weekday      <ord> Wed, Wed, Wed, Wed, Wed, Thu, Tue, Tue, Tue, Wed, Tue, We…
$ friday       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ year         <dbl> 2005, 2005, 2005, 2005, 2006, 2004, 2001, 2002, 2002, 200…
$ month        <dbl> 1, 4, 7, 10, 2, 10, 11, 4, 8, 10, 4, 7, 10, 2, 2, 4, 7, 1…
$ quarter      <dbl> 1, 2, 3, 4, 1, 4, 4, 2, 3, 4, 2, 3, 4, 1, 1, 2, 3, 4, 4, …
$ sign         <chr> "negative", "negative", "negative", "positive", "negative…
$ quintile     <int> 2, 1, 3, 5, 2, 5, 2, 4, 4, 4, 1, 3, 3, 2, 1, 2, 4, 3, 1, …
$ quantile     <dbl> 2, 1, 3, 11, 2, 11, 2, 10, 10, 4, 6, 9, 9, 2, 6, 8, 4, 9,…

The main regression table only compares the bottom quintile to the top quintile. Dellavigna and Pollet (2009) also specify a number of indicator variables based on year, month, and the size of the firm (adjusted for the average of the logarithm of the size).

Dellavigna and Pollet (2009) also control for the volatility of earnings surprises, where volatility is measured by the standard deviation of the surprises in the 4 years before the earnings announcement. It turns out that this is quite a computationally expensive exercise. Below, I first write a function to select the necessary earnings surprises and then calculate the standard deviation. Then, I use the function with the dtplyr package, which makes use of the faster data.table package to do the data cleaning. The only extra step we need to take is to make the main data compatible with the package through the lazy_dt function and use the new compatible main_dt in our function. You can see that the two functions give the same result on a test firm and that we decrease the run time of the function by a factor of 2 (but see the warning below).

Warning

It turns out that, on my Mac Mini, the speed advantage goes to the tidyverse implementation. This could be because I have installed a more recent version of the tidyverse on my Mac Mini. I am going to investigate this further at some point. Nevertheless, it shows the advantage of timing the different implementations, so I am going to leave both of them here.

#' Volatility of a firm's past earnings surprises
#'
#' Selects, from the global `main` data, the non-missing surprises of firm
#' `gvkey_in` that were announced strictly before `anndat_in` and no earlier
#' than `lag * 365 + 1` days before it, and returns their standard deviation.
#'
#' @param gvkey_in Firm identifier, compared against the `gvkey` column.
#' @param anndat_in Announcement date of the focal observation.
#' @param lag Length of the look-back window in 365-day years (default 4).
#' @return The standard deviation of the selected surprises, or `NA_real_`
#'   when fewer than 4 surprises fall inside the window.
get_volatility <- function(gvkey_in, anndat_in, lag = 4) {
  # A year is approximated as 365 days; one extra day is added to the window.
  anndat_min_lag <- anndat_in - lag * 365 - 1
  surprises <- main %>%
    filter(
      gvkey == gvkey_in,
      anndat < anndat_in,
      anndat >= anndat_min_lag,
      !is.na(surprise)
    ) %>%
    pull(surprise)
  # The condition is a single TRUE/FALSE, so a plain `if` is used instead of
  # the vectorised `if_else()`; `sd()` is only evaluated when it is needed.
  if (length(surprises) >= 4) {
    sd(surprises)
  } else {
    NA_real_
  }
}
# dtplyr lets the same dplyr verbs run on top of data.table.
library(dtplyr)
# Wrap `main` in a lazy data.table so the dplyr pipeline in
# get_volatility_dt() is executed by data.table.
main_dt <- lazy_dt(main)
#' Volatility of a firm's past earnings surprises (dtplyr version)
#'
#' Same computation as `get_volatility()`, but the rows are selected from the
#' global `main_dt` object created with `lazy_dt()`: the non-missing surprises
#' of firm `gvkey_in` announced strictly before `anndat_in` and no earlier
#' than `lag * 365 + 1` days before it.
#'
#' @param gvkey_in Firm identifier, compared against the `gvkey` column.
#' @param anndat_in Announcement date of the focal observation.
#' @param lag Length of the look-back window in 365-day years (default 4).
#' @return The standard deviation of the selected surprises, or `NA_real_`
#'   when fewer than 4 surprises fall inside the window.
get_volatility_dt <- function(gvkey_in, anndat_in, lag = 4) {
  # A year is approximated as 365 days; one extra day is added to the window.
  anndat_min_lag <- anndat_in - lag * 365 - 1
  surprises <- main_dt %>%
    filter(
      gvkey == gvkey_in,
      anndat < anndat_in,
      anndat >= anndat_min_lag,
      !is.na(surprise)
    ) %>%
    pull(surprise)
  # The condition is a single TRUE/FALSE, so a plain `if` is used instead of
  # the vectorised `if_else()`; `sd()` is only evaluated when it is needed.
  if (length(surprises) >= 4) {
    sd(surprises)
  } else {
    NA_real_
  }
}
# Spot check on a single firm-announcement pair.
test_key <- "001081"
test_anndat <- ymd("2006-02-15")
# `gvkey_in` is written out in full: the abbreviation `gvkey` only works
# through partial argument matching, which breaks as soon as a second
# argument with the same prefix exists.
get_volatility(gvkey_in = test_key, anndat_in = test_anndat)
[1] 0.008746968
# Same spot check for the dtplyr version, with the argument name spelled out
# in full instead of relying on partial matching.
get_volatility_dt(gvkey_in = test_key, anndat_in = test_anndat)
[1] 0.008746968
# Time both implementations on the same firm-announcement pair, 20 runs each.
# Argument names are spelled out in full instead of relying on partial
# matching.
microbenchmark::microbenchmark(
  vol = get_volatility(gvkey_in = test_key, anndat_in = test_anndat),
  vol_dt = get_volatility_dt(gvkey_in = test_key, anndat_in = test_anndat),
  times = 20
)
Unit: milliseconds
   expr      min       lq     mean   median       uq      max neval
    vol 4.641323 4.707876 5.066754 4.759633 4.891806 10.46621    20
 vol_dt 6.113596 7.144397 7.832512 7.311755 7.512789 13.35067    20

We can now use the faster function to calculate the volatility measures with the other control variables.

# Build the estimation sample for Table 2: only the most negative (quantile 1)
# and most positive (quantile 11) surprise groups, plus the control variables.
subset <- main %>%
  filter(quantile %in% c(1, 11)) %>%
  mutate(
    top = if_else(quantile == 11, 1, 0),
    log_size = log(market_value),
    # One call per row; this is the slow step, hence the progress bar.
    volatility = pmap_dbl(
      list(gvkey, anndat),
      ~ get_volatility(..1, ..2),
      .progress = TRUE
    )
  ) %>%
  # Demean log size within each year-quarter. `TRUE` is spelled out because
  # `T` is an ordinary variable that can be reassigned.
  mutate(
    log_size_adj = log_size - mean(log_size, na.rm = TRUE),
    .by = c(quarter, year)
  ) %>%
  # Size deciles are formed over the whole sample ...
  mutate(size_decile = ntile(log_size_adj, 10)) %>%
  # ... while volatility deciles are formed within each year.
  mutate(vol_decile = ntile(volatility, 10), .by = year)
# Cache the result so the regressions can be run without redoing this step.
saveRDS(subset, here("data", "freaky_friday", "subset.RDS"))

Regressions

Table 2

The tables do not really replicate, which is interesting to me for a number of reasons.

  • The results are more consistent. I wonder whether I got rid of more outliers earlier. Remember that I ended up with fewer observations. One interpretation is that I have cleaned the data better; the other is that I got rid of important, influential observations by being too strict when cleaning the data.
  • The results for the short-term CAR are consistent with the figure. Friday market reactions to bottom quantile surprises are more positive than non-Friday market reactions and the sign flips for top quantile surprises.
  • I also lose substantially more observations due to the inclusion of the volatility measures. I do not know exactly why that is the case.

Panel A: Short Term CAR

# Read back the estimation sample saved above.
subset <- readRDS(here("data", "freaky_friday", "subset.RDS"))
# (1) Friday x top-quantile interaction without fixed effects.
model1a <- feols(
  car_short ~ friday * top,
  data = subset,
  cluster = "anndat"
)
# (2) Adds year, month and size-decile fixed effects, each with a varying
# slope on `top`.
model2a <- feols(
  car_short ~ friday * top |
    (year[top] + month[top] + size_decile[top]),
  data = subset,
  cluster = "anndat"
)
The variable 'top' has been removed because of collinearity (see $collin.var).
# (3) Additionally includes volatility-decile fixed effects with a varying
# slope on `top`; rows with a missing `vol_decile` drop out of the estimation.
model3a <- feols(
  car_short ~ friday * top |
    (year[top] + month[top] + size_decile[top] + vol_decile[top]),
  data = subset,
  cluster = "anndat"
)
NOTE: 6,839 observations removed because of NA values (Fixed-effects: 6,839).
The variable 'top' has been removed because of collinearity (see $collin.var).
# Panel A table: the three short-term CAR models side by side.
msummary(
  list(model1a, model2a, model3a),
  stars = TRUE,
  gof_omit = gof_omit
)
 (1)   (2)   (3)
(Intercept) −0.036***
(0.001)
friday 0.014*** 0.012*** 0.013**
(0.003) (0.003) (0.004)
top 0.061***
(0.002)
friday × top −0.023*** −0.020*** −0.021***
(0.004) (0.004) (0.005)
Num.Obs. 22486 22486 15647
R2 0.086 0.095 0.110
R2 Within 0.001 0.001
Std.Errors by: anndat by: anndat by: anndat
FE: size_decile X X
FE: year X X
FE: month X X
FE: vol_decile X
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

Panel B: Long Term CAR

# (1) Friday x top-quantile interaction without fixed effects.
model1b <- feols(
  car_long ~ friday * top,
  data = subset,
  cluster = "anndat"
)
# (2) Adds year, month and size-decile fixed effects, each with a varying
# slope on `top`.
model2b <- feols(
  car_long ~ friday * top |
    (year[top] + month[top] + size_decile[top]),
  data = subset,
  cluster = "anndat"
)
The variable 'top' has been removed because of collinearity (see $collin.var).
# (3) Additionally includes volatility-decile fixed effects with a varying
# slope on `top`; rows with a missing `vol_decile` drop out of the estimation.
model3b <- feols(
  car_long ~ friday * top |
    (year[top] + month[top] + size_decile[top] + vol_decile[top]),
  data = subset,
  cluster = "anndat"
)
NOTE: 6,839 observations removed because of NA values (Fixed-effects: 6,839).
The variable 'top' has been removed because of collinearity (see $collin.var).
# Panel B table: the three long-term CAR models side by side.
msummary(
  list(model1b, model2b, model3b),
  stars = TRUE,
  gof_omit = gof_omit
)
 (1)   (2)   (3)
(Intercept) −0.022***
(0.005)
friday −0.012 −0.012 −0.022
(0.013) (0.013) (0.015)
top 0.037***
(0.004)
friday × top 0.041** 0.043** 0.052**
(0.015) (0.014) (0.017)
Num.Obs. 22486 22486 15647
R2 0.006 0.035 0.041
R2 Within 0.001 0.001
Std.Errors by: anndat by: anndat by: anndat
FE: size_decile X X
FE: year X X
FE: month X X
FE: vol_decile X
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

Table 3

# Full sample (all 11 surprise quantiles) with the size controls added.
main_extra <- main %>%
  mutate(log_size = log(market_value)) %>%
  # Demean log size within each year-quarter. `TRUE` is spelled out because
  # `T` is an ordinary variable that can be reassigned.
  mutate(
    log_size_adj = log_size - mean(log_size, na.rm = TRUE),
    .by = c(quarter, year)
  ) %>%
  # Size deciles are formed over the whole sample.
  mutate(size_decile = ntile(log_size_adj, 10))

# Short-term CAR on the Friday indicator interacted with the 1-11 quantile.
model1 <- feols(
  car_short ~ friday * quantile,
  data = main_extra,
  cluster = "anndat"
)
# Same outcome with year, month and size-decile fixed effects, each with a
# varying slope on `quantile`; the `quantile` main effect is left out of the
# formula here.
model2 <- feols(
  car_short ~ friday + friday : quantile |
    (year[quantile] + month[quantile] + size_decile[quantile]),
  data = main_extra,
  cluster = "anndat"
)
# Long-term CAR counterparts of the two models above.
model3 <- feols(
  car_long ~ friday * quantile,
  data = main_extra,
  cluster = "anndat"
)
model4 <- feols(
  car_long ~ friday + friday : quantile |
    (year[quantile] + month[quantile] + size_decile[quantile]),
  data = main_extra,
  cluster = "anndat"
)

# Table 3: all four models side by side.
msummary(
  list(model1, model2, model3, model4),
  stars = TRUE,
  gof_omit = gof_omit
)
 (1)   (2)   (3)   (4)
(Intercept) −0.041*** −0.015***
(0.001) (0.002)
friday 0.016*** 0.014*** −0.016* −0.015*
(0.002) (0.002) (0.008) (0.007)
quantile 0.006*** 0.003***
(0.000) (0.000)
friday × quantile −0.002*** −0.002*** 0.003** 0.003***
(0.000) (0.000) (0.001) (0.001)
Num.Obs. 130759 130759 130759 130759
R2 0.054 0.057 0.002 0.015
R2 Within 0.000 0.000
Std.Errors by: anndat by: anndat by: anndat by: anndat
FE: size_decile X X
FE: year X X
FE: month X X
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

References

Dellavigna, Stefano, and Joshua M. Pollet. 2009. “Investor Inattention and Friday Earnings Announcements.” The Journal of Finance 64 (2): 709–49. https://doi.org/10.1111/j.1540-6261.2009.01447.x.