foul_analysis.R 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. # Analysis of foul data
  2. library(tidyverse)
  3. datafiles <- list.files("data", pattern="*-allfouls.csv", full.names = FALSE)
  4. allfouls <- purrr::map_df(datafiles,
  5. ~read_csv(paste0("data/", .x)),
  6. .id = "filename")
  7. ## Plots
  8. # histogram of fouls as a function of corrected score margin
  9. png('figures/foul_histogram-all.png')
  10. ggplot(allfouls, aes(x=SCOREMARGIN_CORR)) +
  11. geom_histogram(binwidth=1, fill="black") +
  12. theme_bw() +
  13. scale_y_log10() +
  14. xlab("Score Margin") + ylab("N Fouls")
  15. dev.off()
  16. median(allfouls$SCOREMARGIN_CORR)
  17. mean(allfouls$SCOREMARGIN_CORR)
  18. sd(allfouls$SCOREMARGIN_CORR)
  19. # histogram of fouls as a function of corrected score margin,
  20. # ignoring overtime and the final minute of regular play
  21. earlyfouls <- filter(allfouls,
  22. PERIOD <= 4,
  23. !(PERIOD == 4 & PCTIMESTRING < "00:01:00"))
  24. median(earlyfouls$SCOREMARGIN_CORR)
  25. mean(earlyfouls$SCOREMARGIN_CORR)
  26. sd(earlyfouls$SCOREMARGIN_CORR)
  27. png('figures/foul_histogram-regular_nofinalmin.png')
  28. ggplot(earlyfouls, aes(x=SCOREMARGIN_CORR)) +
  29. geom_histogram(binwidth=1, fill="red", alpha=0.5) +
  30. theme_bw() +
  31. scale_y_log10() +
  32. xlab("Score Margin") + ylab("N Fouls") +
  33. geom_histogram(data=allfouls, binwidth=1, fill="green", alpha=0.5)
  34. dev.off()
  35. # look at the distribution of fouls in the final minute
  36. lastminfouls <- filter(allfouls,
  37. (PERIOD == 4 & PCTIMESTRING < "00:01:00"))
  38. median(lastminfouls$SCOREMARGIN_CORR)
  39. mean(lastminfouls$SCOREMARGIN_CORR)
  40. sd(lastminfouls$SCOREMARGIN_CORR)
  41. png('figures/foul_histogram-regular_finalmin.png')
  42. ggplot(lastminfouls, aes(x=SCOREMARGIN_CORR)) +
  43. geom_histogram(binwidth=1, fill="red", alpha=0.5) +
  44. theme_bw() +
  45. scale_y_log10() +
  46. xlab("Score Margin") + ylab("N Fouls") +
  47. geom_histogram(data=allfouls, binwidth=1, fill="green", alpha=0.5)
  48. dev.off()
  49. # overtime fouls
  50. overtimefouls <- filter(allfouls,
  51. PERIOD > 4)
  52. median(overtimefouls$SCOREMARGIN_CORR)
  53. mean(overtimefouls$SCOREMARGIN_CORR)
  54. sd(overtimefouls$SCOREMARGIN_CORR)
  55. png('figures/foul_histogram-overtime.png')
  56. ggplot(overtimefouls, aes(x=SCOREMARGIN_CORR)) +
  57. geom_histogram(binwidth=1, fill="red", alpha=0.5) +
  58. theme_bw() +
  59. scale_y_log10() +
  60. xlab("Score Margin") + ylab("N Fouls") +
  61. geom_histogram(data=allfouls, binwidth=1, fill="green", alpha=0.5)
  62. dev.off()
  63. # hexbin plots of fouls as a function of total score and corrected score
  64. # margin, separated by home and away teams
  65. png('figures/fouls_totalscore-hexbin-byhomevisitor.png', height=600, width=1200)
  66. ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  67. geom_hex() +
  68. scale_fill_viridis_c() +
  69. theme_bw() +
  70. facet_wrap(vars(FOULTEAM))
  71. dev.off()
  72. # hexbin plots of fouls as a function of total score and corrected score
  73. # margin, separated by season
  74. png('figures/fouls_totalscore-hexbin-byseason.png', height=1200, width=1200)
  75. ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  76. geom_hex() +
  77. scale_fill_viridis_c() +
  78. theme_bw() +
  79. facet_wrap(vars(filename))
  80. dev.off()