foul_analysis.R 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. # Analysis of foul data
  2. library(tidyverse)
  3. datafiles <- list.files("data", pattern="*-allfouls.csv", full.names = FALSE)
  4. allfouls <- purrr::map_df(datafiles,
  5. ~read_csv(paste0("data/", .x)),
  6. .id = "filename")
  7. ## Plots
  8. # histogram of fouls as a function of corrected score margin
  9. png('figures/foul_histogram-all.png')
  10. ggplot(allfouls, aes(x=SCOREMARGIN_CORR)) +
  11. geom_histogram(binwidth=1, fill="black") +
  12. theme_bw() +
  13. scale_y_log10() +
  14. xlab("Score Margin") + ylab("N Fouls")
  15. dev.off()
  16. median(allfouls$SCOREMARGIN_CORR)
  17. mean(allfouls$SCOREMARGIN_CORR)
  18. sd(allfouls$SCOREMARGIN_CORR)
  19. # histogram of fouls as a function of corrected score margin,
  20. # ignoring overtime and the final minute of regular play
  21. earlyfouls <- filter(allfouls,
  22. PERIOD <= 4,
  23. !(PERIOD == 4 & PCTIMESTRING < "00:01:00"))
  24. median(earlyfouls$SCOREMARGIN_CORR)
  25. mean(earlyfouls$SCOREMARGIN_CORR)
  26. sd(earlyfouls$SCOREMARGIN_CORR)
  27. png('figures/foul_histogram-regular_nofinalmin.png')
  28. ggplot(earlyfouls, aes(x=SCOREMARGIN_CORR)) +
  29. geom_histogram(binwidth=1, fill="red", alpha=0.5) +
  30. theme_bw() +
  31. scale_y_log10() +
  32. xlab("Score Margin") + ylab("N Fouls") +
  33. geom_histogram(data=allfouls, binwidth=1, fill="green", alpha=0.5)
  34. dev.off()
  35. # overtime fouls
  36. overtimefouls <- filter(allfouls,
  37. PERIOD > 4)
  38. median(overtimefouls$SCOREMARGIN_CORR)
  39. mean(overtimefouls$SCOREMARGIN_CORR)
  40. sd(overtimefouls$SCOREMARGIN_CORR)
  41. png('figures/foul_histogram-overtime.png')
  42. ggplot(overtimefouls, aes(x=SCOREMARGIN_CORR)) +
  43. geom_histogram(binwidth=1, fill="red", alpha=0.5) +
  44. theme_bw() +
  45. scale_y_log10() +
  46. xlab("Score Margin") + ylab("N Fouls") +
  47. geom_histogram(data=allfouls, binwidth=1, fill="green", alpha=0.5)
  48. dev.off()
  49. # hexbin plots of fouls as a function of total score and corrected score
  50. # margin, separated by home and away teams
  51. png('figures/fouls_totalscore-hexbin-byhomevisitor.png', height=600, width=1200)
  52. ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  53. geom_hex() +
  54. scale_fill_viridis_c() +
  55. theme_bw() +
  56. facet_wrap(vars(FOULTEAM))
  57. dev.off()
  58. # hexbin plots of fouls as a function of total score and corrected score
  59. # margin, separated by season
  60. png('figures/fouls_totalscore-hexbin-byseason.png', height=1200, width=1200)
  61. ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  62. geom_hex() +
  63. scale_fill_viridis_c() +
  64. theme_bw() +
  65. facet_wrap(vars(filename))
  66. dev.off()