foul_analysis.R 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. # Analysis of foul data
  2. library(tidyverse)
  3. datafiles <- list.files("data", pattern="*-allfouls.csv", full.names = FALSE)
  4. allfouls <- purrr::map_df(datafiles,
  5. ~read_csv(paste0("data/", .x)),
  6. .id = "filename")
  7. ## Plots
  8. # histogram of fouls as a function of corrected score margin
  9. png('figures/foul_histogram-all.png')
  10. ggplot(allfouls, aes(x=SCOREMARGIN_CORR)) +
  11. geom_histogram(binwidth=1, fill="black") +
  12. theme_bw() +
  13. scale_y_log10() +
  14. xlab("Score Margin") + ylab("N Fouls")
  15. dev.off()
  16. # histogram of fouls as a function of corrected score margin,
  17. # ignoring overtime and the final minute of regular play
  18. earlyfouls <- filter(allfouls,
  19. PERIOD <= 4,
  20. !(PERIOD == 4 & PCTIMESTRING < "00:01:00"))
  21. png('figures/foul_histogram-regular_nofinalmin.png')
  22. ggplot(earlyfouls, aes(x=SCOREMARGIN_CORR)) +
  23. geom_histogram(binwidth=1, fill="red", alpha=0.5) +
  24. theme_bw() +
  25. scale_y_log10() +
  26. xlab("Score Margin") + ylab("N Fouls") +
  27. geom_histogram(data=allfouls, binwidth=1, fill="green", alpha=0.5)
  28. dev.off()
  29. # hexbin plots of fouls as a function of total score and corrected score
  30. # margin, separated by home and away teams
  31. png('figures/fouls_totalscore-hexbin-byhomevisitor.png', height=600, width=1200)
  32. ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  33. geom_hex() +
  34. scale_fill_viridis_c() +
  35. theme_bw() +
  36. facet_wrap(vars(FOULTEAM))
  37. dev.off()
  38. # hexbin plots of fouls as a function of total score and corrected score
  39. # margin, separated by season
  40. png('figures/fouls_totalscore-hexbin-byseason.png', height=1200, width=1200)
  41. ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  42. geom_hex() +
  43. scale_fill_viridis_c() +
  44. theme_bw() +
  45. facet_wrap(vars(filename))
  46. dev.off()