| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
- # Analysis of foul data
- library(tidyverse)
# Locate the foul tables in data/. `pattern` is a regular expression, not a
# shell glob, so the glob is translated with glob2rx(); this anchors the match
# and treats the "." before "csv" literally.
datafiles <- list.files(
  "data",
  pattern = glob2rx("*-allfouls.csv"),
  full.names = FALSE
)
if (length(datafiles) == 0) {
  stop("No '*-allfouls.csv' files found in 'data/'.", call. = FALSE)
}

# Read every file and stack the rows into one data frame. The input is named
# by file name so that `.id` records the source file in the `filename` column;
# with an unnamed input it would hold only the positional index ("1", "2", ...).
allfouls <- purrr::map_dfr(
  setNames(datafiles, datafiles),
  ~ read_csv(file.path("data", .x)),
  .id = "filename"
)
- ## Plots
# Histogram of fouls as a function of corrected score margin, with counts on a
# log10 axis. Written to figures/foul_histogram-all.png.
p_foul_hist_all <- ggplot(allfouls, aes(x = SCOREMARGIN_CORR)) +
  geom_histogram(binwidth = 1, fill = "black") +
  theme_bw() +
  scale_y_log10() +
  xlab("Score Margin") +
  ylab("N Fouls")

png("figures/foul_histogram-all.png")
# Print explicitly: a ggplot is only drawn when printed, and auto-printing does
# not happen under source() (default settings) or inside functions/loops, which
# would leave the PNG empty.
print(p_foul_hist_all)
dev.off()
# Histogram of fouls as a function of corrected score margin, ignoring
# overtime and the final minute of regular play, overlaid on the histogram of
# all fouls for comparison.
# NOTE(review): the final-minute test compares PCTIMESTRING against the string
# "00:01:00"; whether that behaves as a clock comparison depends on the type
# read_csv() assigned to the column -- confirm against the data.
earlyfouls <- filter(
  allfouls,
  PERIOD <= 4,
  !(PERIOD == 4 & PCTIMESTRING < "00:01:00")
)

# Red layer: earlyfouls (drawn first). Green layer: allfouls (drawn on top).
# Both are semi-transparent so the overlap stays visible.
p_foul_hist_early <- ggplot(earlyfouls, aes(x = SCOREMARGIN_CORR)) +
  geom_histogram(binwidth = 1, fill = "red", alpha = 0.5) +
  theme_bw() +
  scale_y_log10() +
  xlab("Score Margin") +
  ylab("N Fouls") +
  geom_histogram(data = allfouls, binwidth = 1, fill = "green", alpha = 0.5)

png("figures/foul_histogram-regular_nofinalmin.png")
# Print explicitly: a ggplot is only drawn when printed, and auto-printing does
# not happen under source() (default settings) or inside functions/loops, which
# would leave the PNG empty.
print(p_foul_hist_early)
dev.off()
# Hexbin plot of fouls as a function of corrected score margin (x) and total
# score (y), one panel per value of FOULTEAM (home vs. away team, per the
# output file name). Written to
# figures/fouls_totalscore-hexbin-byhomevisitor.png.
p_fouls_hex_by_team <- ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  geom_hex() +
  scale_fill_viridis_c() +
  theme_bw() +
  facet_wrap(vars(FOULTEAM))

png(
  "figures/fouls_totalscore-hexbin-byhomevisitor.png",
  height = 600,
  width = 1200
)
# Print explicitly: a ggplot is only drawn when printed, and auto-printing does
# not happen under source() (default settings) or inside functions/loops, which
# would leave the PNG empty.
print(p_fouls_hex_by_team)
dev.off()
# Hexbin plot of fouls as a function of corrected score margin (x) and total
# score (y), separated by season: one panel per value of the `filename` column.
# Written to figures/fouls_totalscore-hexbin-byseason.png.
p_fouls_hex_by_season <- ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  geom_hex() +
  scale_fill_viridis_c() +
  theme_bw() +
  facet_wrap(vars(filename))

png(
  "figures/fouls_totalscore-hexbin-byseason.png",
  height = 1200,
  width = 1200
)
# Print explicitly: a ggplot is only drawn when printed, and auto-printing does
# not happen under source() (default settings) or inside functions/loops, which
# would leave the PNG empty.
print(p_fouls_hex_by_season)
dev.off()
|