# Analysis of foul data
#
# Reads every "*-allfouls.csv" file found in "data/", stacks them into one
# table, and writes a set of diagnostic PNG figures into "figures/".

library(tidyverse)

# Helpers ----

# Render a ggplot object into a PNG file.
#
# plot: a ggplot object.
# path: output file path handed to png().
# ...:  further arguments forwarded to png() (e.g. height, width).
#
# The plot is print()ed explicitly: ggplot objects are only drawn when
# printed, and top-level auto-printing does not happen under source() or
# inside functions, which would otherwise leave an empty image. The device is
# closed via on.exit() so it is released even if drawing fails.
save_png <- function(plot, path, ...) {
  png(path, ...)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(path)
}

# Data ----

# list.files() takes a regular expression (not a shell glob), so anchor the
# suffix and escape the dot.
datafiles <- list.files(
  "data",
  pattern = "-allfouls\\.csv$",
  full.names = FALSE
)
if (length(datafiles) == 0) {
  stop("No '*-allfouls.csv' files found in 'data/'.", call. = FALSE)
}

# Name the input vector by file name so that the `.id` column ("filename")
# holds the originating file name; with an unnamed input it would only hold
# the position ("1", "2", ...).
allfouls <- purrr::map_df(
  purrr::set_names(datafiles),
  ~ read_csv(file.path("data", .x)),
  .id = "filename"
)

## Plots ----

# png() fails if the output directory is missing, so make sure it exists.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)

# histogram of fouls as a function of corrected score margin
# (counts are shown on a log10 y axis)
hist_all <- ggplot(allfouls, aes(x = SCOREMARGIN_CORR)) +
  geom_histogram(binwidth = 1, fill = "black") +
  theme_bw() +
  scale_y_log10() +
  xlab("Score Margin") +
  ylab("N Fouls")
save_png(hist_all, "figures/foul_histogram-all.png")

# histogram of fouls as a function of corrected score margin,
# ignoring overtime and the final minute of regular play
# NOTE(review): PCTIMESTRING is compared against a string here; this is only
# meaningful if the column compares sensibly with "00:01:00" (e.g. zero-padded
# "HH:MM:SS" text) -- confirm against the CSV contents.
earlyfouls <- filter(
  allfouls,
  PERIOD <= 4,
  !(PERIOD == 4 & PCTIMESTRING < "00:01:00")
)

# Restricted subset in red, with all fouls overlaid in green on top of it.
hist_early <- ggplot(earlyfouls, aes(x = SCOREMARGIN_CORR)) +
  geom_histogram(binwidth = 1, fill = "red", alpha = 0.5) +
  theme_bw() +
  scale_y_log10() +
  xlab("Score Margin") +
  ylab("N Fouls") +
  geom_histogram(data = allfouls, binwidth = 1, fill = "green", alpha = 0.5)
save_png(hist_early, "figures/foul_histogram-regular_nofinalmin.png")

# hexbin plots of fouls as a function of total score and corrected score
# margin, separated by home and away teams (one panel per FOULTEAM value)
hex_by_team <- ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  geom_hex() +
  scale_fill_viridis_c() +
  theme_bw() +
  facet_wrap(vars(FOULTEAM))
save_png(
  hex_by_team,
  "figures/fouls_totalscore-hexbin-byhomevisitor.png",
  height = 600,
  width = 1200
)

# hexbin plots of fouls as a function of total score and corrected score
# margin, separated by season (one panel per input file; presumably each
# file holds one season -- confirm)
hex_by_season <- ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  geom_hex() +
  scale_fill_viridis_c() +
  theme_bw() +
  facet_wrap(vars(filename))
save_png(
  hex_by_season,
  "figures/fouls_totalscore-hexbin-byseason.png",
  height = 1200,
  width = 1200
)