# Analysis of foul data

library(tidyverse)

allfouls <- read_csv("data/foulsonly.csv")

## Plots

# Render a ggplot object to a PNG file.
#
# plot: a ggplot object.
# path: output file path; its parent directory is created if missing.
# ...:  further arguments forwarded to png() (e.g. height, width).
#
# The plot is printed explicitly so the file is also written when this script
# is run through source(), where top-level values are not auto-printed.
# The device is closed via on.exit() so a rendering error cannot leave it open.
# Returns `path` invisibly.
save_png <- function(plot, path, ...) {
  dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE)
  png(path, ...)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(path)
}

# histogram of fouls as a function of corrected score margin
# (log-scaled counts, one bin per point of margin)
p_all <- ggplot(allfouls, aes(x = SCOREMARGIN_CORR)) +
  geom_histogram(binwidth = 1, fill = "black") +
  theme_bw() +
  scale_y_log10() +
  xlab("Score Margin") +
  ylab("N Fouls")
save_png(p_all, "figures/foul_histogram-all.png")

# histogram of fouls as a function of corrected score margin,
# ignoring overtime and the final minute of regular play
#
# NOTE(review): `PCTIMESTRING < "00:01:00"` compares against a character
# value, so it only isolates the final minute if PCTIMESTRING is read as a
# zero-padded "HH:MM:SS" string holding the time remaining in the period --
# confirm how read_csv() types this column.
earlyfouls <- filter(
  allfouls,
  PERIOD <= 4,
  !(PERIOD == 4 & PCTIMESTRING < "00:01:00")
)

# Red: the filtered fouls. Green: all fouls, drawn on top for comparison.
p_early <- ggplot(earlyfouls, aes(x = SCOREMARGIN_CORR)) +
  geom_histogram(binwidth = 1, fill = "red", alpha = 0.5) +
  theme_bw() +
  scale_y_log10() +
  xlab("Score Margin") +
  ylab("N Fouls") +
  geom_histogram(data = allfouls, binwidth = 1, fill = "green", alpha = 0.5)
save_png(p_early, "figures/foul_histogram-regular_nofinalmin.png")

# hexbin plots of fouls as a function of total score and corrected score
# margin, separated by home and away teams (one facet per FOULTEAM value)
p_hex <- ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
  geom_hex() +
  scale_fill_viridis_c() +
  theme_bw() +
  facet_wrap(vars(FOULTEAM))
save_png(p_hex, "figures/fouls_totalscore-hexbin.png", height = 600, width = 1200)