| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
# Analysis of foul data
library(tidyverse)

# Locate the foul tables in data/. `pattern` is a regular expression, not a
# shell glob, so match the literal suffix and anchor it at the end of the name.
datafiles <- list.files("data", pattern = "-allfouls\\.csv$", full.names = FALSE)
if (length(datafiles) == 0) {
  stop("No '*-allfouls.csv' files found in 'data/'", call. = FALSE)
}

# Read every file and stack the rows into one table. The input vector is named
# with the file names so that `.id` stores the source file name in `filename`;
# with an unnamed input it would only store the position ("1", "2", ...).
allfouls <- purrr::map_df(
  purrr::set_names(datafiles),
  ~ read_csv(file.path("data", .x)),
  .id = "filename"
)
## Plots

# Histogram of fouls as a function of corrected score margin (all fouls),
# with counts on a log10 axis.
# The plot is print()ed explicitly: a ggplot object is only drawn when it is
# printed, and auto-printing does not happen when the script is run through
# source(), which would leave the PNG empty.
png("figures/foul_histogram-all.png")
print(
  ggplot(allfouls, aes(x = SCOREMARGIN_CORR)) +
    geom_histogram(binwidth = 1, fill = "black") +
    theme_bw() +
    scale_y_log10() +
    xlab("Score Margin") + ylab("N Fouls")
)
dev.off()

# Summary statistics of the corrected score margin over all fouls
# (printed explicitly so they also show up under source()).
print(median(allfouls$SCOREMARGIN_CORR))
print(mean(allfouls$SCOREMARGIN_CORR))
print(sd(allfouls$SCOREMARGIN_CORR))
# Histogram of fouls as a function of corrected score margin,
# ignoring overtime and the final minute of regular play.
# NOTE(review): the final-minute test compares PCTIMESTRING against the string
# "00:01:00"; it presumably relies on the column holding zero-padded clock
# values in that format -- confirm against the CSV files.
earlyfouls <- filter(
  allfouls,
  PERIOD <= 4,
  !(PERIOD == 4 & PCTIMESTRING < "00:01:00")
)

# Summary statistics of the corrected score margin for this subset
# (printed explicitly so they also show up under source()).
print(median(earlyfouls$SCOREMARGIN_CORR))
print(mean(earlyfouls$SCOREMARGIN_CORR))
print(sd(earlyfouls$SCOREMARGIN_CORR))

# Overlay: the subset in translucent red, with the histogram of all fouls
# drawn on top in translucent green. The plot is print()ed explicitly because
# ggplot objects are not auto-printed under source().
png("figures/foul_histogram-regular_nofinalmin.png")
print(
  ggplot(earlyfouls, aes(x = SCOREMARGIN_CORR)) +
    geom_histogram(binwidth = 1, fill = "red", alpha = 0.5) +
    theme_bw() +
    scale_y_log10() +
    xlab("Score Margin") + ylab("N Fouls") +
    geom_histogram(data = allfouls, binwidth = 1, fill = "green", alpha = 0.5)
)
dev.off()
# Look at the distribution of fouls in the final minute of the fourth period.
# NOTE(review): the final-minute test compares PCTIMESTRING against the string
# "00:01:00"; it presumably relies on the column holding zero-padded clock
# values in that format -- confirm against the CSV files.
lastminfouls <- filter(
  allfouls,
  PERIOD == 4 & PCTIMESTRING < "00:01:00"
)

# Summary statistics of the corrected score margin for this subset
# (printed explicitly so they also show up under source()).
print(median(lastminfouls$SCOREMARGIN_CORR))
print(mean(lastminfouls$SCOREMARGIN_CORR))
print(sd(lastminfouls$SCOREMARGIN_CORR))

# Overlay: the subset in translucent red, with the histogram of all fouls
# drawn on top in translucent green. The plot is print()ed explicitly because
# ggplot objects are not auto-printed under source().
png("figures/foul_histogram-regular_finalmin.png")
print(
  ggplot(lastminfouls, aes(x = SCOREMARGIN_CORR)) +
    geom_histogram(binwidth = 1, fill = "red", alpha = 0.5) +
    theme_bw() +
    scale_y_log10() +
    xlab("Score Margin") + ylab("N Fouls") +
    geom_histogram(data = allfouls, binwidth = 1, fill = "green", alpha = 0.5)
)
dev.off()
# Overtime fouls: every foul recorded after the fourth period.
overtimefouls <- filter(allfouls, PERIOD > 4)

# Summary statistics of the corrected score margin for this subset
# (printed explicitly so they also show up under source()).
print(median(overtimefouls$SCOREMARGIN_CORR))
print(mean(overtimefouls$SCOREMARGIN_CORR))
print(sd(overtimefouls$SCOREMARGIN_CORR))

# Overlay: the subset in translucent red, with the histogram of all fouls
# drawn on top in translucent green. The plot is print()ed explicitly because
# ggplot objects are not auto-printed under source().
png("figures/foul_histogram-overtime.png")
print(
  ggplot(overtimefouls, aes(x = SCOREMARGIN_CORR)) +
    geom_histogram(binwidth = 1, fill = "red", alpha = 0.5) +
    theme_bw() +
    scale_y_log10() +
    xlab("Score Margin") + ylab("N Fouls") +
    geom_histogram(data = allfouls, binwidth = 1, fill = "green", alpha = 0.5)
)
dev.off()
# Hexbin plots of fouls as a function of total score and corrected score
# margin, separated by home and away teams (one panel per FOULTEAM value).
# The plot is print()ed explicitly: a ggplot object is only drawn when it is
# printed, and auto-printing does not happen under source().
png("figures/fouls_totalscore-hexbin-byhomevisitor.png", height = 600, width = 1200)
print(
  ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
    geom_hex() +
    scale_fill_viridis_c() +
    theme_bw() +
    facet_wrap(vars(FOULTEAM))
)
dev.off()
# Hexbin plots of fouls as a function of total score and corrected score
# margin, separated by season (one panel per value of the `filename` column
# that identifies the source file of each row).
# The plot is print()ed explicitly: a ggplot object is only drawn when it is
# printed, and auto-printing does not happen under source().
png("figures/fouls_totalscore-hexbin-byseason.png", height = 1200, width = 1200)
print(
  ggplot(allfouls, aes(SCOREMARGIN_CORR, TOTALSCORE)) +
    geom_hex() +
    scale_fill_viridis_c() +
    theme_bw() +
    facet_wrap(vars(filename))
)
dev.off()
|