Load appropriate packages and libraries
Import CMJ data
Write data into an Excel file for export
Familiarize myself with the data structure, formatting, ranges, etc. in Excel through exploratory sorting/filtering
Sort by date as output will be chronological
Remove outliers based on first and last dates of training and extreme total distance values
# Overall correlation
# Pearson correlation between total distance and high-speed distance
# (`Distance > 85%`), using only records whose total distance is positive.
# Rows with a missing value in either column are ignored by cor() via
# use = "complete.obs"; n_observations is the row count after the filter.
overall_cor <- gps_data_clean %>%
  filter(`Total Distance` > 0) %>%
  summarise(
    correlation = cor(
      x = `Total Distance`,
      y = `Distance > 85%`,
      use = "complete.obs",
      method = "pearson"
    ),
    n_observations = n(),
    # Label so this summary can be told apart from per-player summaries.
    type = "Overall Dataset"
  )
# Average weekly correlation per athlete
# For every player-week with at least three rows, compute the Pearson
# correlation between total distance and high-speed distance
# (`Distance > 85%`), then average the usable weekly correlations per player.

# Pearson correlation over the complete (non-NA) pairs of x and y.
# Returns NA_real_ instead of raising an error or warning when the
# correlation is undefined: fewer than two complete pairs, or either
# variable has no spread (sd of 0 or NA) across those pairs.
safe_weekly_cor <- function(x, y) {
  keep <- complete.cases(x, y)
  x <- x[keep]
  y <- y[keep]
  if (length(x) < 2L) {
    return(NA_real_)
  }
  # isTRUE() guards the scalar `if` against an NA standard deviation.
  has_spread <- isTRUE(sd(x) > 0) && isTRUE(sd(y) > 0)
  if (!has_spread) {
    return(NA_real_)
  }
  cor(x, y, method = "pearson")
}

player_avg_cors <- gps_data_clean %>%
  mutate(Week = floor_date(Date, "week")) %>%
  group_by(player_id, Week) %>%
  # Require a minimum of three rows in a week before correlating.
  filter(n() >= 3) %>%
  summarise(
    correlation = safe_weekly_cor(`Total Distance`, `Distance > 85%`),
    n_observations = n(),
    .groups = "drop"
  ) %>%
  # Weeks where the correlation is undefined do not contribute to the average.
  filter(!is.na(correlation)) %>%
  group_by(player_id) %>%
  summarise(
    avg_correlation = mean(correlation, na.rm = TRUE),
    total_weeks = n(),
    total_observations = sum(n_observations),
    .groups = "drop"
  ) %>%
  arrange(player_id)
# Begin ACWR
# Adds acute (7-row) and chronic (28-row) trailing rolling means, and their
# ratio (ACWR), for high-speed distance and total distance, per player.
#
# NOTE(review): the windows count rows, not calendar days, so 7/28 equal
# "days" only if every player has exactly one row per day - confirm.
# NOTE(review): zoo documents that rollmean()'s default method does not
# handle NA inputs; if the distance columns can contain NA, consider
# rollapply(..., FUN = mean, na.rm = TRUE) instead - confirm the intent.

# Right-aligned rolling mean of `x` over `k` rows, rounded to 1 decimal.
# A series shorter than the window yields all NA rather than calling
# rollmean() with a window larger than the data.
trailing_mean <- function(x, k) {
  if (length(x) < k) {
    return(rep(NA_real_, length(x)))
  }
  round(rollmean(x, k = k, fill = NA, align = "right"), 1)
}

# Acute:chronic ratio rounded to 2 decimals. A zero chronic load would give
# Inf or NaN; those are returned as NA so downstream max()/plot scaling with
# na.rm = TRUE is not poisoned by infinite values.
acwr_ratio <- function(acute, chronic) {
  ratio <- round(acute / chronic, 2)
  ratio[!is.finite(ratio)] <- NA_real_
  ratio
}

gps_data_clean <- gps_data_clean %>%
  # Group by player so calculations are player-specific
  group_by(player_id) %>%
  # Rows must be in date order for the trailing windows to be meaningful.
  arrange(Date) %>%
  mutate(
    # High Speed Distance calculations
    Ac_HSD = trailing_mean(`Distance > 85%`, 7),
    Ch_HSD = trailing_mean(`Distance > 85%`, 28),
    ACWR_HSD = acwr_ratio(Ac_HSD, Ch_HSD),
    # Total Distance calculations
    Ac_tdist = trailing_mean(`Total Distance`, 7),
    Ch_tdist = trailing_mean(`Total Distance`, 28),
    ACWR_tdist = acwr_ratio(Ac_tdist, Ch_tdist)
  ) %>%
  ungroup()
# Spot-check the rolling-load columns for a single player (id 69756):
# keep the date, the two raw distance columns, and their acute / chronic /
# ACWR companions.
example_player <- gps_data_clean %>%
  filter(player_id == 69756) %>%
  select(
    all_of(c(
      "Date",
      "Distance > 85%", "Ac_HSD", "Ch_HSD", "ACWR_HSD",
      "Total Distance", "Ac_tdist", "Ch_tdist", "ACWR_tdist"
    ))
  )
# First and last dates on which Player 69756 has a recorded, positive
# Total Distance. The result is a summary with `start_date` and `end_date`.
player_dates <- gps_data_clean %>%
  filter(
    player_id == 69756,
    !is.na(`Total Distance`),
    `Total Distance` > 0
  ) %>%
  summarise(
    start_date = min(Date),
    end_date = max(Date)
  )
# All of Player 69756's rows that fall inside the active date range
# computed in player_dates (both end points inclusive).
player_data <- gps_data_clean %>%
  filter(
    player_id == 69756,
    Date >= player_dates$start_date,
    Date <= player_dates$end_date
  )
# Weekly summary for Player 69756, one row per week (Date floored to week):
#   Total Distance       - mean of the week's Total Distance values
#   High Speed Distance  - sum of the week's `Distance > 85%` values
#   High Speed Exposures - number of rows whose `Distance > 85%` exceeds 50
weekly_distances <- player_data %>%
  group_by(Week = floor_date(Date, "week")) %>%
  summarise(
    `Total Distance` = mean(`Total Distance`, na.rm = TRUE),
    `High Speed Distance` = sum(`Distance > 85%`, na.rm = TRUE),
    `High Speed Exposures` = sum(`Distance > 85%` > 50, na.rm = TRUE)
  )
# Scatter plot of Total Distance against high-speed distance for every
# record with a positive Total Distance, with a linear fit and the overall
# Pearson r (taken from overall_cor) printed on the panel.
correlation_plot <- ggplot(
  data = gps_data_clean %>% filter(`Total Distance` > 0),
  mapping = aes(x = `Total Distance`, y = `Distance > 85%`)
) +
  geom_point(color = "blue", alpha = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    title = "Total Distance vs High Speed Distance",
    x = "Total Distance (m)",
    y = "High Speed Distance (m)"
  ) +
  # Place the r label at 80% / 90% of the largest x / y values in the data.
  annotate(
    geom = "text",
    x = 0.8 * max(gps_data_clean$`Total Distance`, na.rm = TRUE),
    y = 0.9 * max(gps_data_clean$`Distance > 85%`, na.rm = TRUE),
    label = paste("r =", round(overall_cor$correlation, 2)),
    fontface = "bold",
    size = 5
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    # Force a solid white canvas behind both the plot and the panel.
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA)
  )
# Write the correlation plot to disk as a 10 x 8 inch, 300 dpi PNG
# on a white background.
ggsave(
  filename = "distance_correlation.png",
  plot = correlation_plot,
  width = 10,
  height = 8,
  units = "in",
  dpi = 300,
  bg = "white"
)
# Create the plot with dual y-axes with formatting
# Bars: weekly mean Total Distance and weekly summed high-speed distance
# (primary axis). Lines: smoothed ACWR for HSD and for Total Distance, read
# against the secondary axis, with dashed reference lines at ACWR 0.8 and 1.3.

# Both ACWR series must share ONE rescaling factor: the secondary axis can
# only invert a single transformation, so scaling each line by its own
# maximum would put one of them on a scale that matches neither the axis
# nor the 0.8 / 1.3 reference lines.
max_weekly_dist <- max(weekly_distances$`Total Distance`, na.rm = TRUE)
acwr_values <- c(player_data$ACWR_HSD, player_data$ACWR_tdist)
# Only finite ratios may define the scale (drops NA, NaN and +/-Inf).
max_acwr <- max(acwr_values[is.finite(acwr_values)])
acwr_to_dist <- max_weekly_dist / max_acwr

# NOTE(review): the x limits below are the player's first/last active dates,
# while bars are centred on week starts with width 6; bars that extend past
# the limits may be dropped by the scale - confirm the first/last weeks show.
player_plot <- ggplot() +
  geom_bar(
    data = weekly_distances,
    aes(x = Week, y = `Total Distance`, fill = "Weekly Avg Total Dist"),
    stat = "identity", alpha = 0.5, width = 6
  ) +
  geom_bar(
    data = weekly_distances,
    aes(x = Week, y = `High Speed Distance`, fill = "Weekly Total HSD"),
    stat = "identity", alpha = 0.7, width = 6
  ) +
  geom_smooth(
    data = player_data,
    aes(x = Date, y = ACWR_HSD * acwr_to_dist, color = "ACWR HSD"),
    linewidth = 1, se = FALSE, span = 0.2
  ) +
  geom_smooth(
    data = player_data,
    aes(x = Date, y = ACWR_tdist * acwr_to_dist, color = "ACWR Total Dist"),
    linewidth = 1, se = FALSE, span = 0.2
  ) +
  # ACWR reference band, expressed in primary-axis units.
  geom_hline(
    yintercept = c(0.8, 1.3) * acwr_to_dist,
    linetype = "dashed", color = "purple", linewidth = 0.8
  ) +
  scale_y_continuous(
    name = "Total Distance",
    # Inverse of the rescaling applied to the ACWR lines above.
    sec.axis = sec_axis(
      ~ . / acwr_to_dist,
      name = "ACWR Ratio",
      breaks = seq(0, 2, by = 0.2)
    )
  ) +
  scale_x_date(
    limits = c(player_dates$start_date, player_dates$end_date),
    expand = c(0, 0),
    breaks = seq(
      from = player_dates$start_date,
      to = player_dates$end_date,
      by = "1 month"
    ),
    labels = scales::date_format("%b %Y")
  ) +
  scale_fill_manual(
    values = c("Weekly Avg Total Dist" = "blue", "Weekly Total HSD" = "red")
  ) +
  scale_color_manual(
    values = c("ACWR HSD" = "orange", "ACWR Total Dist" = "black")
  ) +
  theme_minimal() +
  labs(
    title = "Player 69756 Total Distance and HSD Metrics",
    x = "Date",
    fill = "Bars",
    color = "Lines"
  ) +
  theme(
    plot.title = element_text(
      size = 24, face = "bold", hjust = 0.5, margin = margin(b = 20)
    ),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text = element_text(size = 12, face = "bold", color = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 12, face = "bold"),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.margin = margin(t = 10)
  )
Correlation of Total Distance vs High Speed Distance
Player 69756 HSD Trends
– Logic and Thought Process
– Findings and Practical Applications
– Limitations and Future Considerations