Familiarization and Cleaning

  1. Load appropriate packages and libraries

  2. Import CMJ data

  3. Write data into an Excel file for export

  4. Familiarize myself with the data structure, formatting, ranges, etc. in Excel through exploratory sorting/filtering

  5. Sort by date as output will be chronological

  6. Remove outliers based on first and last dates of training and extreme total distance values

Data Analysis

  1. Correlation of Total Distance and High Speed Distance for whole data set, as well as average weekly correlation per player
# Overall correlation
# Pearson r between Total Distance and High Speed Distance (`Distance > 85%`)
# over every row with a positive Total Distance. The row count and a constant
# label identifying this as the whole-dataset figure are stored alongside it.
overall_cor <- gps_data_clean %>%
  filter(`Total Distance` > 0) %>%
  summarise(
    correlation = cor(
      x = `Total Distance`,
      y = `Distance > 85%`,
      use = "complete.obs",
      method = "pearson"
    ),
    n_observations = n(),
    type = "Overall Dataset"
  )

# Average weekly correlation per athlete
#
# For every player-week with at least three rows, compute the Pearson
# correlation between Total Distance and High Speed Distance
# (`Distance > 85%`), then average the valid weekly correlations per player.
# Output columns: player_id, avg_correlation, total_weeks, total_observations.
player_avg_cors <- gps_data_clean %>%
  mutate(Week = floor_date(Date, "week")) %>%
  group_by(player_id, Week) %>%
  filter(n() >= 3) %>%
  summarise(
    # Rows where both metrics are present: the only rows that
    # cor(use = "complete.obs") can use. With none, cor() raises an error.
    n_pairs = sum(!is.na(`Total Distance`) & !is.na(`Distance > 85%`)),
    sd_total = sd(`Total Distance`, na.rm = TRUE),
    sd_hsd = sd(`Distance > 85%`, na.rm = TRUE),
    # isTRUE() turns an NA standard deviation (fewer than two non-missing
    # values) into FALSE; a bare `if (NA)` would abort the whole pipeline.
    correlation = if (n_pairs >= 2 &&
      isTRUE(sd_total > 0) &&
      isTRUE(sd_hsd > 0)) {
      cor(`Total Distance`, `Distance > 85%`,
          method = "pearson", use = "complete.obs")
    } else {
      NA_real_
    },
    n_observations = n(),
    .groups = "drop"
  ) %>%
  # Weeks without a usable correlation do not contribute to the average
  filter(!is.na(correlation)) %>%
  group_by(player_id) %>%
  summarise(
    avg_correlation = mean(correlation, na.rm = TRUE),
    total_weeks = n(),
    total_observations = sum(n_observations)
  ) %>%
  arrange(player_id)
  1. Calculate Acute Load, Chronic Load, and ACWR for both High Speed Distance and Total Distance
# Begin ACWR
#
# Adds acute (7-row) and chronic (28-row) trailing mean loads plus their
# ratio (ACWR) for High Speed Distance and Total Distance, per player.
# NOTE(review): the windows count rows, not calendar days, so this assumes
# one row per player per day -- confirm against the cleaning step.

# Right-aligned rolling mean of the last `k` values of `x`.
# rollapply() with an explicit function is used instead of rollmean(), which
# is documented as not handling NA inputs. A window containing NA yields NA
# for that window only. A series shorter than `k` returns all NA instead of
# relying on how the rolling function treats an oversized window.
rolling_mean_right <- function(x, k) {
  if (length(x) < k) {
    return(rep(NA_real_, length(x)))
  }
  zoo::rollapply(
    x,
    width = k,
    FUN = function(window) mean(window),
    fill = NA,
    align = "right"
  )
}

# Acute:chronic ratio rounded to 2 decimals. A zero chronic load produces
# NaN or Inf, which is stored as NA so later max()/plot scaling stays finite.
acwr_ratio <- function(acute, chronic) {
  ratio <- round(acute / chronic, 2)
  ratio[!is.finite(ratio)] <- NA_real_
  ratio
}

gps_data_clean <- gps_data_clean %>%
  # Group by player so calculations are player-specific
  group_by(player_id) %>%
  # Rows must be in date order within each player for the trailing windows
  arrange(Date) %>%
  mutate(
    # High Speed Distance calculations
    Ac_HSD = round(rolling_mean_right(`Distance > 85%`, 7), 1),
    Ch_HSD = round(rolling_mean_right(`Distance > 85%`, 28), 1),
    ACWR_HSD = acwr_ratio(Ac_HSD, Ch_HSD),

    # Total Distance calculations
    Ac_tdist = round(rolling_mean_right(`Total Distance`, 7), 1),
    Ch_tdist = round(rolling_mean_right(`Total Distance`, 28), 1),
    ACWR_tdist = acwr_ratio(Ac_tdist, Ch_tdist)
  ) %>%
  ungroup()
  1. Visually double-check ACWR calculations for execution and accuracy
# Spot-check one player's rows: the raw metric next to its acute load,
# chronic load and ACWR, first for High Speed Distance, then Total Distance.
example_player <- gps_data_clean %>%
  filter(player_id == 69756) %>%
  select(
    Date,
    all_of(c("Distance > 85%", "Ac_HSD", "Ch_HSD", "ACWR_HSD")),
    all_of(c("Total Distance", "Ac_tdist", "Ch_tdist", "ACWR_tdist"))
  )
  1. Find date range of Player 69756’s data and filter out all dates before and after (for conciseness of plot)
# First and last dates on which Player 69756 has a recorded, positive
# Total Distance
player_dates <- gps_data_clean %>%
  filter(
    player_id == 69756,
    !is.na(`Total Distance`),
    `Total Distance` > 0
  ) %>%
  summarise(
    start_date = min(Date),
    end_date = max(Date)
  )

# All of Player 69756's rows that fall inside that date range (inclusive)
player_data <- gps_data_clean %>%
  filter(
    player_id == 69756,
    Date >= player_dates$start_date,
    Date <= player_dates$end_date
  )
  1. Calculate weekly averages and sums of distance metrics to be used in plot
# Weekly summary for Player 69756, one row per week (weeks start on the date
# returned by floor_date(Date, "week")):
#   - Total Distance:       mean of the week's Total Distance values
#   - High Speed Distance:  sum of the week's `Distance > 85%` values
#   - High Speed Exposures: number of rows with `Distance > 85%` above 50
weekly_distances <- summarise(
  group_by(
    mutate(player_data, Week = floor_date(Date, unit = "week")),
    Week
  ),
  `Total Distance` = mean(`Total Distance`, na.rm = TRUE),
  `High Speed Distance` = sum(`Distance > 85%`, na.rm = TRUE),
  `High Speed Exposures` = sum(`Distance > 85%` > 50, na.rm = TRUE)
)

Plot Creation

  1. Plot creation for distance metrics correlation
# Scatter plot of Total Distance against High Speed Distance for rows with a
# positive Total Distance, with a linear fit and the overall Pearson r
# (taken from overall_cor) printed near the top-right of the panel.
correlation_plot <- gps_data_clean %>%
  filter(`Total Distance` > 0) %>%
  ggplot(aes(x = `Total Distance`, y = `Distance > 85%`)) +
  geom_point(color = "blue", alpha = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  # Label position is a fixed fraction of each metric's maximum over the
  # full (unfiltered) data set
  annotate(
    "text",
    x = 0.8 * max(gps_data_clean$`Total Distance`, na.rm = TRUE),
    y = 0.9 * max(gps_data_clean$`Distance > 85%`, na.rm = TRUE),
    label = paste("r =", round(overall_cor$correlation, 2)),
    fontface = "bold",
    size = 5
  ) +
  labs(
    title = "Total Distance vs High Speed Distance",
    x = "Total Distance (m)",
    y = "High Speed Distance (m)"
  ) +
  theme_minimal() +
  theme(
    # Force white backgrounds so the saved PNG is not transparent
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10)
  )

# Write the plot to disk as a 10 x 8 inch, 300 dpi PNG
ggsave(
  filename = "distance_correlation.png",
  plot = correlation_plot,
  width = 10,
  height = 8,
  units = "in",
  dpi = 300,
  bg = "white"
)
  1. Plot creation for distance metrics trends
# Create the plot with dual y-axes with formatting
#
# Bars: weekly mean Total Distance and weekly summed High Speed Distance
# (primary axis). Lines: smoothed ACWR for HSD and for Total Distance
# (secondary axis). Dashed lines mark ACWR values 0.8 and 1.3.

# One shared factor converts ACWR units to primary-axis units. Both ACWR
# lines, the reference lines and the secondary axis must use the same factor,
# otherwise a line cannot be read against the secondary axis. The factor maps
# the largest finite ACWR value (either metric) to the tallest Total Distance
# bar.
acwr_values <- c(player_data$ACWR_HSD, player_data$ACWR_tdist)
acwr_scale <- max(weekly_distances$`Total Distance`, na.rm = TRUE) /
  max(acwr_values[is.finite(acwr_values)])

player_plot <- ggplot() +
  geom_col(
    data = weekly_distances,
    aes(x = Week, y = `Total Distance`, fill = "Weekly Avg Total Dist"),
    alpha = 0.5, width = 6
  ) +
  geom_col(
    data = weekly_distances,
    aes(x = Week, y = `High Speed Distance`, fill = "Weekly Total HSD"),
    alpha = 0.7, width = 6
  ) +
  geom_smooth(
    data = player_data,
    aes(x = Date, y = ACWR_HSD * acwr_scale, color = "ACWR HSD"),
    linewidth = 1, se = FALSE, span = 0.2
  ) +
  geom_smooth(
    data = player_data,
    aes(x = Date, y = ACWR_tdist * acwr_scale, color = "ACWR Total Dist"),
    linewidth = 1, se = FALSE, span = 0.2
  ) +
  geom_hline(
    yintercept = c(0.8, 1.3) * acwr_scale,
    linetype = "dashed", color = "purple", linewidth = 0.8
  ) +
  scale_y_continuous(
    name = "Total Distance",
    # Inverse of acwr_scale, so secondary-axis labels are true ACWR values
    sec.axis = sec_axis(
      ~ . / acwr_scale,
      name = "ACWR Ratio", breaks = seq(0, 2, by = 0.2)
    )
  ) +
  scale_x_date(
    expand = c(0, 0),
    breaks = seq(
      from = player_dates$start_date,
      to = player_dates$end_date,
      by = "1 month"
    ),
    labels = scales::date_format("%b %Y")
  ) +
  # Zoom with the coord rather than scale limits: scale limits discard bars
  # whose week start or edges fall outside the range, which removes the first
  # and last weekly bars when the range does not begin/end on a week boundary.
  coord_cartesian(
    xlim = c(player_dates$start_date, player_dates$end_date)
  ) +
  scale_fill_manual(
    values = c("Weekly Avg Total Dist" = "blue", "Weekly Total HSD" = "red")
  ) +
  scale_color_manual(
    values = c("ACWR HSD" = "orange", "ACWR Total Dist" = "black")
  ) +
  theme_minimal() +
  labs(
    title = "Player 69756 Total Distance and HSD Metrics",
    x = "Date", fill = "Bars", color = "Lines"
  ) +
  theme(
    plot.title = element_text(
      size = 24, face = "bold", hjust = 0.5, margin = margin(b = 20)
    ),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text = element_text(size = 12, face = "bold", color = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 12, face = "bold"),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.margin = margin(t = 10)
  )

Visualizations

  1. Correlation of Total Distance vs High Speed Distance
    Correlation of Total Distance vs High Speed Distance

    Correlation of Total Distance vs High Speed Distance

  2. Player 69756 HSD Trends: HSD and Total Distance Averages, Totals, and ACWRs
    Player 69756 HSD Trends

    Player 69756 HSD Trends

Key Considerations

Logic and Thought Process

Findings and Practical Applications

Limitations and Future Considerations