Familiarization and Cleaning

  1. Load appropriate packages and libraries

  2. Import CMJ data

  3. Familiarize myself with the data structure, formatting, ranges, etc. in Excel through exploratory sorting/filtering

  4. Remove outliers based on weight (-1), team (NA), and metric values beyond +/- 3 standard deviations

Data Analysis

  1. Correlation analysis to evaluate similarity when selecting CMJ metrics, trying to avoid redundancy
# Collapse repeated rows to one mean per player / test date / metric, then
# spread the metrics into columns (one row per player-session).
cmj_wide <- cmj_data_clean %>%
  group_by(player_id, test_date, metric) %>%
  summarise(mean_value = mean(value), .groups = "drop") %>%
  pivot_wider(
    id_cols = c(player_id, test_date),
    names_from = metric,
    values_from = mean_value
  )

# Correlate every metric column with every other one. The two id columns are
# dropped first; "complete.obs" restricts the calculation to rows that have no
# missing metric. Coefficients are rounded to 2 decimals for display.
cor_matrix <- cmj_wide %>%
  select(-player_id, -test_date) %>%
  cor(use = "complete.obs") %>%
  round(2)
  1. Define selected CMJ metrics to be used for analysis
# CMJ metrics carried forward into the percentile comparisons and radar charts
selected_metrics <- c(
  "Concentric Mean Power / BM",
  "Eccentric Mean Power / BM",
  "Force at Zero Velocity",
  "RSI-modified",
  "Vertical Velocity at Takeoff"
)
  1. Calculate percentiles for Player 3185 vs Team and vs League Outfielders
# Calculate percentiles for Player 3185 vs Team
# For each selected metric: the player's mean value, and where that mean falls
# (0-100) in the empirical distribution of all "Team" rows for that metric.
# The object name is lower-case because the plotting code reads it as
# `team_comparison`.
team_comparison <- cmj_data_clean %>%
  filter(
    metric %in% selected_metrics,
    parent_team_name == "Team" | player_id == "3185"
  ) %>%
  group_by(metric) %>%
  summarize(
    player_value = mean(value[player_id == "3185"]),
    # Reuse player_value instead of recomputing the player's mean
    team_percentile =
      ecdf(value[parent_team_name == "Team"])(player_value) * 100,
    .groups = "drop"
  ) %>%
  # Keep rows in the same order as selected_metrics, because later code pairs
  # these percentiles with selected_metrics by position
  arrange(match(metric, selected_metrics))

# Calculate percentiles for Player 3185 vs Position 3 (excluding Team)
# Same calculation, but the reference distribution is every row with
# primary_position "3" whose parent team is not "Team".
position_comparison <- cmj_data_clean %>%
  filter(
    metric %in% selected_metrics,
    (primary_position == "3" & parent_team_name != "Team") |
      player_id == "3185"
  ) %>%
  group_by(metric) %>%
  summarize(
    player_value = mean(value[player_id == "3185"]),
    position_percentile =
      ecdf(value[primary_position == "3" & parent_team_name != "Team"])(
        player_value
      ) * 100,
    .groups = "drop"
  ) %>%
  arrange(match(metric, selected_metrics))

Plot Creation

  1. Correlation plot creation
# Render the correlation matrix to a PNG file
png("correlation_plot.png", width = 1500, height = 1300, res = 150)

# Upper-triangle colour grid, metrics in alphabetical order, with the
# coefficient printed in each cell and axis labels rotated for readability
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  order = "alphabet",
  tl.col = "black",
  tl.srt = 45,
  tl.cex = 0.9,
  addCoef.col = "black",
  number.cex = 0.9,
  mar = c(0, 0, 2, 0)
)
dev.off()
  1. Formatting data for each plot creation
# Build the 4-row data frame handed to radarchart(): axis maximum (100),
# axis minimum (0), a flat 50th-percentile reference line, and the player's
# percentiles. One column per metric.
#
# percentiles     numeric vector, one value per entry of metric_names
# metric_names    character vector used as the column (axis) names
# reference_label row name given to the 50th-percentile reference row
build_spider_data <- function(percentiles, metric_names, reference_label) {
  n_axes <- length(metric_names)
  # matrix() would silently recycle a wrong-length vector into a wrong chart
  stopifnot(length(percentiles) == n_axes)

  spider <- data.frame(
    matrix(
      c(rep(100, n_axes),
        rep(0, n_axes),
        rep(50, n_axes),
        percentiles),
      nrow = 4, byrow = TRUE
    )
  )
  colnames(spider) <- metric_names
  rownames(spider) <- c("max", "min", reference_label, "player")
  spider
}

# For Team comparison
team_spider_data <- build_spider_data(
  team_comparison$team_percentile,
  selected_metrics,
  "team_avg"
)

# For Position 3 comparison
position_spider_data <- build_spider_data(
  position_comparison$position_percentile,
  selected_metrics,
  "position_avg"
)
  1. Creating each plot
# vs Team
png("player_vs_team_spider.png", width = 1000, height = 900, res = 100)
# Two stacked panels: radar chart on top (4/5 of the height), text below (1/5)
layout(matrix(c(1, 2), nrow = 2, ncol = 1), heights = c(4, 1))

# Spider plot
par(mar = c(3, 4, 2, 4))
# Blue is the reference row ("team_avg"), red is the player row
radarchart(
  team_spider_data,
  pcol = c("blue", "red"),
  pfcol = c(rgb(0, 0, 1, 0.2), rgb(1, 0, 0, 0.2)),
  plwd = 2,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey",
  caxislabels = seq(0, 100, 25),
  title = "Player 3185 vs Team (Percentiles)"
)
legend(
  "topright",
  legend = c("Team Average", "Player 3185"),
  col = c("blue", "red"),
  lwd = 2,
  pch = 20,
  pt.cex = 2,
  bty = "n"
)

# Percentiles text
par(mar = c(0, 0, 0, 0))
plot.new()
text(0.5, 0.9, "Player 3185 Percentiles:", font = 2)
# seq_along() is safe for an empty vector; paste0() avoids the stray spaces
# that paste()'s default separator put around ":" and before "%"
for (i in seq_along(selected_metrics)) {
  text(
    0.5, 0.8 - i * 0.15,
    paste0(
      selected_metrics[i], ": ",
      round(team_comparison$team_percentile[i], 1), "%"
    )
  )
}
dev.off()

# vs Position 3
png("player_vs_position3_spider.png", width = 1000, height = 900, res = 100)
# Two stacked panels: radar chart on top (4/5 of the height), text below (1/5)
layout(matrix(c(1, 2), nrow = 2, ncol = 1), heights = c(4, 1))

# Spider plot
par(mar = c(3, 4, 2, 4))
# Green is the reference row ("position_avg"), red is the player row
radarchart(
  position_spider_data,
  pcol = c("green", "red"),
  pfcol = c(rgb(0, 1, 0, 0.2), rgb(1, 0, 0, 0.2)),
  plwd = 2,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey",
  caxislabels = seq(0, 100, 25),
  title = "Player 3185 vs Position 3 Players (Percentiles)"
)
legend(
  "topright",
  legend = c("Position 3 Average", "Player 3185"),
  col = c("green", "red"),
  lwd = 2,
  pch = 20,
  pt.cex = 2,
  bty = "n"
)

# Percentiles text
par(mar = c(0, 0, 0, 0))
plot.new()
text(0.5, 0.9, "Player 3185 Percentiles:", font = 2)
# seq_along() is safe for an empty vector; paste0() avoids the stray spaces
# that paste()'s default separator put around ":" and before "%"
for (i in seq_along(selected_metrics)) {
  text(
    0.5, 0.8 - i * 0.15,
    paste0(
      selected_metrics[i], ": ",
      round(position_comparison$position_percentile[i], 1), "%"
    )
  )
}
dev.off()

Visualizations

  1. Correlation Matrix of CMJ Metrics
    Correlation Matrix of CMJ Metrics

    Correlation Matrix of CMJ Metrics

  2. Player 3185 Percentiles vs Team
    Player 3185 Percentiles vs Team

    Player 3185 Percentiles vs Team

  3. Player 3185 Percentiles vs League Outfield Average
    Player 3185 Percentiles vs League Outfield Average

    Player 3185 Percentiles vs League Outfield Average

Key Considerations

Selected force plate metrics:

Logic and Thought Process

Findings and Practical Applications

Limitations and Future Considerations