Forecasting FPL player scores
I play in a premier league fantasy league, but I don’t watch soccer at all. So I’m interested in making a model that can predict player scores in the upcoming weeks.
Stats from fpl are accesible through the fpl package.
library(pacman)
p_load(tidyverse, magrittr, pander, fplr, goalmodel, rap, jsonlite, glue)
source('../../src/extra.R', echo = F, encoding="utf-8")
current_week <- 29
user_id <- 5367610
players <- fpl_get_player_all()
fixtures <- fpl_get_fixtures()
teams <- fpl_get_teams()
players <- full_join(players, teams %>% select(code, team_name = name), by = c("team_code" = "code"))
By summing up the previous fixtures, we can get the number of goals scored by team.
get_goals_scored <- function(week){
away_scores <- fixtures %>%
filter(event < week) %>%
group_by(team_a) %>%
summarise(score_a = sum(team_a_score, na.rm = T)) %>%
rename(team = team_a)
home_scores <- fixtures %>%
filter(event < week) %>%
group_by(team_h) %>%
summarise(score_h = sum(team_h_score, na.rm = T)) %>%
rename(team = team_h)
inner_join(home_scores, away_scores, by = "team") %>%
mutate(score = score_h + score_a, goals_per_week = score / current_week)
}
goals_scored <- get_goals_scored(current_week)
I use the goalmodel package to predict goals scored in specific matchups. A multiplier is calculated, which is the predicted number of goals divided by the average number of goals scored by the team in a gameweek.
get_predictions_gameweek <- function(week){
f <- fixtures %>% filter(event < current_week)
gm_res <- goalmodel(goals1 = f$team_h_score, goals2 = f$team_a_score,
team1 = f$team_h, team2 = f$team_a)
week <- fixtures %>% filter(event == week)
predv <- predict_expg(gm_res, team1=week$team_h, team2=week$team_a, return_df = T)
bind_rows(
predv %>% select(team = team1, exp = expg1, goals_against = expg2),
predv %>% select(team = team2, exp = expg2, goals_against = expg1)
) %>%
mutate(team = as.numeric(team)) %>%
inner_join(goals_scored, by = "team") %>%
mutate(multiplier = exp / goals_per_week)
}
f <- fixtures %>% filter(event < current_week)
Now we calculate the predicted points for a player in a specific fixture.
I make a simple model for the the upcoming fixtures based on the following principles:
Note that two important elements are ignored in this simple model: Chance to score goals for defenders, and form of players.
Defenders and goalkeepers score 4 points for a clean sheet, and get -1 for each two goals scored against them. I ignore the risk of conceding 4+ goals, and thus only 4 and -1 are modelled, using a poisson distribution based on the expected goal score against.
The estimate of the defender’s bonus points are calculated by making the simplifying assumption that it is related only to the number of clean sheets. If the player has at least three clean sheets, use an individualized bonus per clean sheet value for that player, otherwise the average is used. There is no need to make a seperate consideration for bonus points for forwards and midfielders, since this is already included in the total score.
get_mean_bonus_per_clean_sheet <- function(position){
players %>%
filter(clean_sheets >= 3, bonus > 1, element_type == position) %>%
mutate(bonus_per_clean_sheet = clean_sheets / bonus) %>%
summarise(a = mean(bonus_per_clean_sheet)) %>% pull(a)
}
mean_goalkeeper_bonus_per_clean_sheet <- get_mean_bonus_per_clean_sheet(1)
mean_defender_bonus_per_clean_sheet <- get_mean_bonus_per_clean_sheet(2)
players %<>% mutate(
bonus_per_clean_sheet = ifelse(
clean_sheets >= 3 & bonus > 1,
clean_sheets / bonus,
ifelse(element_type == 1,
mean_goalkeeper_bonus_per_clean_sheet,
mean_defender_bonus_per_clean_sheet))
)
players_get_points <- function(week){
week_predictions <- get_predictions_gameweek(week)
right_join(week_predictions, players, by = "team") %>%
mutate(multiplier = ifelse(is.na(multiplier), 0, multiplier)) %>%
filter(minutes > 500) %>%
mutate(expected_score = case_when(
element_type %in% c(3, 4) ~ points_per_game * multiplier,
element_type %in% c(1, 2) ~ 2 + ppois(0, lambda = goals_against) * (4 + bonus_per_clean_sheet) -
1 * ppois(2, lambda = goals_against, lower = F))) %>%
group_by(id, second_name) %>%
summarise(expected_score = sum(expected_score)) %>%
rename(!!quo_name(glue("gw_{week}")) := expected_score) %>%
ungroup()
}
Use the model to list the best players to get over the upcoming gameweeks
get_forecast <- function(week){
players_get_points(week) %>%
inner_join(players_get_points(week + 1), by = c("id", "second_name")) %>%
inner_join(players_get_points(week + 2), by = c("id", "second_name")) %>%
inner_join(players_get_points(week + 3), by = c("id", "second_name")) %>%
mutate(average = (!!sym(glue("gw_{week}")) + !!sym(glue("gw_{week + 1}")) +
!!sym(glue("gw_{week + 2}")) + !!sym(glue("gw_{week + 3}"))) / 4) %>%
left_join(players, by = c("id", "second_name")) %>%
mutate(position = case_when(
element_type == 1 ~ "G", element_type == 2 ~ "D",
element_type == 3 ~ "M", element_type == 4 ~ "F")) %>%
filter(chance_of_playing_next_round > 0) %>%
select(second_name, team_name, playing = chance_of_playing_next_round, position, ppg = points_per_game, !!sym(glue("gw_{week}")), !!sym(glue("gw_{week + 1}")),
!!sym(glue("gw_{week + 2}")), !!sym(glue("gw_{week + 3}")), average)
}
fcast <- get_forecast(current_week) %>%
arrange(desc(average))
fcast %>% head(30)
second_name | team_name | playing | position | ppg | gw_29 | gw_30 | gw_31 | gw_32 | average |
---|---|---|---|---|---|---|---|---|---|
Aubameyang | Arsenal | 100 | F | 6 | 13.5 | 6.25 | 0 | 9.24 | 7.26 |
Salah | Liverpool | 100 | M | 7.1 | 9.6 | 7.27 | 6.61 | 5.32 | 7.2 |
Mané | Liverpool | 100 | M | 6.6 | 8.93 | 6.76 | 6.15 | 4.95 | 6.7 |
Jiménez | Wolves | 100 | F | 5.2 | 6.32 | 6.11 | 7.19 | 6.95 | 6.64 |
De Bruyne | Man City | 75 | M | 6.8 | 13.4 | 8.13 | 0 | 4.31 | 6.45 |
Agüero | Man City | 100 | F | 5.9 | 11.6 | 7.05 | 0 | 3.74 | 5.6 |
Vardy | Leicester | 100 | F | 6.2 | 9.73 | 6.28 | 0 | 6.32 | 5.58 |
Mahrez | Man City | 100 | M | 5.5 | 10.8 | 6.57 | 0 | 3.49 | 5.22 |
Traoré | Wolves | 100 | M | 4 | 4.86 | 4.7 | 5.53 | 5.35 | 5.11 |
Alli | Spurs | 100 | M | 5.1 | 4.79 | 4.47 | 7.05 | 3.12 | 4.86 |
Sterling | Man City | 100 | M | 5 | 9.84 | 5.98 | 0 | 3.17 | 4.75 |
Jota | Wolves | 100 | F | 3.6 | 4.38 | 4.23 | 4.98 | 4.81 | 4.6 |
Gomez | Liverpool | 100 | D | 4 | 5.35 | 4.43 | 5.54 | 3.04 | 4.59 |
San Miguel del Castillo | Liverpool | 100 | G | 2.5 | 5.24 | 4.35 | 5.42 | 3 | 4.5 |
Matip | Liverpool | 100 | D | 4.1 | 5.21 | 4.33 | 5.38 | 2.99 | 4.48 |
Lovren | Liverpool | 100 | D | 1.9 | 5.21 | 4.33 | 5.38 | 2.99 | 4.48 |
Lacazette | Arsenal | 100 | F | 3.7 | 8.35 | 3.86 | 0 | 5.7 | 4.48 |
Pépé | Arsenal | 100 | M | 3.7 | 8.35 | 3.86 | 0 | 5.7 | 4.48 |
Silva | Man City | 100 | M | 4.7 | 9.25 | 5.62 | 0 | 2.98 | 4.46 |
van Dijk | Liverpool | 100 | D | 4.8 | 5.14 | 4.28 | 5.32 | 2.96 | 4.43 |
Sarr | Watford | 100 | M | 4.2 | 3.34 | 3.47 | 4.07 | 6.19 | 4.27 |
Robertson | Liverpool | 100 | D | 4.9 | 4.92 | 4.12 | 5.08 | 2.88 | 4.25 |
Alexander-Arnold | Liverpool | 100 | D | 5.7 | 4.84 | 4.06 | 5 | 2.86 | 4.19 |
Pulisic | Chelsea | 25 | M | 4.4 | 5.49 | 5.82 | 0 | 5.11 | 4.11 |
Deeney | Watford | 100 | F | 4 | 3.18 | 3.31 | 3.88 | 5.9 | 4.07 |
Saïss | Wolves | 100 | D | 2.7 | 4.33 | 3.56 | 4.61 | 3.66 | 4.04 |
de Jesus | Man City | 100 | F | 4.2 | 8.26 | 5.02 | 0 | 2.66 | 3.99 |
Cahill | Crystal Palace | 100 | D | 2.8 | 4.66 | 4.26 | 2.39 | 4.31 | 3.91 |
Maddison | Leicester | 100 | M | 4.3 | 6.75 | 4.35 | 0 | 4.39 | 3.87 |
Snodgrass | West Ham | 100 | M | 3.7 | 3.68 | 3.72 | 3.55 | 4.5 | 3.86 |
Forecast for players on my team
my_team <- players %>% inner_join(
fromJSON(glue("https://fantasy.premierleague.com/api/entry/{user_id}/event/{current_week -1}/picks/"),
simplifyVector = TRUE)$picks %>% mutate(id = element), by = "id")
fcast %>% filter(second_name %in% my_team$second_name)
second_name | team_name | playing | position | ppg | gw_29 | gw_30 | gw_31 | gw_32 | average |
---|---|---|---|---|---|---|---|---|---|
Mané | Liverpool | 100 | M | 6.6 | 8.93 | 6.76 | 6.15 | 4.95 | 6.7 |
Jiménez | Wolves | 100 | F | 5.2 | 6.32 | 6.11 | 7.19 | 6.95 | 6.64 |
Vardy | Leicester | 100 | F | 6.2 | 9.73 | 6.28 | 0 | 6.32 | 5.58 |
van Dijk | Liverpool | 100 | D | 4.8 | 5.14 | 4.28 | 5.32 | 2.96 | 4.43 |
Alexander-Arnold | Liverpool | 100 | D | 5.7 | 4.84 | 4.06 | 5 | 2.86 | 4.19 |
Maddison | Leicester | 100 | M | 4.3 | 6.75 | 4.35 | 0 | 4.39 | 3.87 |
Doherty | Wolves | 100 | D | 4.3 | 3.91 | 3.27 | 4.14 | 3.35 | 3.67 |
Martial | Man Utd | 100 | M | 5.2 | 4.72 | 4.77 | 0 | 5.15 | 3.66 |
Tarkowski | Burnley | 100 | D | 3.6 | 2.96 | 1.78 | 4 | 3.86 | 3.15 |
Ramsdale | Bournemouth | 100 | G | 3.4 | 1.65 | 3.62 | 2.31 | 3.63 | 2.8 |
Cantwell | Norwich | 100 | M | 3.5 | 2.25 | 5.26 | 0 | 3.48 | 2.75 |
Bednarek | Southampton | 100 | D | 2.3 | 3.99 | 3.41 | - | 3.28 | - |
Use the model to recommend the team positions for this gameweek
week_forecast <- players_get_points(current_week) %>%
rename(expected_score = !!sym(glue("gw_{current_week}")))
team_score <- inner_join(week_forecast, my_team, by = "second_name") %>%
select(second_name, element_type, expected_score)
goalkeeper <- team_score %>%
filter(element_type == 1) %>% top_n(1, expected_score)
t3_defenders <- team_score %>%
filter(element_type == 2) %>% top_n(3, expected_score)
b2_defenders <- team_score %>%
filter(element_type == 2) %>% top_n(-2, expected_score)
t3_midfielders <- team_score %>%
filter(element_type == 3) %>% top_n(3, expected_score)
b2_midfielders <- team_score %>%
filter(element_type == 3) %>% top_n(-2, expected_score)
t1_forwards <- team_score %>%
filter(element_type == 4) %>% top_n(1, expected_score)
b2_forwards <- team_score %>%
filter(element_type == 4) %>% top_n(-2, expected_score)
remaining_attack <- bind_rows(b2_midfielders, b2_forwards)
t2_attack <- remaining_attack %>% top_n(2, expected_score)
b2_attack <- remaining_attack %>% top_n(-2, expected_score)
remaining_players <- bind_rows(b2_defenders, b2_attack)
t1_remaining <- remaining_players %>% top_n(1, expected_score)
b3_remaining <- remaining_players %>% top_n(-3, expected_score) %>%
arrange(desc(expected_score)) %>%
rowid_to_column("bench_position")
picks <- bind_rows(goalkeeper, t3_defenders, t3_midfielders, t1_forwards, t2_attack, t1_remaining) %>%
full_join(team_score, by = c("second_name", "element_type", "expected_score")) %>%
full_join(b3_remaining, by = c("second_name", "element_type", "expected_score"))
picks %>% mutate(captain = case_when(
second_name == picks %>% arrange(desc(expected_score)) %>% slice(1) %>% pull(second_name) ~ "C",
second_name == picks %>% arrange(desc(expected_score)) %>% slice(2) %>% pull(second_name) ~ "VC",
T ~ NA_character_
)) %>%
arrange(element_type) %>%
mutate(position = case_when(
element_type == 1 ~ "G", element_type == 2 ~ "D",
element_type == 3 ~ "M", element_type == 4 ~ "F"
)) %>% select(player = second_name, position, expected_score, captain, bench_position)
player | position | expected_score | captain | bench_position |
---|---|---|---|---|
Ramsdale | G | 1.65 | - | - |
Alexander-Arnold | D | 4.84 | - | - |
van Dijk | D | 5.14 | - | - |
Bednarek | D | 3.99 | - | - |
Tarkowski | D | 2.96 | - | 2 |
Doherty | D | 3.91 | - | 1 |
Maddison | M | 6.75 | - | - |
Mané | M | 8.93 | VC | - |
Pérez | M | 6.91 | - | - |
Martial | M | 4.72 | - | - |
Cantwell | M | 2.25 | - | 3 |
Vardy | F | 9.73 | C | - |
Ings | F | 5.96 | - | - |
Jiménez | F | 6.32 | - | - |