Import Packages

In [1]:
from IPython.display import HTML
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib import gridspec
from bs4 import BeautifulSoup
import requests
import urllib
from urllib.request import urlopen
import datetime
from datetime import date
from dateutil import relativedelta

Styling

In [2]:
%%HTML
<style type="text/css">
table.dataframe td, table.dataframe th {
    border-style: solid;
    border-width: thin;
    border-color: black;
}

table.dataframe th {
    background-color: grey;
    color: white;
    
}

table.dataframe tr:hover {
    background-color: rgba(46, 139, 87, 0.8);
    color: white;
    
}
</style>

Get and Prepare Data

Here we run some code to get our data from the website

I'll be using Fortuna Düsseldorf for this example as they are my local team [ and I'd love a job if anyone at @F95 ever reads this ;) ]

In [41]:
## CODE TO GET PLAYERS GOES HERE

## CODE TO LOOP THROUGH PLAYERS AND GET DATA GOES HERE

master_df
100%|██████████| 28/28 [00:38<00:00,  1.38s/it]
Out[41]:
season team comp mins firstName lastName dob
0 2018/2019 Fortuna Düsseldorf BUN 963 Oliver Fink 6 June 1982
1 2017/2018 Fortuna Düsseldorf 2.B 1676 Oliver Fink 6 June 1982
2 2016/2017 Fortuna Düsseldorf 2.B 1923 Oliver Fink 6 June 1982
3 2015/2016 Fortuna Düsseldorf 2.B 1085 Oliver Fink 6 June 1982
4 2014/2015 Fortuna Düsseldorf 2.B 1014 Oliver Fink 6 June 1982
5 2013/2014 Fortuna Düsseldorf 2.B 2083 Oliver Fink 6 June 1982
6 2012/2013 Fortuna Düsseldorf BUN 1861 Oliver Fink 6 June 1982
7 2011/2012 Fortuna Düsseldorf 2.B 2649 Oliver Fink 6 June 1982
8 2010/2011 Fortuna Düsseldorf 2.B 2765 Oliver Fink 6 June 1982
9 2009/2010 Fortuna Düsseldorf 2.B 2379 Oliver Fink 6 June 1982
10 2008/2009 Unterhaching 3.L ? Oliver Fink 6 June 1982
11 2018/2019 Fortuna Düsseldorf BUN 635 Kenan Karaman 5 March 1994
12 2017/2018 Hannover 96 BUN 845 Kenan Karaman 5 March 1994
13 2016/2017 Hannover 96 2.B 1635 Kenan Karaman 5 March 1994
14 2015/2016 Hannover 96 BUN 1743 Kenan Karaman 5 March 1994
15 2014/2015 Hannover 96 II REG 540 Kenan Karaman 5 March 1994
16 2014/2015 Hannover 96 BUN 257 Kenan Karaman 5 March 1994
17 2013/2014 Hoffenheim BUN 34 Kenan Karaman 5 March 1994
18 2013/2014 Hoffenheim II REG 565 Kenan Karaman 5 March 1994
19 2012/2013 Hoffenheim U19 U1B 1394 Kenan Karaman 5 March 1994
20 2012/2013 Hoffenheim II REG 1360 Kenan Karaman 5 March 1994
21 2011/2012 Hoffenheim II REG 13 Kenan Karaman 5 March 1994
22 2018/2019 Fortuna Düsseldorf BUN 0 Diego Armando Valentin Contento 1 May 1990
23 2017/2018 Bordeaux LI1 106 Diego Armando Valentin Contento 1 May 1990
24 2016/2017 Bordeaux LI1 2104 Diego Armando Valentin Contento 1 May 1990
25 2015/2016 Bordeaux II NA2 90 Diego Armando Valentin Contento 1 May 1990
26 2015/2016 Bordeaux LI1 2113 Diego Armando Valentin Contento 1 May 1990
27 2014/2015 Bordeaux LI1 1984 Diego Armando Valentin Contento 1 May 1990
28 2014/2015 Bayern München BUN 0 Diego Armando Valentin Contento 1 May 1990
29 2013/2014 Bayern München BUN 577 Diego Armando Valentin Contento 1 May 1990
... ... ... ... ... ... ... ...
298 2018/2019 Fortuna Düsseldorf BUN 2600 Matthias Zimmermann 16 June 1992
299 2017/2018 Stuttgart II REG 231 Matthias Zimmermann 16 June 1992
300 2017/2018 Stuttgart BUN 76 Matthias Zimmermann 16 June 1992
301 2016/2017 Stuttgart 2.B 1456 Matthias Zimmermann 16 June 1992
302 2016/2017 Stuttgart II REG 90 Matthias Zimmermann 16 June 1992
303 2015/2016 Stuttgart BUN 166 Matthias Zimmermann 16 June 1992
304 2015/2016 Stuttgart II 3.L 2780 Matthias Zimmermann 16 June 1992
305 2014/2015 Borussia M'gla… REG 2850 Matthias Zimmermann 16 June 1992
306 2013/2014 Sandhausen 2.B 1317 Matthias Zimmermann 16 June 1992
307 2012/2013 Greuther Fürth BUN 1231 Matthias Zimmermann 16 June 1992
308 2012/2013 Borussia M'gla… REG 1444 Matthias Zimmermann 16 June 1992
309 2012/2013 Borussia M'gla… BUN 0 Matthias Zimmermann 16 June 1992
310 2011/2012 Borussia M'gla… REG 1118 Matthias Zimmermann 16 June 1992
311 2011/2012 Borussia M'gla… BUN 7 Matthias Zimmermann 16 June 1992
312 2010/2011 Karlsruher SC 2.B 2952 Matthias Zimmermann 16 June 1992
313 2009/2010 Karlsruher SC 2.B 1318 Matthias Zimmermann 16 June 1992
314 2009/2010 Karlsruher SC II REG 933 Matthias Zimmermann 16 June 1992
315 2018/2019 Fortuna Düssel… REG 282 Davor Lovren 3 October 1998
316 2018/2019 Fortuna Düsseldorf BUN 44 Davor Lovren 3 October 1998
317 2018/2019 Fortuna Düsseldorf BUN 0 Davor Lovren 3 October 1998
318 2017/2018 Fortuna Düsseldorf 2.B 503 Davor Lovren 3 October 1998
319 2016/2017 Dinamo Zagreb II 2.H 2390 Davor Lovren 3 October 1998
320 2015/2016 Dinamo Zagreb 1.H 37 Davor Lovren 3 October 1998
321 2015/2016 Dinamo Zagreb II 2.H 493 Davor Lovren 3 October 1998
322 2018/2019 Fortuna Düsseldorf BUN 1649 Dodi Lukebakio 24 September 1997
323 2017/2018 Watford PRL 15 Dodi Lukebakio 24 September 1997
324 2017/2018 Sporting Charleroi FDA 1291 Dodi Lukebakio 24 September 1997
325 2016/2017 Toulouse LI1 87 Dodi Lukebakio 24 September 1997
326 2016/2017 Toulouse II NA3 513 Dodi Lukebakio 24 September 1997
327 2015/2016 Anderlecht FDA 477 Dodi Lukebakio 24 September 1997

328 rows × 7 columns

The above dataframe shows us season on season data for each player currently in the Fortuna Düsseldorf squad. A couple of things to note here:

  • We are only interested in seasons where players have played for Fortuna's first team in the Bundesliga / 2. Bundesliga
  • We need to deal with the date

In it's current format the date is being returned as a string. We can use datetime to convert this to a datetime object which will allow us to run some calculations to get each player's age in years and months.

In [42]:
dobs = []
for d in master_df.dob:
    dobs.append(datetime.datetime.strptime(d,"%d %B %Y"))
master_df['dob'] = dobs
master_df[0:20]
Out[42]:
season team comp mins firstName lastName dob
0 2018/2019 Fortuna Düsseldorf BUN 963 Oliver Fink 1982-06-06
1 2017/2018 Fortuna Düsseldorf 2.B 1676 Oliver Fink 1982-06-06
2 2016/2017 Fortuna Düsseldorf 2.B 1923 Oliver Fink 1982-06-06
3 2015/2016 Fortuna Düsseldorf 2.B 1085 Oliver Fink 1982-06-06
4 2014/2015 Fortuna Düsseldorf 2.B 1014 Oliver Fink 1982-06-06
5 2013/2014 Fortuna Düsseldorf 2.B 2083 Oliver Fink 1982-06-06
6 2012/2013 Fortuna Düsseldorf BUN 1861 Oliver Fink 1982-06-06
7 2011/2012 Fortuna Düsseldorf 2.B 2649 Oliver Fink 1982-06-06
8 2010/2011 Fortuna Düsseldorf 2.B 2765 Oliver Fink 1982-06-06
9 2009/2010 Fortuna Düsseldorf 2.B 2379 Oliver Fink 1982-06-06
10 2008/2009 Unterhaching 3.L ? Oliver Fink 1982-06-06
11 2018/2019 Fortuna Düsseldorf BUN 635 Kenan Karaman 1994-03-05
12 2017/2018 Hannover 96 BUN 845 Kenan Karaman 1994-03-05
13 2016/2017 Hannover 96 2.B 1635 Kenan Karaman 1994-03-05
14 2015/2016 Hannover 96 BUN 1743 Kenan Karaman 1994-03-05
15 2014/2015 Hannover 96 II REG 540 Kenan Karaman 1994-03-05
16 2014/2015 Hannover 96 BUN 257 Kenan Karaman 1994-03-05
17 2013/2014 Hoffenheim BUN 34 Kenan Karaman 1994-03-05
18 2013/2014 Hoffenheim II REG 565 Kenan Karaman 1994-03-05
19 2012/2013 Hoffenheim U19 U1B 1394 Kenan Karaman 1994-03-05

In the above code we first create an empty list. Then we declare a variable (d) which represents each item in our series master_df.dob. This then is used in datetime's strptime function to convert it's date (%d), full text month (%B), and year (%Y) to a datetime object which we can see as YYYY-MM-DD.

Now that we have each player's date of birth as a datetime object, we can use the really handy dateutils package to calculate his age.

In [43]:
today = date(2019,4,18)

i = 0
get_age = []

for i in range(len(master_df)):
    l_date = today
    f_date = master_df['dob'].iloc[i]
    d = relativedelta.relativedelta(l_date,f_date)
    d = str(d.years)+"."+str(d.months)
    get_age.append(float(d))
master_df['current_age'] = get_age
master_df[:20]
Out[43]:
season team comp mins firstName lastName dob current_age
0 2018/2019 Fortuna Düsseldorf BUN 963 Oliver Fink 1982-06-06 36.1
1 2017/2018 Fortuna Düsseldorf 2.B 1676 Oliver Fink 1982-06-06 36.1
2 2016/2017 Fortuna Düsseldorf 2.B 1923 Oliver Fink 1982-06-06 36.1
3 2015/2016 Fortuna Düsseldorf 2.B 1085 Oliver Fink 1982-06-06 36.1
4 2014/2015 Fortuna Düsseldorf 2.B 1014 Oliver Fink 1982-06-06 36.1
5 2013/2014 Fortuna Düsseldorf 2.B 2083 Oliver Fink 1982-06-06 36.1
6 2012/2013 Fortuna Düsseldorf BUN 1861 Oliver Fink 1982-06-06 36.1
7 2011/2012 Fortuna Düsseldorf 2.B 2649 Oliver Fink 1982-06-06 36.1
8 2010/2011 Fortuna Düsseldorf 2.B 2765 Oliver Fink 1982-06-06 36.1
9 2009/2010 Fortuna Düsseldorf 2.B 2379 Oliver Fink 1982-06-06 36.1
10 2008/2009 Unterhaching 3.L ? Oliver Fink 1982-06-06 36.1
11 2018/2019 Fortuna Düsseldorf BUN 635 Kenan Karaman 1994-03-05 25.1
12 2017/2018 Hannover 96 BUN 845 Kenan Karaman 1994-03-05 25.1
13 2016/2017 Hannover 96 2.B 1635 Kenan Karaman 1994-03-05 25.1
14 2015/2016 Hannover 96 BUN 1743 Kenan Karaman 1994-03-05 25.1
15 2014/2015 Hannover 96 II REG 540 Kenan Karaman 1994-03-05 25.1
16 2014/2015 Hannover 96 BUN 257 Kenan Karaman 1994-03-05 25.1
17 2013/2014 Hoffenheim BUN 34 Kenan Karaman 1994-03-05 25.1
18 2013/2014 Hoffenheim II REG 565 Kenan Karaman 1994-03-05 25.1
19 2012/2013 Hoffenheim U19 U1B 1394 Kenan Karaman 1994-03-05 25.1
In [45]:
master_df['mins_perc'] = master_df.mins.astype(int) / (34 * 90) # 34 matches in Bundesliga - Placeholder

master_df['mins_perc'] = np.where(master_df["season"] == "2018/2019",
                                  master_df.mins.astype(int) / (games_played * 90),
                                  master_df.mins_perc)

# I'm too lazy to go back and get the h1 tags so we'll use full names
master_df['refName'] = master_df.firstName+" "+master_df.lastName

# Let's remove rows where player did not play for Fortuna
master_df = master_df[master_df['team'] == "Fortuna Düsseldorf"]
master_df[0:20]
Out[45]:
season team comp mins firstName lastName dob current_age mins_perc refName
0 2018/2019 Fortuna Düsseldorf BUN 963 Oliver Fink 1982-06-06 36.10 0.368966 Oliver Fink
1 2017/2018 Fortuna Düsseldorf 2.B 1676 Oliver Fink 1982-06-06 36.10 0.547712 Oliver Fink
2 2016/2017 Fortuna Düsseldorf 2.B 1923 Oliver Fink 1982-06-06 36.10 0.628431 Oliver Fink
3 2015/2016 Fortuna Düsseldorf 2.B 1085 Oliver Fink 1982-06-06 36.10 0.354575 Oliver Fink
4 2014/2015 Fortuna Düsseldorf 2.B 1014 Oliver Fink 1982-06-06 36.10 0.331373 Oliver Fink
5 2013/2014 Fortuna Düsseldorf 2.B 2083 Oliver Fink 1982-06-06 36.10 0.680719 Oliver Fink
6 2012/2013 Fortuna Düsseldorf BUN 1861 Oliver Fink 1982-06-06 36.10 0.608170 Oliver Fink
7 2011/2012 Fortuna Düsseldorf 2.B 2649 Oliver Fink 1982-06-06 36.10 0.865686 Oliver Fink
8 2010/2011 Fortuna Düsseldorf 2.B 2765 Oliver Fink 1982-06-06 36.10 0.903595 Oliver Fink
9 2009/2010 Fortuna Düsseldorf 2.B 2379 Oliver Fink 1982-06-06 36.10 0.777451 Oliver Fink
11 2018/2019 Fortuna Düsseldorf BUN 635 Kenan Karaman 1994-03-05 25.10 0.243295 Kenan Karaman
22 2018/2019 Fortuna Düsseldorf BUN 0 Diego Armando Valentin Contento 1990-05-01 28.11 0.000000 Diego Armando Valentin Contento
37 2018/2019 Fortuna Düsseldorf BUN 460 Aymen Barkok 1998-05-21 20.10 0.176245 Aymen Barkok
43 2018/2019 Fortuna Düsseldorf BUN 154 Markus Suttner 1987-04-16 32.00 0.059004 Markus Suttner
57 2018/2019 Fortuna Düsseldorf BUN 613 Robin Bormuth 1995-09-19 23.60 0.234866 Robin Bormuth
58 2017/2018 Fortuna Düsseldorf 2.B 1496 Robin Bormuth 1995-09-19 23.60 0.488889 Robin Bormuth
60 2016/2017 Fortuna Düsseldorf 2.B 1672 Robin Bormuth 1995-09-19 23.60 0.546405 Robin Bormuth
62 2015/2016 Fortuna Düsseldorf 2.B 0 Robin Bormuth 1995-09-19 23.60 0.000000 Robin Bormuth
67 2018/2019 Fortuna Düsseldorf BUN 1364 Adam Bodzek 1985-09-07 33.70 0.522605 Adam Bodzek
68 2017/2018 Fortuna Düsseldorf 2.B 1867 Adam Bodzek 1985-09-07 33.70 0.610131 Adam Bodzek

Our next task is getting the data for each player's first season with the club. We will loop through each player and get the last occurance (as the data is sorted lastest to earliest) of each player and do the same as above to get the minute share for their first season and their age at the time they joined the club.

In [46]:
player_list = master_df.refName.unique()
dummy_df = pd.DataFrame()
for player in player_list:
    dummy_df = dummy_df.append(master_df[master_df['refName'] == player].iloc[-1])

start_age = []
start_mins_perc = []
first_season = []
for i in range(len(dummy_df)):
    start_date = date(int(dummy_df['season'].iloc[i][:4]),8,1)
    dob = dummy_df.dob.iloc[i]
    d = relativedelta.relativedelta(start_date,dob)
    d = str(d.years)+"."+str(d.months)
    start_age.append(float(d))
    if dummy_df.season.iloc[i] == "2018/2019":
        start_mins_perc.append(dummy_df.mins_perc.iloc[i])
        first_season.append(1)
    else:
        start_mins_perc.append(int(dummy_df.mins.iloc[i]) / (34 * 90))
        first_season.append(0)
    
master_df = master_df[master_df['season'] == "2018/2019"]
master_df.drop_duplicates(subset=['refName'],inplace=True,keep="first")
master_df['start_age'] = start_age
master_df['start_mins_perc'] = start_mins_perc
master_df['first_season'] = first_season

del master_df['firstName']
del master_df['lastName']
master_df
Out[46]:
season team comp mins dob current_age mins_perc refName start_age start_mins_perc first_season
0 2018/2019 Fortuna Düsseldorf BUN 963 1982-06-06 36.10 0.368966 Oliver Fink 27.10 0.777451 0
11 2018/2019 Fortuna Düsseldorf BUN 635 1994-03-05 25.10 0.243295 Kenan Karaman 24.40 0.243295 1
22 2018/2019 Fortuna Düsseldorf BUN 0 1990-05-01 28.11 0.000000 Diego Armando Valentin Contento 28.30 0.000000 1
37 2018/2019 Fortuna Düsseldorf BUN 460 1998-05-21 20.10 0.176245 Aymen Barkok 20.20 0.176245 1
43 2018/2019 Fortuna Düsseldorf BUN 154 1987-04-16 32.00 0.059004 Markus Suttner 31.30 0.059004 1
57 2018/2019 Fortuna Düsseldorf BUN 613 1995-09-19 23.60 0.234866 Robin Bormuth 19.10 0.000000 0
67 2018/2019 Fortuna Düsseldorf BUN 1364 1985-09-07 33.70 0.522605 Adam Bodzek 24.10 0.380719 0
81 2018/2019 Fortuna Düsseldorf BUN 944 1994-04-25 24.11 0.361686 Marcel Sobottka 21.30 0.363725 0
92 2018/2019 Fortuna Düsseldorf BUN 811 1994-03-07 25.10 0.310728 Marvin Ducksch 24.40 0.310728 1
103 2018/2019 Fortuna Düsseldorf BUN 2430 1984-05-14 34.11 0.931034 Michael Rensing 29.20 0.135294 0
120 2018/2019 Fortuna Düsseldorf BUN 1428 1990-05-12 28.11 0.547126 Alfredo Morales 28.20 0.547126 1
134 2018/2019 Fortuna Düsseldorf BUN 0 1998-01-07 21.30 0.000000 Jannick Theissen 20.60 0.000000 1
142 2018/2019 Fortuna Düsseldorf BUN 285 1997-03-14 22.10 0.109195 Dawid Kownacki 21.40 0.109195 1
149 2018/2019 Fortuna Düsseldorf BUN 2250 1992-01-15 27.30 0.862069 Marcin Kamiński 26.60 0.862069 1
160 2018/2019 Fortuna Düsseldorf BUN 1561 1987-08-28 31.70 0.598084 Rouwen Hennings 28.11 0.837255 0
178 2018/2019 Fortuna Düsseldorf BUN 1359 1994-11-07 24.50 0.520690 Benito Raman 22.80 0.526471 0
191 2018/2019 Fortuna Düsseldorf BUN 180 1979-10-18 39.60 0.068966 Jaroslav Drobný 38.90 0.068966 1
209 2018/2019 Fortuna Düsseldorf BUN 2394 1991-09-26 27.60 0.917241 Niko Gießelmann 25.10 0.941830 0
219 2018/2019 Fortuna Düsseldorf BUN 1672 1993-08-27 25.70 0.640613 Kevin Stöger 24.11 0.640613 1
232 2018/2019 Fortuna Düsseldorf BUN 1439 1993-12-06 25.40 0.551341 Jean Zimmer 23.70 0.685294 0
242 2018/2019 Fortuna Düsseldorf BUN 597 1993-02-28 26.10 0.228736 Andre Hoffmann 23.50 0.114706 0
256 2018/2019 Fortuna Düsseldorf BUN 877 1992-05-06 26.11 0.336015 Takashi Usami 25.20 0.453268 0
271 2018/2019 Fortuna Düsseldorf BUN 0 1988-06-06 30.10 0.000000 Raphael Wolf 29.10 0.902614 0
288 2018/2019 Fortuna Düsseldorf BUN 2001 1994-11-10 24.50 0.766667 Kaan Ayhan 21.80 0.644771 0
298 2018/2019 Fortuna Düsseldorf BUN 2600 1992-06-16 26.10 0.996169 Matthias Zimmermann 26.10 0.996169 1
316 2018/2019 Fortuna Düsseldorf BUN 44 1998-10-03 20.60 0.016858 Davor Lovren 18.90 0.164379 0
322 2018/2019 Fortuna Düsseldorf BUN 1649 1997-09-24 21.60 0.631801 Dodi Lukebakio 20.10 0.631801 1

Plotting the Data

We'll take a look at building our viz in stages. I'll start with building the figure and axes and add to it layer by layer. I'll annotate each part of the code to make it easier to follow and provide the full code at the end.

Step 1:

In [30]:
fig, ax = plt.subplots(figsize=(12,10))

ax = plt.subplot2grid((5,5), (0,0), colspan=4, rowspan=5)
ax1 = plt.subplot2grid((5,5), (3,4))
ax2 = plt.subplot2grid((5,5), (2,4))

First we create 3 subplots, our main plot to hold our main viz, on the top right a smaller viz for our legend, and below that a smaller version of our main viz as an explainer. If you look at Tom's viz you'll notice that this smaller plot is taller than it is wide, I've to have a think about how best to replicate that.

We're going with one main colour so we can declare that as a variable to save us having to type it each time. We then fill each ax with our background color.

Step 2:

In [31]:
bg = "#181818"

rect = ax.patch
rect.set_facecolor(bg)
rect1 = ax1.patch
rect1.set_facecolor(bg)
rect2 = ax2.patch
rect2.set_facecolor(bg)

We can now sort out the x and y axes for our chart and add grids to make it easier to read.

Step 3:

In [32]:
#set figure background colour
fig.set_facecolor(bg)


#set and place ticks on main viz
xt = [16,20,24,28,32,36,40]
yt = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
xlabels = [16, 20,24,28,32,36,40]
ylabels = ["0%", "10%","20%","30%","40%","50%","60%","70%","80%","90%","100%"]
ax.set_xticks(xt)
ax.set_yticks(yt)
ax.set_xticklabels(xlabels,**csfont,color="white")
ax.set_yticklabels(ylabels,**csfont,color="white")
ax.set_xlim(15,41)
ax.set_ylim(-0.1,1.1)

#set and place ticks on secondary viz
xt = [16,20,24,28,32,36,40]
yt = [0,0.2,0.4,0.6,0.8,1]
xlabels = [16,20,24,28,32,36,40]
ylabels = ["0%","20%","40%","60%","80%","100%"]
ax1.set_xticks(xt)
ax1.set_yticks(yt)
ax1.set_xticklabels(xlabels,**csfont,color="white")
ax1.set_yticklabels(ylabels,**csfont,color="white")
ax1.set_xlim(15,41)
ax1.set_ylim(-0.1,1.1)


#add gridlines to viz
ax.grid(zorder=1,color="white",alpha=0.2)
ax1.grid(zorder=1,color="white",alpha=0.2)

This is starting to come together, but we still need to sort out the borders of each ax, as well as add in the peak years section in our main viz. First, to remove the borders from each viz, we need to go back to the start of our plotting code (marked Step 1 above) and add the following lines:

ax = plt.subplot2grid((5,5), (0,0), colspan=4, rowspan=5)
plt.box(False)             #add this line
plt.tick_params(color=bg)  #add this line

ax1 = plt.subplot2grid((5,5), (3,4))
plt.box(False)             #add this line
plt.tick_params(color=bg)  #add this line

ax2 = plt.subplot2grid((5,5), (2,4))
ax2.axis("off")            #add this line





Step 4:

In [35]:
rect3 = plt.Rectangle([24,-0.1],5,1.2,color="seagreen",zorder=5,alpha=0.6)
ax.add_artist(rect3)

It seems's like our ax on the top right is gone, but don't worry, we just turned off the axis so now we cannot see the box or tick marks. We'll continue with our two boxes on the right and add our legend and explainer viz.

Step 5:

In [37]:
ax2.scatter(0,50,label="> Year",s=150,color="dodgerblue")
ax2.scatter(0,45,label="≤ Year",s=150,color="firebrick")
ax2.text(1,49.2,"> Year",color="white",fontsize=14)
ax2.text(1,44.2,"≤ Year",color="white",fontsize=14)
ax2.text(3.5,57,"Time at Club", color="white",fontsize=16,ha="center")
ax2.set_ylim(40,55)
ax2.set_xlim(-1,10)
In [38]:
ax1.text(20,0.4,"Age & first season\n minute share at time\n of joining club",
         color="white",zorder=4,va="top",fontsize=8)
ax1.plot([18,27],
         [0.6,0.8],color="lightgrey",zorder=6)

ax1.scatter(27,0.8,s=120,color="dodgerblue",zorder=8)

style="Simple,tail_width=0.01,head_width=4,head_length=5"
kw = dict(arrowstyle=style, color="white")

a = patches.FancyArrowPatch((36,0.90), (28,0.85),
                            connectionstyle="arc3,rad=0.4", **kw,zorder=20,lw=0.5) #curved arrow

b = patches.FancyArrowPatch((18,0.14),(18,0.6),
                            **kw,zorder=20,lw=0.5) #straight arrow

ax1.add_patch(a)
ax1.add_patch(b)

ax1.text(36,0.80,"Current Age",color="white",fontsize=8,ha="center",zorder=8)

There's a few things happening above. First, We plot 2 data points in our top right viz with ax2.scatter. Instead of creating this as a lengend with labels I prefer to create them myself to handle alignments etc.. So next we plot some text -> > Year and ≤ Year, as well as a subtitle.

In our bottom right chart we have a few more things to plot. As above, we have a data point which we plot with ax1.scatter, a line which we plot with ax1.plot, some text (we can use \n to make a linebreak in the text), and some custom arrows.

Step 7:

Next we will plot or main data. For this we will use a column we created earlier to split the data into players with more than 1 season at the club and players in their fist season.

In [49]:
master_df1 = master_df[master_df['first_season'] == 0]
x = master_df1.current_age.values
y = master_df1.mins_perc.values
xe = master_df1.start_age.values
ye = master_df1.start_mins_perc.values

master_df2 = master_df[master_df['first_season'] == 1]
fx = master_df2.current_age.values
fy = master_df2.mins_perc.values
fxe = master_df2.start_age.values
fye = master_df2.start_mins_perc.values

del [master_df1, master_df2]

ax.scatter(x,y,color="dodgerblue",zorder=8,s=150)
ax.scatter(fx,fy,color="firebrick",zorder=8,s=150)

plotting lines - For now these lines are set to one transparency, however, I'm working on some code to change the shading of the line from start to end and will update this codethrough when I've tested it sufficiently.

In [55]:
for i in range(len(x)):
    ax.plot([x,xe],
             [y,ye], color="lightgrey",zorder=7,alpha=0.1, lw=0.75)
    
for i in range(len(fx)):
    ax.plot([fx,fxe],
             [fy,fye], color="lightgrey",zorder=7,alpha=0.1, lw=0.75)

In the above code snippet, I've used lw= .. this controls the line width in matplotlib's plot function. We could also play around with the line style by using ls = ...

Next we need to add text labels to our datapoints. This is the fiddly bit that I'm working on at the moment so these charts can be automated. The issue is being able to tell which labels will overlap. My current line of thinking is to take width of x characters and calculating the difference between two points, adjust the label position based on points within a certain distance. But I'm open to suggestions - particularly those that are not dependent on further package installs.

Again, this is hard coded for Fortuna's current squad with their current minutes which is not ideal, so if you plan on running this yourself take a look at the code and see what changes you can make.

Step 8:

In [57]:
dx, dy = master_df.current_age.values, master_df.mins_perc.values

for i in range(len(master_df)):
    if master_df.refName.iloc[i] == "Davor Lovren":
        ax.text(dx[i],dy[i]+0.025,master_df.refName.iloc[i],ha="center",color="white",zorder=6,fontweight="bold")
    elif master_df.refName.iloc[i] == "Jean Zimmer":
        ax.text(dx[i],dy[i]+0.025,master_df.refName.iloc[i],ha="center",color="white",zorder=6,fontweight="bold")
    elif master_df.refName.iloc[i] == "Takashi Usami":
        ax.text(dx[i],dy[i]+0.025,master_df.refName.iloc[i],ha="center",color="white",zorder=6,fontweight="bold")
    elif master_df.refName.iloc[i] == "Kenan Karaman":
        ax.text(dx[i]+0.25,dy[i],master_df.refName.iloc[i],ha="left",color="white",zorder=6,fontweight="bold")
    elif master_df.refName.iloc[i] == "Raphael Wolf":
        ax.text(dx[i]+0.25,dy[i],master_df.refName.iloc[i],ha="left",color="white",zorder=6,fontweight="bold")
    elif master_df.refName.iloc[i] == "Robin Bormuth":
        ax.text(dx[i]-0.25,dy[i],master_df.refName.iloc[i],ha="right",color="white",zorder=6,fontweight="bold")
    else:    
        ax.text(dx[i],dy[i]-0.035,master_df.refName.iloc[i],ha="center",color="white",zorder=6,fontweight="bold")
        

Not an ideal solution, but it will do for now. If you've a better idea of how to plot the labels please get in touch, I'd be happy to try it out.

Finally, all that's left to do is to add our axis labels and titles etc...

Step 9:

In [59]:
ax.text(26.5,1.05,"Peak Years",color="white",fontweight="bold",fontsize=16,zorder=7, ha="center")

ax.set_ylabel("Share of Minutes Played",fontsize=14,fontweight="bold",color="white")
ax.set_xlabel("Age",fontsize=14,fontweight="bold",color="white")

fig.suptitle(str(master_df.team.iloc[0])+" | Squad Age Profile",color="white",fontsize=18,fontweight="bold",ha="right")
ax.text(14.9,1.1,"\n\n Bundesliga | Season 2018/2019\n\n", color="white",fontsize=14)

fig.text(0.85,0.95,"@petermckeever",color="white",fontsize=14,fontweight="bold")
In [ ]:
fig, ax = plt.subplots(figsize=(12,10))

ax = plt.subplot2grid((5,5), (0,0), colspan=4, rowspan=5)
plt.box(False)
plt.tick_params(color=bg) 
ax1 = plt.subplot2grid((5,5), (3,4))
plt.box(False)
plt.tick_params(color=bg) 
ax2 = plt.subplot2grid((5,5), (2,4))
ax2.axis("off")

bg = "#181818"

rect = ax.patch
rect.set_facecolor(bg)
rect1 = ax1.patch
rect1.set_facecolor(bg)
rect2 = ax2.patch
rect2.set_facecolor(bg)

#set figure background colour
fig.set_facecolor(bg)

#set and place ticks on main viz
xt = [16,20,24,28,32,36,40]
yt = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
xlabels = [16, 20,24,28,32,36,40]
ylabels = ["0%", "10%","20%","30%","40%","50%","60%","70%","80%","90%","100%"]
ax.set_xticks(xt)
ax.set_yticks(yt)
ax.set_xticklabels(xlabels,color="white")
ax.set_yticklabels(ylabels,color="white")
ax.set_xlim(15,41)
ax.set_ylim(-0.1,1.1)

#set and place ticks on secondary viz
xt = [16,20,24,28,32,36,40]
yt = [0,0.2,0.4,0.6,0.8,1]
xlabels = [16,20,24,28,32,36,40]
ylabels = ["0%","20%","40%","60%","80%","100%"]
ax1.set_xticks(xt)
ax1.set_yticks(yt)
ax1.set_xticklabels(xlabels,color="white")
ax1.set_yticklabels(ylabels,color="white")
ax1.set_xlim(15,41)
ax1.set_ylim(-0.1,1.1)

#add gridlines to viz
ax.grid(zorder=1,color="white",alpha=0.2)
ax1.grid(zorder=1,color="white",alpha=0.2)

rect3 = plt.Rectangle([24,-0.1],5,1.2,color="seagreen",zorder=5,alpha=0.6)
ax.add_artist(rect3)

ax2.scatter(0,50,label="> Year",s=150,color="dodgerblue")
ax2.scatter(0,45,label="≤ Year",s=150,color="firebrick")
ax2.text(1,49.2,"> Year",color="white",fontsize=14)
ax2.text(1,44.2,"≤ Year",color="white",fontsize=14)
ax2.text(3.5,57,"Time at Club", color="white",fontsize=16,ha="center")
ax2.set_ylim(40,55)
ax2.set_xlim(-1,10)


ax1.text(20,0.4,"Age & first season\n minute share at time\n of joining club", color="white",zorder=4,va="top",fontsize=8)
ax1.plot([18,27],
         [0.6,0.8],color="lightgrey",zorder=6)

ax1.scatter(27,0.8,s=120,color="dodgerblue",zorder=8)

style="Simple,tail_width=0.01,head_width=4,head_length=5"
kw = dict(arrowstyle=style, color="white")
a = patches.FancyArrowPatch((36,0.90), (28,0.85),connectionstyle="arc3,rad=0.4", **kw,zorder=20,lw=0.5)
b = patches.FancyArrowPatch((18,0.14),(18,0.6),**kw,zorder=20,lw=0.5)
ax1.add_patch(a)
ax1.add_patch(b)
ax1.text(36,0.80,"Current Age",color="white",fontsize=8,ha="center",zorder=8)

master_df1 = master_df[master_df['first_season'] == 0]
x = master_df1.current_age.values
y = master_df1.mins_perc.values
xe = master_df1.start_age.values
ye = master_df1.start_mins_perc.values

master_df2 = master_df[master_df['first_season'] == 1]
fx = master_df2.current_age.values
fy = master_df2.mins_perc.values
fxe = master_df2.start_age.values
fye = master_df2.start_mins_perc.values

del [master_df1, master_df2]

ax.scatter(x,y,color="dodgerblue",zorder=8,s=150)
ax.scatter(fx,fy,color="firebrick",zorder=8,s=150)


for i in range(len(x)):
    ax.plot([x,xe],
             [y,ye], color="lightgrey",zorder=7,alpha=0.1, lw=0.75)
    
for i in range(len(fx)):
    ax.plot([fx,fxe],
             [fy,fye], color="lightgrey",zorder=7,alpha=0.1, lw=0.75)

dx, dy = master_df.current_age.values, master_df.mins_perc.values


for i in range(len(master_df)):
    if master_df.refName.iloc[i] == "Davor Lovren":
        ax.text(dx[i],dy[i]+0.025,master_df.refName.iloc[i],ha="center",color="white",zorder=6,fontweight="bold")
    elif master_df.refName.iloc[i] == "Jean Zimmer":
        ax.text(dx[i],dy[i]+0.025,master_df.refName.iloc[i],ha="center",color="white",zorder=6,fontweight="bold")
    elif master_df.refName.iloc[i] == "Takashi Usami":
        ax.text(dx[i],dy[i]+0.025,master_df.refName.iloc[i],ha="center",color="white",zorder=6,fontweight="bold")
    elif master_df.refName.iloc[i] == "Kenan Karaman":
        ax.text(dx[i]+0.25,dy[i],master_df.refName.iloc[i],ha="left",color="white",zorder=6,fontweight="bold")
    elif master_df.refName.iloc[i] == "Raphael Wolf":
        ax.text(dx[i]+0.25,dy[i],master_df.refName.iloc[i],ha="left",color="white",zorder=6,fontweight="bold")
    elif master_df.refName.iloc[i] == "Robin Bormuth":
        ax.text(dx[i]-0.25,dy[i],master_df.refName.iloc[i],ha="right",color="white",zorder=6,fontweight="bold")
    else:    
        ax.text(dx[i],dy[i]-0.035,master_df.refName.iloc[i],ha="center",color="white",zorder=6,fontweight="bold")


ax.text(26.5,1.05,"Peak Years",color="white",fontweight="bold",fontsize=16,zorder=7, ha="center")
ax.set_ylabel("Share of Minutes Played",fontsize=14,fontweight="bold",color="white")
ax.set_xlabel("Age",fontsize=14,fontweight="bold",color="white")
fig.text(0.05,1.1,str(master_df.team.iloc[0])+" | Squad Age Profile",color="white",fontsize=18,fontweight="bold")
fig.text(0.05,1,"\n Bundesliga | Season 2018/2019\n\n", color="white",fontsize=14)
fig.text(0.85,1.1,"@your_twitter_handle",color="white",fontsize=14,fontweight="bold")


plt.tight_layout()
plt.savefig("age_utility_matrix",facecolor=bg,bbox_inches="tight")
plt.show()