Plotting xy football data in Python

Today we will look at plotting some xy football data. Last time we took a look at drawing football pitches in Python. We’ll use that code here again to plot some match data. Instead of having to type all of that code each time we want to make a plot, we can write it as a function. I want to be more flexible in the views I have available, so I’ve included an option to view the full pitch ("full") or the opposition half ("half"). I really like vertical pitch views, so I’ll add in an option to plot in that aspect too ("v" for vertical, "h" for horizontal).

import matplotlib.pyplot as plt

def draw_pitch(pitch, line, orientation, view):
    """Draw a football pitch on a new matplotlib figure.

    Pitch coordinates run 0-104 along the length and 0-68 across the width
    (metres). For a horizontal pitch the length lies on the x-axis; for a
    vertical pitch the same geometry is drawn with the two axes swapped.

    Parameters
    ----------
    pitch : colour
        Fill colour of the playing surface.
    line : colour
        Colour of the pitch markings.
    orientation : str
        Starts with "h" (any case) for a horizontal pitch; anything else
        gives a vertical pitch.
    view : str
        Starts with "h" (any case) for the attacking half only (length
        49-105); anything else shows the full pitch.

    Returns
    -------
    (fig, ax)
        The created figure and axes, so callers can keep drawing on them.
        The axes are also left as pyplot's current axes, so follow-up
        ``plt.scatter`` / ``plt.show`` calls work without using the return.
    """
    vertical = not orientation.lower().startswith("h")
    half = view.lower().startswith("h")

    def orient(along, across):
        # Map (length-wise, width-wise) values to (x, y) for this orientation.
        return (across, along) if vertical else (along, across)

    # Landscape figure for horizontal/full and vertical/half, portrait for
    # the other two combinations.
    figsize = (10.4, 6.8) if vertical == half else (6.8, 10.4)
    fig, ax = plt.subplots(figsize=figsize)

    # 1 m margin around the pitch; the half view starts just short of halfway.
    xlim, ylim = orient((49, 105) if half else (-1, 105), (-1, 69))
    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)
    ax.axis('off')  # hide the x and y ticks

    # Width-wise extents shared by the markings at both ends of the pitch.
    penalty_area_w = [13.84, 13.84, 54.16, 54.16]
    goal_w = [30.34, 30.34, 37.66, 37.66]
    six_yard_w = [24.84, 24.84, 43.16, 43.16]

    markings = [
        ([0, 104, 104, 0, 0], [0, 0, 68, 68, 0]),   # touchlines and goal lines
        ([104, 87.5, 87.5, 104], penalty_area_w),   # penalty areas (16.5 m deep)
        ([0, 16.5, 16.5, 0], penalty_area_w),
        ([104, 104.2, 104.2, 104], goal_w),         # goals
        ([0, -0.2, -0.2, 0], goal_w),
        ([104, 98.5, 98.5, 104], six_yard_w),       # six-yard boxes (5.5 m deep)
        ([0, 5.5, 5.5, 0], six_yard_w),
        ([52, 52], [0, 68]),                        # halfway line
    ]
    for along, across in markings:
        ax.plot(*orient(along, across), color=line, zorder=5)

    # Penalty spots (11 m from each goal line) and the kick-off spot.
    for along in (93, 11, 52):
        ax.scatter(*orient(along, 34), color=line, zorder=5)

    def circle(along, zorder):
        # 9.15 m circle centred on a spot on the pitch's long axis.
        return plt.Circle(orient(along, 34), 9.15, ls='solid', lw=1.5,
                          color=line, fill=False, zorder=zorder, alpha=1)

    def filled_rect(along, across, d_along, d_across):
        # Pitch-coloured rectangle given in length-wise/width-wise terms.
        width, height = orient(d_along, d_across)
        return plt.Rectangle(orient(along, across), width, height, ls='-',
                             color=pitch, zorder=1, alpha=1)

    # Artists sharing a zorder are drawn in insertion order, so the order
    # below matters: background first, then the penalty circles, then
    # pitch-coloured rectangles that hide the part of each circle lying
    # inside its penalty area (leaving only the arc), then the centre circle.
    layers = [
        filled_rect(-1, -1, 106, 70),
        circle(93, 1),
        circle(11, 1),
        filled_rect(87.5, 20, 16.5, 30),
        filled_rect(0, 20, 16.5, 30),
        circle(52, 2),
    ]
    for artist in layers:
        ax.add_artist(artist)

    return fig, ax
        
        

We can now use draw_pitch to get our viz:

    # Green pitch, linen-coloured lines, horizontal orientation, full view.
    draw_pitch("#195905", "#faf0e6", "h", "full")

Plotting Match Events

If your dataset has x,y locations, it’s relatively straightforward to add them to our viz. Firstly, for those who do not have access to x,y data, we’ll generate some random x,y points:

    # Generate random sample data for readers without real x,y event data.
    import random

    sample = 20  # how many points to generate

    # randint includes both end points: x spans the pitch length (0-104),
    # y spans the pitch width (0-68).
    x = [random.randint(0, 104) for _ in range(sample)]
    y = [random.randint(0, 68) for _ in range(sample)]

First we state how many numbers we would like to generate

    sample = 20  # number of random points to generate

Next, for x and y, we use random.randint – a random integer – from lowest number (0) to highest number (104 for x and 68 for y). Note that if we are plotting our pitch vertically the highest numbers for x and y would be reversed.

    # One random integer per sample; randint includes both end points.
    x = [random.randint(0, 104) for _ in range(sample)]
    y = [random.randint(0, 68) for _ in range(sample)]

Lastly, we should set a zorder variable to ensure our data is on the top layer of our viz.

    zo = 12  # above every zorder used inside draw_pitch (max 5), so data draws on top

We can then plot these points using matplotlib’s scatter:

    # Draw the pitch first, then overlay the points on the same axes.
    draw_pitch("#195905", "#faf0e6", "horizontal", "full")
    plt.scatter(x, y, marker='o', color='red', edgecolors="black", zorder=zo)
    plt.show()
 

Let’s see how this looks vertically.

    draw_pitch("#195905", "#faf0e6", "vertical", "full")
    # Mirror the width coordinate (pitch width is 68) for the vertical view.
    y1 = [68 - width_pos for width_pos in y]
    # The axes are swapped on a vertical pitch: width on x, length on y.
    plt.scatter(y1, x, c="red", edgecolors="k", zorder=zo)
    plt.show()

**NB:** when plotting vertically, remember to reverse the y-axis.

Plotting Shots

Now that we have everything in place, let’s look at plotting some shots. Firstly, we will do this with randomly generated data, then we’ll look at a couple of real examples. Again, I’ll be using the vertical pitch here because it’s my blog and I’ll do what I want.

    draw_pitch("#195905", "#faf0e6", "vertical", "half")
    sample = 20
    # Keep the random shots in the attacking third and away from the touchlines.
    x = [random.randint(72, 104) for _ in range(sample)]
    y = [random.randint(10, 58) for _ in range(sample)]
    # Mirror the width coordinate and swap the axes for the vertical pitch.
    y1 = [68 - i for i in y]
    plt.scatter(y1, x, c="red", edgecolors="k", zorder=zo)
    plt.show()

We can customise this plot further. Let’s add a new variable z to show expected goals. As this is our random dataset, we’ll generate a new range of values between 0 and 1 to imitate expected goals percentage chance.

    # Stand-in "xG" values: one random probability in [0, 1] per shot.
    z = [random.uniform(0, 1) for _ in range(sample)]
    z1 = [500 * i for i in z]  # scale the "xG" values up to usable marker sizes
    plt.scatter(y1, x, s=z1, c="red", edgecolors="k", zorder=zo)
    plt.show()

The above viz shows us our random shot locations with each shot sized by its xG value. To contextualise these, we will add a small legend in the bottom left corner:

    # Reference xG values for the legend, scaled exactly like the shot markers.
    mSize = [0.05, 0.10, 0.2, 0.4, 0.6, 1]
    mSizeS = [500 * i for i in mSize]
    # Legend marker positions: one row at y=60, spaced out as the markers grow.
    mx = [5.5, 7, 9, 11, 13.5, 16.5]
    my = [60, 60, 60, 60, 60, 60]
    plt.scatter(mx, my, s=mSizeS, facecolors="white",
                edgecolor="white", zorder=zo)
    # Underline beneath the row of legend markers.
    plt.plot([5.5, 17], [57, 57], color="white", lw=2, zorder=zo)
    # Print each xG value inside its marker, with the font scaled to match.
    for label_x, label_y, xg in zip(mx, my, mSize):
        plt.text(label_x, label_y, xg,
                 fontsize=xg * 18, color="#195905",
                 zorder=zo, ha="center", va="center")
    plt.text(11, 55, "xG", color="white",
             ha="center", va="center", zorder=zo, fontsize=16)
    plt.show()

Real Match Example

The match we will use for our example is De Graafschap vs Vitesse, which was played on the 23rd December 2018.

    # NOTE(review): shotsModel is defined outside this snippet; it is used here
    # like a pandas DataFrame of shots for a single match. Confirm.
    # Get home and away teams (taken from the first row)
    home, away = shotsModel.homeTeam.iloc[0], shotsModel.awayTeam.iloc[0]
    # Get home and away shots
    homeShots = shotsModel[shotsModel['teamName'] == home]
    awayShots = shotsModel[shotsModel['teamName'] == away]
    # Get goals
    hg = int(homeShots.isGoal.sum())
    ag = int(awayShots.isGoal.sum())

De Graafschap was at home, so we’ll plot their shot map first. There’s a good bit of code to get into, so I won’t go through it all. For this article, I just want to highlight splitting up the shots, plotting them, and adding a title to the plot. In my next article I will take a look at using colour scales, and creating legends of our own. 

First, I want to separate the headers from shots taken with the foot or another body part. This match only saw shots with the foot or head, so I will not include the code for other body parts below:

    # Split the home side's shots into headers and everything else.
    hHead = homeShots[homeShots['isHead'] == 1]
    hFoot = homeShots[homeShots['isHead'] == 0]

Next I want to break down these dataframes again into shot outcomes – goal, miss, save, shot on post (shot on post excluded below)

    # Headers by outcome
    hHGoal = hHead[hHead['Action'] == "Goal"]
    hHMissed = hHead[hHead['Action'] == "MissedShots"]
    hHSaved = hHead[hHead['Action'] == "SavedShot"]
    # Shots with the foot, excluding free kicks, by outcome
    hFGoal = hFoot[(hFoot['Action'] == "Goal") & (hFoot['isFreeKick'] == 0)]
    hFMissed = hFoot[(hFoot['Action'] == "MissedShots") & (hFoot['isFreeKick'] == 0)]
    hFSaved = hFoot[(hFoot['Action'] == "SavedShot") & (hFoot['isFreeKick'] == 0)]
    # Free kicks by outcome
    hFKGoal = hFoot[(hFoot['Action'] == "Goal") & (hFoot['isFreeKick'] == 1)]
    hFKMissed = hFoot[(hFoot['Action'] == "MissedShots") & (hFoot['isFreeKick'] == 1)]
    hFKSaved = hFoot[(hFoot['Action'] == "SavedShot") & (hFoot['isFreeKick'] == 1)]
 

Now we check if each of these shot situations occurred, and if so we plot it. I’m only adding the code for shots with foot below for sake of brevity:

    # Goals: a coloured hexagon sized by scaled xG, plus a hollow white
    # outline (sized by xG * 2000) drawn one layer above it.
    if len(hFGoal):
        plt.scatter(hFGoal.yM_r, hFGoal.xM, s=hFGoal.xG_s, marker='H',
                    c=hFGoal.colors, edgecolors="white", zorder=zo)
        plt.scatter(hFGoal.yM_r, hFGoal.xM, s=hFGoal.xG * 2000, marker='H',
                    facecolors="none", edgecolors="white", zorder=zo + 1)

    # Misses: hollow hexagons (no fill colour).
    if len(hFMissed):
        plt.scatter(hFMissed.yM_r, hFMissed.xM, s=hFMissed.xG_s, marker='H',
                    facecolors="none", edgecolors="white", zorder=zo)

    # Saves: coloured hexagons.
    if len(hFSaved):
        plt.scatter(hFSaved.yM_r, hFSaved.xM, s=hFSaved.xG_s, marker='H',
                    c=hFSaved.colors, edgecolors="white", zorder=zo)

Just as before we are creating scatter plots. However, there are some new features here. xG_s is a column of scaled xG to make them larger in the plot, yM_r is our reversed y-axis for vertical plotting, and marker= lets us choose which marker style we want to use. You can find the options available to you here.

For some of the shots I decided not to use a colour. This is controlled by facecolors="none". Whether this was the correct decision is down to personal taste, but there are a lot of choices to be made with these charts. Is it better to assign a colour for each outcome? Should xG be shown by colour, or size, or both? Should shot type be shown by colour or shape or both?

I chose the above style because I find it aesthetically pleasing and I think it conveys the important information without the need for too much searching. Are there ways this could be improved? Definitely. Some of the ways this could be done will be looked at in the next post. Anyway, below is De Graafschap’s shot map with the xG totals from the model we created last week.

Before I finish up, here’s Vitesse’s shots map. Our titles use the team name variables we made earlier. 

    # Title: "<away team> <total xG rounded to 2 dp> xG".
    # NOTE(review): this reads column `XG`, while the shot-plotting code reads
    # `xG`; column names are case-sensitive, so confirm which one exists.
    away_total = round(sum(awayShots.XG), 2)
    plt.title(f"{away!s} {away_total!s} xG")
 

Today’s post was quite long, so I’ll leave it here. Next time we will take a look at plotting passes and assists, as well as using colorbars and some other features to describe matches. As usual, if you have any questions or complaints, feel free to send me a message here or on twitter here.



Liked it? Take a second to support petermckeever on Patreon!
No Comments

Post A Comment