Skip to content

Instantly share code, notes, and snippets.

@VAD3R-95
Created August 24, 2022 14:02
Show Gist options
  • Save VAD3R-95/bcafd3f38f2e617fdca3bf12b19fb1fc to your computer and use it in GitHub Desktop.
Save VAD3R-95/bcafd3f38f2e617fdca3bf12b19fb1fc to your computer and use it in GitHub Desktop.
This scraper is used to scrape football data from transfermarkt.com
import requests as req
import pandas as pd
from bs4 import BeautifulSoup
import time
import re
# Scrape the yearly goalscorer statistics pages of transfermarkt.us for
# 1998-2021 and collect them into two module-level DataFrames:
#   df_players - one row per scorer with columns Year, Player_Name, Club, Goals
#   df_img     - Club/Link (logo URL) table of the most recent page that was
#                fetched successfully; empty if no page could be fetched.

# Listing URL; ``{year}`` is filled in for every request.
_SCORERS_URL = (
    'https://www.transfermarkt.us/spieler-statistik/jahrestorschuetzen/statistik'
    '?jahr={year}&wettbewerb=alle&monatVon=01&monatBis=12&altersklasse='
    '&spielerposition_id=0&art=0&ausrichtung=alle&land_id=0&compFilter=&plus=1'
)

# Browser-like User-Agent sent with every request.
# NOTE(review): presumably the site rejects the default client UA - confirm.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

# Seconds to wait for the server before giving up on one year's page, so a
# stalled connection cannot hang the whole run.
_REQUEST_TIMEOUT = 30


def _club_logos(soup):
    """Return one ``{'Club': ..., 'Link': ...}`` dict per ``img.tiny_wappen`` tag.

    The values are read from the parsed ``title`` and ``src`` attributes
    (empty string when an attribute is missing) instead of regex-matching the
    serialized tag, so attribute order and HTML entity escaping do not affect
    the result.
    """
    return [
        {'Club': img.get('title', ''), 'Link': img.get('src', '')}
        for img in soup.find_all("img", {"class": "tiny_wappen"})
    ]


def _scorer_rows(soup, year):
    """Return the scorer table found in *soup* as a DataFrame tagged with *year*.

    The text of every ``td.hauptlink`` cell is collected and regrouped three
    at a time.
    NOTE(review): assumes each table row contributes exactly three such cells
    in the order player name, club, goals - verify against the page markup.
    """
    cells = [
        td.getText().strip()
        for td in soup.find_all("td", {"class": "hauptlink"})
    ]
    # zip() stops at the shortest slice, so a trailing incomplete row is
    # dropped instead of raising on mismatched column lengths.
    rows = list(zip(cells[0::3], cells[1::3], cells[2::3]))
    frame = pd.DataFrame(rows, columns=['Player_Name', 'Club', 'Goals'])
    # Size the Year column from the rows actually found rather than a fixed 25.
    frame.insert(0, 'Year', year)
    return frame


df_players = pd.DataFrame()
df_img = pd.DataFrame(columns=["Club", "Link"])
yearly_frames = []

for yr in range(1998, 2022):
    year = str(yr)
    url = _SCORERS_URL.format(year=year)
    try:
        page = req.get(url, headers=headers, allow_redirects=True,
                       timeout=_REQUEST_TIMEOUT)
    except req.RequestException as exc:
        # Treat a network failure like a non-200 answer: report it and move
        # on so the years already scraped are not lost.
        print(year + ': request failed (' + str(exc) + ')')
    else:
        print(year+': '+str(page.status_code))
        if page.status_code == 200:
            soup = BeautifulSoup(page.content, "html.parser")
            df_img = pd.DataFrame(_club_logos(soup), columns=["Club", "Link"])
            yearly_frames.append(_scorer_rows(soup, year))
    # Pause between requests to avoid hammering the server.
    time.sleep(2)

# DataFrame.append was removed in pandas 2.0; concatenating once at the end
# also avoids re-copying the accumulated frame on every iteration.
if yearly_frames:
    df_players = pd.concat(yearly_frames, ignore_index=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment