Skip to content

Instantly share code, notes, and snippets.

@rohitrajiit
Created January 19, 2023 10:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rohitrajiit/539f0c8cae6ec920cdc75dae6bb7076d to your computer and use it in GitHub Desktop.
Save rohitrajiit/539f0c8cae6ec920cdc75dae6bb7076d to your computer and use it in GitHub Desktop.
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
def matchbymatch(id):  # noqa: A002 - parameter name kept for existing callers
    """Scrape a player's match-by-match all-round results from Statsguru.

    Fetches the ESPNcricinfo Statsguru "allround / match" view for the
    player, then follows the link in the last cell of each result row to
    the linked match page and sums the innings totals found there.

    Args:
        id: The player identifier substituted into the Statsguru URL.

    Returns:
        A ``pandas.DataFrame`` with one row per scraped match. The columns
        are the stripped text of the row's cells, followed by ``matchurl``
        (the raw href from the last cell) and ``totalruns`` (the sum of the
        leading integer, before any ``/``, of the third cell of every row
        on the match page that contains a ``TOTAL`` cell).

    Raises:
        requests.RequestException: On network failure or timeout.
        IndexError: If a result row has no link in its last cell, or a
            ``TOTAL`` row has fewer than three cells.
        ValueError: If a ``TOTAL`` score does not start with an integer, or
            the row width does not match the expected twelve columns.
    """
    timeout = 30  # seconds; without it a stalled connection hangs forever

    urlformat = (
        'https://stats.espncricinfo.com/ci/engine/player/{}.html'
        '?class=2;template=results;type=allround;view=match'
    ).format(id)
    page = requests.get(urlformat, timeout=timeout)
    bs = BeautifulSoup(page.content, 'lxml')
    rows = bs.find_all('tr', class_='data1')

    data = []
    # The first ``data1`` row is not part of the result (presumably the
    # career-summary row -- confirm against the live page). Skipping it here
    # avoids fetching a match page whose data would be thrown away.
    for row in rows[1:]:
        td = row.find_all('td')
        datum = [x.text.strip() for x in td]

        link = td[-1].select('a')[0]['href']
        datum.append(link)

        # Resolve the (possibly relative) href against the page it came
        # from, exactly as a browser would.
        path = urljoin(page.url, link)
        matchpage = requests.get(path, timeout=timeout)
        bs2 = BeautifulSoup(matchpage.content, 'lxml')

        totalrun = 0
        for tag in bs2.find_all('td', string="TOTAL"):
            tds = tag.find_parent("tr").find_all('td')
            # Scores look like "250/8" or "250"; keep only the runs part.
            totalrun += int(tds[2].text.split('/')[0])
        datum.append(totalrun)

        data.append(datum)

    cols = [
        'Bat1', 'Wkts', 'Conc', 'Ct', 'St', 'dummy',
        'Opposition', 'Ground', 'Start Date', 'odinumber',
        'matchurl', 'totalruns',
    ]
    return pd.DataFrame(data, columns=cols)
# Driver: build a {player name: match-by-match DataFrame} mapping for every
# player listed in cricinfo.csv (which must provide 'player' and 'id' columns).
a = pd.read_csv('cricinfo.csv')
cricket = {}  # must exist before it can be filled by key
for player, player_id in zip(a['player'], a['id']):
    cricket[player] = matchbymatch(player_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment