# rohitrajiit/cricinfoscraping.py

## cricinfoscraping.py
import requests
from bs4 import BeautifulSoup
import pandas as pd

def matchbymatch(id, timeout=30):
    """Scrape one player's match-by-match all-round results from Statsguru.

    Downloads the player's Statsguru results page, walks every
    ``<tr class="data1">`` row, follows the link found in the row's last
    cell and adds up the totals shown on the linked page.

    Args:
        id: Player identifier substituted into the Statsguru URL. (The name
            shadows the ``id`` builtin; it is kept so existing keyword
            callers keep working.)
        timeout: Seconds to wait for each HTTP response. Without a timeout
            a stalled server would block the scrape forever.

    Returns:
        A ``pandas.DataFrame`` holding the ten scraped cell texts of each
        row plus ``matchurl`` (the href taken from the last cell) and
        ``totalruns`` (sum of the runs in every "TOTAL" row of the linked
        page). The first scraped row is dropped, exactly as before --
        presumably it is a summary row; verify against the live page.

    Raises:
        IndexError: If a row has no cells, its last cell has no link, or a
            "TOTAL" row has fewer than three cells.
        ValueError: If a total is not an integer, or a row does not have
            the number of cells the column list expects.
        requests.RequestException: On network failure or timeout.
    """
    # Function-local import: keeps the fix self-contained in this block.
    from urllib.parse import urljoin

    urlformat = 'https://stats.espncricinfo.com/ci/engine/player/{}.html?class=2;template=results;type=allround;view=match'.format(id)
    cols = ['Bat1','Wkts', 'Conc', 'Ct','St','dummy' ,'Opposition', 'Ground', 'Start Date','odinumber','matchurl', 'totalruns']

    page = requests.get(urlformat, timeout=timeout)
    bs = BeautifulSoup(page.content, 'lxml')

    data = []
    for row in bs.find_all('tr', class_='data1'):
        td = row.find_all('td')
        datum = [x.text.strip() for x in td]
        link = td[-1].select('a')[0]['href']
        datum.append(link)

        # The original concatenated an undefined global (`url2`) with the
        # href, which raised NameError. Resolving the href against the page
        # URL handles both site-relative and absolute links.
        path = urljoin(urlformat, link)
        matchpage = requests.get(path, timeout=timeout)
        bs2 = BeautifulSoup(matchpage.content, 'lxml')

        totalrun = 0
        for tag in bs2.find_all('td', string="TOTAL"):
            tds = tag.find_parent("tr").find_all('td')
            # Third cell looks like "runs/wickets"; only the runs are summed.
            totalrun += int(tds[2].text.split('/')[0])
        datum.append(totalrun)
        data.append(datum)

    return pd.DataFrame(data[1:], columns=cols)

 a = pd.read_csv('cricinfo.csv')

 for c,b in a.iterrows():
    cricket[b['player']]= matchbymatch(b['id'])
	import requests
	from bs4 import BeautifulSoup
	import pandas as pd

	def matchbymatch(id, timeout=30):
		"""Scrape one player's match-by-match all-round results from Statsguru.

		NOTE(review): this definition looks like a copy of the earlier
		``matchbymatch`` whose nesting was lost; the structure is restored
		here -- confirm whether the duplicate is needed at all.

		Downloads the player's Statsguru results page, walks every
		``<tr class="data1">`` row, follows the link found in the row's last
		cell and adds up the totals shown on the linked page.

		Args:
			id: Player identifier substituted into the Statsguru URL. (The
				name shadows the ``id`` builtin; it is kept so existing
				keyword callers keep working.)
			timeout: Seconds to wait for each HTTP response. Without a
				timeout a stalled server would block the scrape forever.

		Returns:
			A ``pandas.DataFrame`` holding the ten scraped cell texts of
			each row plus ``matchurl`` (the href taken from the last cell)
			and ``totalruns`` (sum of the runs in every "TOTAL" row of the
			linked page). The first scraped row is dropped -- presumably it
			is a summary row; verify against the live page.

		Raises:
			IndexError: If a row has no cells, its last cell has no link,
				or a "TOTAL" row has fewer than three cells.
			ValueError: If a total is not an integer, or a row does not
				have the number of cells the column list expects.
			requests.RequestException: On network failure or timeout.
		"""
		# Function-local import: keeps the fix self-contained in this block.
		from urllib.parse import urljoin

		urlformat = 'https://stats.espncricinfo.com/ci/engine/player/{}.html?class=2;template=results;type=allround;view=match'.format(id)
		cols = ['Bat1','Wkts', 'Conc', 'Ct','St','dummy' ,'Opposition', 'Ground', 'Start Date','odinumber','matchurl', 'totalruns']

		page = requests.get(urlformat, timeout=timeout)
		bs = BeautifulSoup(page.content, 'lxml')

		data = []
		for row in bs.find_all('tr', class_='data1'):
			td = row.find_all('td')
			datum = [x.text.strip() for x in td]
			link = td[-1].select('a')[0]['href']
			datum.append(link)

			# The original concatenated an undefined global (`url2`) with
			# the href, which raised NameError. Resolving the href against
			# the page URL handles both site-relative and absolute links.
			path = urljoin(urlformat, link)
			matchpage = requests.get(path, timeout=timeout)
			bs2 = BeautifulSoup(matchpage.content, 'lxml')

			totalrun = 0
			for tag in bs2.find_all('td', string="TOTAL"):
				tds = tag.find_parent("tr").find_all('td')
				# Third cell looks like "runs/wickets"; only runs are summed.
				totalrun += int(tds[2].text.split('/')[0])
			datum.append(totalrun)
			data.append(datum)

		return pd.DataFrame(data[1:], columns=cols)

	# Driver: scrape every player listed in cricinfo.csv (which must provide
	# `player` and `id` columns) and keep one DataFrame per player name.
	# `cricket` was previously used without ever being created (NameError),
	# and the loop body had lost its nesting.
	# NOTE(review): this looks like a copy of the earlier driver statements --
	# confirm whether the duplicate is needed at all.
	cricket = {}
	a = pd.read_csv('cricinfo.csv')

	for c, b in a.iterrows():
		cricket[b['player']] = matchbymatch(b['id'])