This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import requests | |
from bs4 import BeautifulSoup | |
from time import sleep | |
def get_table(round, url=url): | |
round_url = f'{url}/{round}' | |
page = requests.get(round_url) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from urllib.request import urlopen | |
from bs4 import BeautifulSoup | |
# Accumulator for the scraped quotes.
# NOTE(review): nothing in this snippet appends to it yet — the per-quote
# extraction presumably follows; confirm against the full script.
all_quotes = []

# Fetch listing pages 1 through 10 and collect the quote containers of each.
for i in range(1, 11):
    url = f'https://quotes.toscrape.com/page/{i}/'
    # Close the HTTP response as soon as it has been parsed so that the
    # loop does not leave one open connection behind per page.
    with urlopen(url) as page:
        soup = BeautifulSoup(page, 'html.parser')
    # Every quote on the page lives in a <div class="quote"> element.
    quotes = soup.find_all('div', class_='quote')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<span class="text" itemprop="text">“The text is here.”</span> | |
<span>by <small class="author" itemprop="author">Albert Einstein</small> | |
<a href="/author/Albert-Einstein">(about)</a> | |
</span> | |
<div class="tags"> | |
Tags: | |
<meta class="keywords" itemprop="keywords" content="change,deep-thoughts,thinking,world"> | |
<a class="tag" href="/tag/change/page/1/">change</a> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork"> | |
...</div> | |
<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork"> | |
...</div> | |
<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork"> | |
...</div> | |
<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork"> | |
...</div> | |
<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork"> | |
...</div> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Column layout of the output table: the five match-identity fields first,
# then a home/away pair for every statistic named in stats_check.
columns = ['date', 'home_team', 'away_team', 'home_score', 'away_score']
for stat in stats_check:
    columns.extend((f'home_{stat}', f'away_{stat}'))

# Turn the collected match rows into a table and write it out without the
# DataFrame index column.
dataset = pd.DataFrame(season, columns=columns)
dataset.to_csv('Premier_league_19_20.csv', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Statistics stored for each match, in output order. Every entry contributes
# the home value immediately followed by the away value.
stat_order = (
    'possession_%', 'shots_on_target', 'shots', 'touches', 'passes',
    'tackles', 'clearances', 'corners', 'offsides', 'yellow_cards',
    'red_cards', 'fouls_conceded',
)

# A match row starts with the identifying fields and the final score ...
match = [date, home_team, away_team, home_score, away_score]
# ... followed by the home/away pair of each statistic.
for stat_name in stat_order:
    match.append(home_stats[stat_name])
    match.append(away_stats[stat_name])

season.append(match)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Statistics that every match row is expected to contain.
stats_check = ['possession_%', 'shots_on_target', 'shots', 'touches', 'passes',
               'tackles', 'clearances', 'corners', 'offsides', 'yellow_cards',
               'red_cards', 'fouls_conceded']

# Default any statistic that was not scraped to 0. Each dictionary is handled
# on its own: a value already present is never overwritten, and a statistic
# missing from only one side is still filled in for that side.
for stat in stats_check:
    home_stats.setdefault(stat, 0)
    away_stats.setdefault(stat, 0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-team statistics for the current match, keyed by normalised stat name.
home_stats = {}
away_stats = {}

# `stats` is indexed by column label: one column per team name plus the
# 'Unnamed: 1' column that carries the statistic labels.
# NOTE(review): presumably the DataFrame parsed from the match page — confirm.
home_series = stats[home_team]
away_series = stats[away_team]
stats_series = stats['Unnamed: 1']

for home_value, label, away_value in zip(home_series, stats_series, away_series):
    # Normalise the label, e.g. 'Shots on target' -> 'shots_on_target'.
    stat = label.replace(' ', '_').lower()
    home_stats[stat] = home_value
    # Store the away value as well; without this away_stats stays empty even
    # though the away column is part of the zip.
    away_stats[stat] = away_value
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try: | |
date = WebDriverWait(driver, 20).until(EC.element_to_be_clickable(( | |
By.XPATH, '//*[@id="mainContent"]/div/section/div[2]/section/div[1]/div/div[1]/div[1]'))).text | |
date = datetime.strptime(date, '%a %d %b %Y').strftime('%m/%d/%Y') | |
home_team = driver.find_element_by_xpath( | |
'//*[@id="mainContent"]/div/section/div[2]/section/div[3]/div/div/div[1]/div[1]/a[2]/span[1]').text | |
away_team = driver.find_element_by_xpath( | |
'//*[@id="mainContent"]/div/section/div[2]/section/div[3]/div/div/div[1]/div[3]/a[2]/span[1]').text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try:
    # Wait up to 20 seconds for the tab with data-tab-index 2 to become
    # clickable, then open it (presumably the match-stats tab — confirm).
    elem = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//ul[@class='tablist']//li[@data-tab-index='2']")))
    elem.click()
    # Fixed pause before reading the page source; presumably gives the tab
    # content time to render.
    sleep(3)
    # Parse every HTML table on the page; the last one is used as the stats table.
    dfs = pd.read_html(driver.page_source)
    stats = dfs[-1]
finally:
    # Always shut the browser down, even when the wait times out or no table
    # can be parsed, so that no browser process is left running.
    driver.quit()