import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from time import sleep

def get_table(round, url=url):
    # Build the page address for the given round and download it;
    # `url` is a base address defined earlier in the script, with one page per round under it.
    round_url = f'{url}/{round}'
    page = requests.get(round_url)
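The rest of get_table is not shown above. As a hedged usage sketch (the loop bounds and the pause are assumptions; a Premier League season has 38 rounds), the function would be called once per round, with a delay between requests:

season = []          # assumption: get_table appends each parsed match of the round here
for rnd in range(1, 39):
    get_table(rnd)
    sleep(3)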
otavio-s-s / gs3.py
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup

all_quotes = []
for i in range(1, 11):
    # quotes.toscrape.com spreads its quotes over ten pages.
    url = f'https://quotes.toscrape.com/page/{i}/'
    page = urlopen(url)
    soup = BeautifulSoup(page, 'html.parser')
    # Each quote on the page lives inside a <div class="quote"> block.
    quotes = soup.find_all('div', class_='quote')
<span class="text" itemprop="text">“The text is here.”</span>
<span>by <small class="author" itemprop="author">Albert Einstein</small>
<a href="/author/Albert-Einstein">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="change,deep-thoughts,thinking,world">
<a class="tag" href="/tag/change/page/1/">change</a>
<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
...</div>
<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
...</div>
<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
...</div>
<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
...</div>
<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
...</div>
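Each entry in quotes is one of these div blocks. As a minimal sketch of the extraction step (the dictionary field names are my own, not from the original gist), the text, author and tag list can be pulled out of each block inside the page loop and collected into all_quotes:

for quote in quotes:
    # These selectors follow the markup shown above.
    text = quote.find('span', class_='text').get_text()
    author = quote.find('small', class_='author').get_text()
    tags = [tag.get_text() for tag in quote.find_all('a', class_='tag')]
    all_quotes.append({'text': text, 'author': author, 'tags': tags})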
# One pair of columns (home/away) per tracked statistic, after the five base columns.
columns = ['date', 'home_team', 'away_team', 'home_score', 'away_score']
for stat in stats_check:
    columns.append(f'home_{stat}')
    columns.append(f'away_{stat}')

dataset = pd.DataFrame(season, columns=columns)
dataset.to_csv('Premier_league_19_20.csv', index=False)
# Assemble one row per match, in the same order as `columns`.
match = [date, home_team, away_team, home_score, away_score,
         home_stats['possession_%'], away_stats['possession_%'],
         home_stats['shots_on_target'], away_stats['shots_on_target'],
         home_stats['shots'], away_stats['shots'],
         home_stats['touches'], away_stats['touches'],
         home_stats['passes'], away_stats['passes'],
         home_stats['tackles'], away_stats['tackles'],
         home_stats['clearances'], away_stats['clearances'],
         home_stats['corners'], away_stats['corners'],
         home_stats['offsides'], away_stats['offsides'],
         home_stats['yellow_cards'], away_stats['yellow_cards'],
         home_stats['red_cards'], away_stats['red_cards'],
         home_stats['fouls_conceded'], away_stats['fouls_conceded']]
season.append(match)
stats_check = ['possession_%', 'shots_on_target', 'shots', 'touches', 'passes',
               'tackles', 'clearances', 'corners', 'offsides', 'yellow_cards',
               'red_cards', 'fouls_conceded']
# Fill in 0 for any statistic the page's stats table did not include for this match.
for stat in stats_check:
    if stat not in home_stats:
        home_stats[stat] = 0
        away_stats[stat] = 0
home_stats = {}
away_stats = {}
# The table parsed by pd.read_html has the home values in a column named after the
# home team, the away values in a column named after the away team, and the
# statistic names in an unnamed middle column ('Unnamed: 1').
home_series = stats[home_team]
away_series = stats[away_team]
stats_series = stats['Unnamed: 1']
for row in zip(home_series, stats_series, away_series):
    stat = row[1].replace(' ', '_').lower()
    home_stats[stat] = row[0]
    away_stats[stat] = row[2]
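For context on the 'Unnamed: 1' column: the table pd.read_html returns for a match has one row per statistic, with the home team's value on the left, the statistic name in the middle (an unnamed column), and the away team's value on the right. A hypothetical illustration with made-up numbers:

demo = pd.DataFrame({
    'Liverpool': [62, 7],                               # hypothetical home column
    'Unnamed: 1': ['Possession %', 'Shots on target'],  # statistic names
    'Norwich City': [38, 3],                            # hypothetical away column
})
# Running the loop above over demo would give:
#   home_stats == {'possession_%': 62, 'shots_on_target': 7}
#   away_stats == {'possession_%': 38, 'shots_on_target': 3}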
otavio-s-s / PL6.py
from datetime import datetime
from time import sleep
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# `driver` is a Selenium WebDriver created earlier in the script, already on a match page.
try:
    # Grab the kick-off date and reformat it, e.g. 'Fri 09 Aug 2019' -> '08/09/2019'.
    date = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((
        By.XPATH, '//*[@id="mainContent"]/div/section/div[2]/section/div[1]/div/div[1]/div[1]'))).text
    date = datetime.strptime(date, '%a %d %b %Y').strftime('%m/%d/%Y')
    home_team = driver.find_element_by_xpath(
        '//*[@id="mainContent"]/div/section/div[2]/section/div[3]/div/div/div[1]/div[1]/a[2]/span[1]').text
    away_team = driver.find_element_by_xpath(
        '//*[@id="mainContent"]/div/section/div[2]/section/div[3]/div/div/div[1]/div[3]/a[2]/span[1]').text
    # Switch to the match statistics tab and give it a moment to render.
    elem = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//ul[@class='tablist']//li[@data-tab-index='2']")))
    elem.click()
    sleep(3)
    dfs = pd.read_html(driver.page_source)
    stats = dfs[-1]  # the match statistics table is the last one on the page
except Exception:
    pass  # the original error handling is not shown in this snippet
driver.quit()
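The snippet assumes driver is a Selenium WebDriver that has already loaded a match page. A minimal setup sketch (the browser choice and the match_url name are assumptions, not from the original gist):

from selenium import webdriver

driver = webdriver.Chrome()   # any Selenium-supported browser works
driver.get(match_url)         # match_url: address of the match page to scrape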