Skip to content

Instantly share code, notes, and snippets.

@octoparse
Created November 11, 2019 04:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save octoparse/30dcdbdb54c3098c286c4bd5e76f25e4 to your computer and use it in GitHub Desktop.
Save octoparse/30dcdbdb54c3098c286c4bd5e76f25e4 to your computer and use it in GitHub Desktop.
Scraping fantasy football projections
from bs4 import BeautifulSoup
import re
import requests
def get_html_data(url, timeout=30):
    """Fetch *url* and return its body parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests.get`` can block forever on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the raw response bytes with
        the ``html5lib`` parser.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.content, "html5lib")
def scrape(return_list=None):
    """Scrape the fantasy.nfl.com season-projection table page by page.

    Pages are requested with ``offset`` values 1, 26, 51, ... (step 25)
    while the offset stays below 139. For every ``<tr>`` whose class
    matches ``player-.+`` the player name and fourteen stat cells are read.

    Args:
        return_list: Optional list that receives one tuple per player,
            ``(name, stat, stat, ...)`` with every value as the cell's text.
            When ``None`` (the default) each row is printed as a
            comma-separated line instead.

    Returns:
        None. Results are delivered through *return_list* or stdout.

    Raises:
        AttributeError: If a matched row lacks the name link or one of the
            expected stat cells (``tr.find`` returns ``None`` in that case).
    """
    url = 'https://fantasy.nfl.com/research/projections?offset={0}&position=1&sort=projectedPts&statCategory=projectedStats&statSeason=2019&statType=seasonProjectedStats&statWeek=10'
    row_class = re.compile('player-.+')
    name_class = re.compile('playerCard playerName playerNameFull playerNameId-.+')
    # Cell classes in output-column order. stat_32 intentionally precedes
    # stat_30 to keep the column order of the original script.
    stat_classes = (
        'stat stat_1 numeric',
        'stat stat_5 numeric',
        'stat stat_6 numeric',
        'stat stat_7 numeric',
        'stat stat_14 numeric',
        'stat stat_15 numeric',
        'stat stat_20 numeric',
        'stat stat_21 numeric',
        'stat stat_22 numeric',
        'stat stat_28 numeric',
        'stat stat_29 numeric',
        'stat stat_32 numeric',
        'stat stat_30 numeric',
        'stat projected numeric sorted last',
    )
    num = 1
    while num < 139:
        soup = get_html_data(url.format(num))
        for tr in soup.find_all('tr', {'class': row_class}):
            name = tr.find('a', {'class': name_class}).text
            stats = [tr.find('td', {'class': cls}).text for cls in stat_classes]
            record = (name,) + tuple(stats)
            # Compare against None so an empty list supplied by the caller
            # is still filled rather than silently ignored.
            if return_list is not None:
                # list.append takes exactly one argument: store the row
                # as a single tuple.
                return_list.append(record)
            else:
                # Single-argument print(...) works on Python 2 and 3 alike.
                print('{}, {},{},{},{},{},{},{},{},{},{},{},{},{},{}'.format(*record))
        num += 25
def main():
    """Run the scraper with its defaults and pass through its result."""
    result = scrape()
    return result
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
@scrapingdata
Copy link

Nice information, Octoparse. Can you share a detailed guide on what web scraping is and which tools are best for it?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment