Skip to content

Instantly share code, notes, and snippets.

@scumola
Created November 3, 2018 17:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scumola/fe98df75440ebf0c68b57a2b5165ecfa to your computer and use it in GitHub Desktop.
Save scumola/fe98df75440ebf0c68b57a2b5165ecfa to your computer and use it in GitHub Desktop.
cpscraper.py
#!/usr/bin/python
from bs4 import BeautifulSoup as BS
import requests
import re
import time
import sys
import gspread
from oauth2client.service_account import ServiceAccountCredentials
# Authorize against Google Sheets with the service-account key stored in
# credential.json, then open the worksheet that the rest of the script
# writes to.
scope = "https://spreadsheets.google.com/feeds"
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    filename="credential.json",
    scopes=scope,
)
gs = gspread.authorize(credentials)
# The key below is a placeholder for the target spreadsheet's key.
gsheet = gs.open_by_key("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
wsheet = gsheet.worksheet("Sheet1")
# Matches a single <...> tag. Compiled once at import time instead of on
# every call; DOTALL lets a tag whose attributes wrap onto several lines be
# removed as well.
_TAG_PATTERN = re.compile(r'<.*?>', re.DOTALL)


def removeTag(raw_text):
    """Return *raw_text* with every ``<...>`` tag removed.

    Only the tags themselves are dropped; the text between them is returned
    unchanged (no whitespace trimming, no entity decoding).

    Args:
        raw_text: Markup as a string, e.g. ``str()`` of a parsed element.

    Returns:
        The input string without its tags.
    """
    return _TAG_PATTERN.sub('', raw_text)
# Team identifiers to look up on the scoreboard, in the order their rows are
# written to the worksheet. The values here are placeholders.
teams = [
    "11-xxxx",
    "11-xxxx",
    "11-xxxx",
]
# Worksheet row that receives the first team; each following team goes one
# row further down. (Row 1 is presumably a header row -- confirm.)
FIRST_TEAM_ROW = 2

# The per-image cells are read from the flat list of <td> elements on the
# team page, starting at this index, in fixed-size groups (one per image).
FIRST_IMAGE_CELL = 25
CELLS_PER_IMAGE = 7
IMAGE_NAME_CELL = 0   # cell within a group that holds the image name
IMAGE_VALUE_CELL = 5  # cell within a group that is written to the sheet
                      # (presumably the image score -- confirm)

# Seconds to wait for the scoreboard before treating it as unavailable.
REQUEST_TIMEOUT_SECONDS = 30

# Image-name keyword -> worksheet column, checked in priority order. The
# order mirrors the original if-chain, where a later match overrode an
# earlier one (a name containing both "Windows" and "Server" goes to E).
IMAGE_COLUMNS = (
    ("Cisco", "H"),
    ("Server", "E"),
    ("Windows", "D"),
    ("Ubuntu", "F"),
)


def _column_for_image(image_name):
    """Return the worksheet column letter for *image_name*, or None if unknown."""
    for keyword, column in IMAGE_COLUMNS:
        if keyword in image_name:
            return column
    return None


def _cell_text(cell):
    """Return the tag-free, whitespace-trimmed text of one parsed <td>."""
    return removeTag(str(cell)).strip()


for teamnum, team in enumerate(teams, FIRST_TEAM_ROW):
    # Column A holds the team identifier for this row.
    wsheet.update_acell("A%d" % teamnum, team)

    url = "http://scoreboard.uscyberpatriot.org/team.php?team=" + team
    print(url)
    try:
        page = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException:
        # Only network-level failures are handled here; Ctrl-C and
        # programming errors propagate normally.
        print("[!]Error: Webpage is unavailable...")
        sys.exit(1)  # non-zero so callers can tell the run failed

    # Parse once per team rather than once per image group.
    cells = BS(page.content, 'html.parser').find_all('td')

    for first in range(FIRST_IMAGE_CELL, len(cells), CELLS_PER_IMAGE):
        group = cells[first:first + CELLS_PER_IMAGE]
        if len(group) != CELLS_PER_IMAGE:
            continue  # trailing partial group: nothing usable in it

        # The image name stays a string even if it happens to be all digits,
        # so the keyword lookup below can never fail on a float.
        image_name = _cell_text(group[IMAGE_NAME_CELL])
        value = _cell_text(group[IMAGE_VALUE_CELL])
        if value.isdigit():
            value = float(value)
        print(" ** image %s %s" % (image_name, value))

        column = _column_for_image(image_name)
        if column is None:
            # Unknown image type: skip it instead of crashing or reusing the
            # previous image's column.
            continue
        wsheet.update_acell("%s%d" % (column, teamnum), value)

    # NOTE(review): the original indentation was lost; the pause is assumed
    # to run once per team, between scoreboard requests -- confirm.
    time.sleep(2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment