Skip to content

Instantly share code, notes, and snippets.

@hybridjosto
Last active August 6, 2018 11:22
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save hybridjosto/4573849 to your computer and use it in GitHub Desktop.
Save hybridjosto/4573849 to your computer and use it in GitHub Desktop.
scraping data from a web table using python and Beautiful Soup
import sys
import urllib2

from bs4 import BeautifulSoup
# http://segfault.in/2010/07/parsing-html-table-in-python-with-beautifulsoup/
# Scrape the first <table> of every page listed in linksSource.txt and append
# its rows to cricket-data.txt, one line per <tr>:
#     <first 4 chars of page title>,<cell>,<cell>,...
# Pages that cannot be fetched or that contain no table are reported on stderr
# and skipped, so one bad link does not abort the whole run.
BASE_URL = "http://www.gunnercricket.com/"
FIRST_LINK, LAST_LINK = 12, 108  # only lines[12:108] of the links file are scraped

with open("linksSource.txt") as linksFile:
    lines = linksFile.readlines()

with open('cricket-data.txt', 'w') as f:
    for i in lines[FIRST_LINK:LAST_LINK]:
        # readlines() keeps the trailing newline; it must not end up in the URL.
        link = i.strip()
        if not link:
            continue
        url = BASE_URL + link

        try:
            page = urllib2.urlopen(url)
            try:
                html = page.read()
            finally:
                page.close()
        except Exception as exc:
            # Best-effort scrape: keep going, but say which page was skipped.
            sys.stderr.write("skipping %s: %s\n" % (url, exc))
            continue

        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(html, "html.parser")

        # Take the first 4 characters of the title; tolerate a missing <title>.
        title = soup.title
        date = title.get_text()[:4] if title is not None else u''

        table = soup.find('table')
        if table is None:
            sys.stderr.write("skipping %s: no <table> found\n" % url)
            continue

        for tr in table.findAll('tr'):
            # get_text() also handles cells that contain nested tags
            # (links, <b>, ...), which ''.join(td) could not.
            cells = [td.get_text() for td in tr.findAll('td')]
            # Build the whole line as unicode and encode once when writing,
            # instead of mixing unicode with already-encoded byte strings.
            line = date + u',' + u','.join(cells)
            f.write(line.encode('utf-8') + '\n')
@sxb1649
Copy link

sxb1649 commented Jan 15, 2018

import pandas as pd
from pandas import Series, DataFrame

from bs4 import BeautifulSoup
import json
import csv

import requests

import lxml

# Download the ESPN 2013 BCS rankings page and write the cells of its
# <table class="tablehead"> to ./Rankings.csv, one CSV row per <tr>.
url = "http://espn.go.com/college-football/bcs/_/year/2013"

result = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
result.raise_for_status()

soup = BeautifulSoup(result.content, "lxml")
table = soup.find('table', attrs={'class': 'tablehead'})
if table is None:
    raise SystemExit("no <table class='tablehead'> found at " + url)

list_of_rows = []
for row in table.findAll('tr'):
    # Search for cells inside the current row (findAll must be called on it).
    # '\xa0' is what a decoded &nbsp; looks like; drop it but keep ordinary
    # spaces so multi-word names stay intact.
    # NOTE(review): assumes the original replace() targeted &nbsp; - confirm.
    list_of_cells = [cell.text.replace('\xa0', '') for cell in row.findAll('td')]
    list_of_rows.append(list_of_cells)

# On Python 3 the csv module needs a text-mode file opened with newline="";
# binary mode ("wb") makes writer.writerows() raise TypeError.
with open("./Rankings.csv", "w", newline="", encoding="utf-8") as outfile:
    writer = csv.writer(outfile)
    writer.writerows(list_of_rows)

Can you please help me with this code? I am using Python 3.5.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment