Skip to content

Instantly share code, notes, and snippets.

@salayhin
Created November 3, 2015 19:02
Show Gist options
  • Save salayhin/7aaf34fa4fc29e9d8ed1 to your computer and use it in GitHub Desktop.
Save salayhin/7aaf34fa4fc29e9d8ed1 to your computer and use it in GitHub Desktop.
# Note: this code must run under Python 2.x, and you must download
# http://www.pythonlearn.com/code/BeautifulSoup.py
# into the same folder as this program.
import urllib
from BeautifulSoup import *
# Link-following script: starting from a user-supplied URL, repeatedly open
# the page, pick the anchor tag at a fixed 1-based position, remember its
# first content node, and continue from that anchor's href.
position = raw_input('Enter Position - ')
count = raw_input('Enter Count - ')
url_first = raw_input('Enter - ')

# Convert the numeric inputs once, up front, rather than on every iteration.
# A non-numeric entry raises ValueError here.
link_index = int(position) - 1  # the user enters a 1-based position
hops = int(count)

# Seeding `url` with the starting URL lets the first fetch and every later
# fetch share the same loop body.
url = url_first
s = []
for _ in range(hops):
    html = urllib.urlopen(url).read()
    soup = BeautifulSoup(html)
    tags = soup('a')  # all anchor tags on the current page
    # Raises IndexError when the page has fewer anchors than the position.
    tag = tags[link_index]
    # If the anchor has no href this becomes None, and the next iteration
    # passes None to urlopen.
    url = tag.get('href', None)
    s.append(tag.contents[0])

# Show the first content node of every anchor that was followed, in order.
print(s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment