Skip to content

Instantly share code, notes, and snippets.

@Pavelovich
Created August 9, 2014 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Pavelovich/cb82c0105846b8e74f63 to your computer and use it in GitHub Desktop.
Save Pavelovich/cb82c0105846b8e74f63 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
from bs4 import BeautifulSoup
import requests
import time
# Fetch the RationalWiki "Saloon bar" page and print every internal wiki
# link (href beginning with "/wiki") found in its anchor tags.
req = requests.get("http://rationalwiki.org/wiki/RationalWiki:Saloon_bar")

# Name the parser explicitly so the parse tree does not depend on which
# optional parsers (lxml, html5lib) happen to be installed.
res = BeautifulSoup(req.text, "html.parser")

# Build the filtered list in a single pass instead of removing items from
# the list while iterating over it, which silently skips the element that
# follows each removal.  Anchors without an href attribute yield None from
# .get() and are dropped here rather than crashing on None.startswith().
links = [
    href
    for href in (anchor.get("href") for anchor in res.find_all("a"))
    if href is not None and href.startswith("/wiki")
]

# The parenthesised single-argument form prints the same text on Python 2
# and Python 3.
print(links)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment