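# NOTE: the lines below are GitHub Gist page text that was captured together
# with the code; they are kept as comments so the file stays valid Python.
# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @remotalks
# Last active August 29, 2015 14:08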
from bs4 import BeautifulSoup
import urllib2
class Bing:
    """Minimal scraper that collects result links from Bing's HTML search pages.

    Links are taken from the first anchor found inside each ``<h2>`` element
    of a result page.
    """

    def __init__(self):
        """Store the search URL template.

        The template takes two values, in order: the URL-encoded query and the
        value sent as the ``first`` parameter (``search`` starts it at 1 and
        advances it by 10 per page).
        """
        self.__bing_url = "http://www.bing.com/search?q=%s&first=%s"

    def search(self, _s_search):
        """Return the distinct result URLs listed for a query.

        Pages are requested one after another until a page contributes no URL
        that has not been seen already, or until a request fails.

        Args:
            _s_search: The search terms as a string. It is URL-encoded here,
                so callers pass the raw, unescaped text.

        Returns:
            A list of ``http://``, ``https://`` or ``ftp://`` URLs in the
            order they were first encountered, without duplicates. When a
            request fails, the URLs gathered up to that point are returned.
        """
        # Python 2 location of quote_plus; imported locally so this class does
        # not depend on an extra file-level import.
        import urllib

        query = _s_search
        if not isinstance(query, bytes):
            # Python 2's quote_plus cannot handle non-ASCII unicode objects,
            # so hand it UTF-8 encoded bytes instead.
            query = query.encode("utf-8")
        # Escape spaces, '&', '#', ... so the query cannot corrupt the URL.
        query = urllib.quote_plus(query)

        first = 1
        urls = []
        seen = set()  # O(1) duplicate check; `urls` keeps the ordering
        while True:
            known = len(urls)
            try:
                response = urllib2.urlopen(self.__bing_url % (query, first))
                try:
                    # Name the parser explicitly so results do not depend on
                    # which optional parser libraries happen to be installed.
                    page = BeautifulSoup(response, "html.parser")
                finally:
                    response.close()
            except IOError:
                # urllib2.URLError / HTTPError and socket errors all derive
                # from IOError. Retrying the same page unconditionally would
                # spin forever while the failure persists, so stop paging and
                # return what has been collected (best effort).
                break

            for heading in page.find_all("h2"):
                link = heading.a
                if link is None:
                    continue
                # .get() avoids a KeyError on anchors without an href.
                href = link.get("href")
                if not href or href in seen:
                    continue
                if href.startswith(("http://", "https://", "ftp://")):
                    seen.add(href)
                    urls.append(href)

            # A page that added nothing new means the results are exhausted.
            if len(urls) <= known:
                break
            first += 10
        return urls
if __name__ == "__main__":
    # Demo: run a sample search, then print the number of URLs found followed
    # by the URLs themselves, one per line. The guard keeps an import of this
    # module from firing network requests and printing as a side effect.
    b = Bing()
    urls = b.search("bing")
    # A single parenthesised argument prints identically under the Python 2
    # print statement, so these lines keep the original output.
    print(len(urls))
    print('\n'.join(urls))
# (GitHub Gist page footer captured with the code:)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment