Skip to content

Instantly share code, notes, and snippets.

@leifulstrup
Created January 15, 2018 13:18
Show Gist options
  • Save leifulstrup/eab4d36538d90b27e200c2e0833a9dd6 to your computer and use it in GitHub Desktop.
getLikelyURL(vendorname)
import requests
from bs4 import BeautifulSoup
import re
likelyURLlookup = {}  # memoization cache: vendorname -> URL found on a previous call


def getLikelyURL(vendorname):
    """Return the most likely website URL for a vendor, via a Google search.

    The first call for a given ``vendorname`` fetches a Google results page
    and takes the first result link. The answer is stored in the
    module-level ``likelyURLlookup`` dict, so repeated calls with the same
    name are answered from the cache without another HTTP request.

    Args:
        vendorname: The vendor name to search for. Normally a string; a
            non-string sequence of words (e.g. a tuple) is joined with
            spaces. It must be hashable because it is the cache key.

    Returns:
        The target of the first ``/url?q=`` link on the results page,
        truncated at the first ``&``.

    Raises:
        IndexError: If the results page contains no ``/url?q=`` link.
        requests.RequestException: If the HTTP request fails or times out.
    """
    try:
        return likelyURLlookup[vendorname]
    except KeyError:
        pass  # not cached yet; fall through to the web lookup

    # ' '.join() on a plain string would insert a space between every
    # character ("Acme" -> "A c m e"), so only join real word sequences.
    if isinstance(vendorname, str):
        query = vendorname
    else:
        query = ' '.join(vendorname)

    # Passing the query through ``params`` lets requests URL-encode it, so
    # names containing '&', '#', '+' or spaces are searched for verbatim.
    # The timeout keeps an unresponsive server from blocking forever.
    page = requests.get(
        'http://google.com/search',
        params={'q': query},
        timeout=10,
    )
    soup = BeautifulSoup(page.content, "html.parser")

    # Result links are anchors whose href embeds the target after "/url?q=".
    resultLinks = soup.find_all(
        "a", href=re.compile(r"(?<=/url\?q=)(htt.*://.*)")
    )
    if not resultLinks:
        raise IndexError(
            "no search result link found for %r" % (vendorname,)
        )

    href = resultLinks[0]["href"]
    # Strip the "/url?q=" wrapper, then cut at any ":" that is directly
    # followed by another "http", keeping only the first URL.
    urlExtended = re.split(r":(?=http)", href.replace("/url?q=", ""))[0]
    # Everything from the first '&' onward is dropped.
    likelyURLlookup[vendorname] = urlExtended.split('&')[0]
    return likelyURLlookup[vendorname]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment