Skip to content

Instantly share code, notes, and snippets.

@Zulko
Created November 21, 2016 14:54
Show Gist options
  • Save Zulko/63dd041659398cf5a74f768b68598b9d to your computer and use it in GitHub Desktop.
Save Zulko/63dd041659398cf5a74f768b68598b9d to your computer and use it in GitHub Desktop.
Goes through the NEB website and compiles all restriction enzyme prices into one spreadsheet.
import urllib
import time
from Bio import Restriction
from bs4 import BeautifulSoup
from tqdm import tqdm
import pandas
def get_neb_soup(endpoint):
    """Fetch a page from the NEB website and parse it with BeautifulSoup.

    Args:
        endpoint: Path appended verbatim to ``https://www.neb.com``
            (e.g. ``"/products/restriction-endonucleases"``).

    Returns:
        A ``BeautifulSoup`` tree built from the response body using the
        ``html.parser`` backend.

    Any exception raised by ``urlopen`` propagates to the caller unchanged.
    """
    # ``import urllib`` alone does not load the ``request`` submodule, so
    # ``urllib.request`` is only reachable if some other module happened to
    # import it first. Import it explicitly so the lookup cannot fail.
    import urllib.request

    with urllib.request.urlopen("https://www.neb.com" + endpoint) as response:
        return BeautifulSoup(response.read(), "html.parser")
# Download the catalogue page listing the restriction endonuclease products.
enzymes_catalogue = get_neb_soup("/products/restriction-endonucleases")

# Build (link text, link path) pairs for every catalogue link that points
# under "/products/r" and whose text is a name defined in Bio.Restriction.
# ``href=True`` restricts the search to anchors that actually carry an href
# attribute; without it, an <a> tag lacking href raises KeyError below.
enzymes = [
    (a.text, a.attrs["href"])
    for a in enzymes_catalogue.find_all("a", href=True)
    if a.attrs["href"].startswith("/products/r")
    and a.text in Restriction.__dict__
]
# Accumulates one row per "sku" cell found on the product pages:
# [enzyme name, text of the row's cells excluding the first and the last two].
results = []
for enz, endpoint in tqdm(enzymes):
    product_page = get_neb_soup(endpoint)
    sku_cells = product_page.find_all("td", {"class": "sku"})
    results.extend(
        [enz, *(cell.text for cell in sku_cell.parent.find_all("td")[1:-2])]
        for sku_cell in sku_cells
    )
    # Wait two seconds before requesting the next product page
    # (presumably to stay polite towards the server).
    time.sleep(2)
def _parse_price(text):
    """Convert a price cell such as ``"$1,234.00"`` to a float.

    Surrounding whitespace is ignored, the first remaining character is
    dropped as the currency symbol, and thousands separators are removed
    so that four-digit-and-up prices do not break ``float``.
    """
    return float(text.strip()[1:].replace(",", ""))


def _parse_units(text):
    """Convert a units cell such as ``"10,000 units"`` to an int.

    Thousands separators are removed and only the first
    whitespace-delimited token is converted, so leading whitespace or
    non-space separators in the scraped text are tolerated.
    """
    return int(text.replace(",", "").split()[0])


# One row per scraped product listing; each row must hold exactly four values.
data = pandas.DataFrame(results, columns=["enzyme", "units", "concentration", "price"])
num_price = data["price"].apply(_parse_price)
num_units = data["units"].apply(_parse_units)
# Cost of a single enzyme unit for each listing.
data["price_per_unit"] = num_price / num_units
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment