Created
November 21, 2016 14:54
-
-
Save Zulko/63dd041659398cf5a74f768b68598b9d to your computer and use it in GitHub Desktop.
Goes through the NEB website and compiles all enzyme prices into one spreadsheet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time
import urllib
import urllib.request  # `import urllib` alone does not load the `request` submodule

import pandas
from Bio import Restriction
from bs4 import BeautifulSoup
from tqdm import tqdm
def get_neb_soup(endpoint):
    """Fetch a page from neb.com and return it parsed with BeautifulSoup.

    Parameters
    ----------
    endpoint : str
        Path portion of the URL, e.g. ``"/products/restriction-endonucleases"``.
        It is appended verbatim to ``https://www.neb.com``.

    Returns
    -------
    bs4.BeautifulSoup
        The parsed HTML document (``html.parser`` backend).
    """
    url = "https://www.neb.com" + endpoint
    # `with` ensures the HTTP response is closed even if parsing fails.
    # NOTE: requires `urllib.request` to be imported at the top of the file;
    # a bare `import urllib` is not sufficient.
    with urllib.request.urlopen(url) as response:
        return BeautifulSoup(response.read(), 'html.parser')
# Fetch the catalogue page listing every restriction endonuclease product.
enzymes_catalogue = get_neb_soup("/products/restriction-endonucleases")

# Collect (enzyme name, relative URL) for every product link whose name is a
# restriction enzyme known to Biopython's Restriction module.
enzymes = [
    (a.text, a.attrs["href"])
    for a in enzymes_catalogue.find_all("a")
    # .get() guards against <a> tags with no href attribute, which would
    # otherwise raise KeyError on a.attrs["href"].
    if a.get("href", "").startswith("/products/r")
    and a.text in Restriction.__dict__
]

results = []
for enz, endpoint in tqdm(enzymes):
    enzyme_page = get_neb_soup(endpoint)
    for td in enzyme_page.find_all("td", {"class": "sku"}):
        # Assumes each SKU row's cells are
        # [sku, units, concentration, price, <2 trailing cells>] — the slice
        # [1:-2] drops the SKU cell and the last two. TODO confirm against
        # the current NEB page layout.
        results.append([enz] + [t.text for t in td.parent.find_all("td")[1:-2]])
    time.sleep(2)  # be polite to the server between page fetches

data = pandas.DataFrame(
    results, columns=["enzyme", "units", "concentration", "price"]
)
# Price strings look like "$65.00" (strip the leading "$");
# unit strings look like "10,000 units" (strip commas, keep the number).
num_price = data["price"].apply(lambda s: float(s[1:]))
num_units = data["units"].apply(lambda s: int(s.replace(",", "").split(" ")[0]))
data["price_per_unit"] = num_price / num_units
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment