This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# extract_drug_candidates_from_sec_filings | |
# This gist extracts product candidates from SEC filings, using LLMs. This code was mostly generated by an LLM. | |
import os | |
import pandas as pd | |
import requests | |
from langchain.chat_models import ChatAnthropic | |
from langchain.output_parsers import PydanticOutputParser | |
from langchain.prompts import PromptTemplate |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch.nn as nn | |
import torch | |
from transformers import AutoModel | |
class Aggregation(nn.Module): | |
""" | |
Helper class to perform aggregation (default mean) over the word embeddings | |
""" | |
def __init__(self, aggr='mean'): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the regressor, its optimizer and a plateau-driven LR scheduler, then
# run the epoch loop.
# NOTE: the class defined in this file is spelled `MLP_Regressor`; the previous
# spelling `MLPRegressor` is not defined anywhere visible and would raise
# NameError.
reg = MLP_Regressor(atom_features=2, fp_size=2048, hidden_size=100)
optimizer = torch.optim.SGD(reg.parameters(), lr=0.001, weight_decay=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=100)
total_epochs = 1000
for epoch in range(1, total_epochs + 1):
    train_loss = train_fn(train_loader, train_labels_loader, reg, opt=optimizer)
    valid_loss = valid_fn(valid_loader, valid_labels_loader, reg)
    # ReduceLROnPlateau is stepped with the metric it monitors, here the
    # validation loss.
    scheduler.step(valid_loss)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def train_step(batch, labels, reg):
    """Run one forward/backward pass and return the batch MSE loss.

    Args:
        batch: Input handed unchanged to ``reg``.
        labels: Regression targets; cast to ``torch.float`` before the loss
            is computed.
        reg: Callable model that maps ``batch`` to predictions.

    Returns:
        The mean-reduced MSE loss tensor, after ``backward()`` has been called
        on it.

    Note:
        This function only back-propagates. It does not zero gradients or step
        an optimizer.
    """
    predictions = reg(batch)
    targets = labels.to(torch.float)
    batch_loss = F.mse_loss(predictions, targets, reduction='mean')
    batch_loss.backward()
    return batch_loss
def valid_step(batch, labels, reg):
    """Compute the batch MSE loss for validation without tracking gradients.

    Args:
        batch: Input handed unchanged to ``reg``.
        labels: Regression targets; cast to ``torch.float`` before the loss
            is computed.
        reg: Callable model that maps ``batch`` to predictions.

    Returns:
        The mean-reduced MSE loss tensor. It is detached from autograd
        (``requires_grad`` is False) because validation never back-propagates.
    """
    # No backward pass happens here, so skip building the autograd graph.
    with torch.no_grad():
        predictions = reg(batch)
        return F.mse_loss(predictions, labels.to(torch.float), reduction='mean')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch.nn.functional as F | |
class MLP_Regressor(nn.Module): | |
def __init__(self, atom_features=2, fp_size=2048, hidden_size=100): | |
super(MLP_Regressor, self).__init__() | |
self.neural_fp = neural_fp | |
self.lin1 = nn.Linear(fp_size, hidden_size) | |
self.lin2 = nn.Linear(hidden_size, 1) | |
self.dropout = nn.Dropout(p=0.3) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import deepchem as dc | |
# Fetch the BACE regression splits using the 'Raw' featurizer.
_, (train, valid, test), _ = dc.molnet.load_bace_regression(featurizer='Raw')

# One batch size shared by every loader built below.
bs = 32

# Only the first value returned by prepare_dataloader is kept for each split.
train_loader, _ = prepare_dataloader(list(train.X), batch_size=bs)
valid_loader, _ = prepare_dataloader(valid.X, bs)
test_loader, _ = prepare_dataloader(test.X, bs)

# Training labels are batched by their own DataLoader with the same batch size.
train_labels_loader = torch.utils.data.DataLoader(train.y, batch_size=bs)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the fingerprint module and apply it to one batch.
neural_fp = NeuralFP(atom_features=2, fp_size=2048)

# Reminder: the batch size was 3.
fps = neural_fp(batch)

print(fps.shape)
# torch.Size([3, 2048])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from torch_geometric.nn import MessagePassing | |
from torch_scatter import scatter_add | |
from torch_geometric.utils import add_self_loops, degree | |
class NeuralLoop(MessagePassing): | |
def __init__(self, atom_features, fp_size): | |
super(NeuralLoop, self).__init__(aggr='add') | |
self.H = nn.Linear(atom_features, atom_features) | |
self.W = nn.Linear(atom_features, fp_size) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example molecules, written as SMILES strings, for the data-loading demo.
smiles_list = [
    'Cc1cc(c(C)n1c2ccc(F)cc2)S(=O)(=O)NCC(=O)N',
    'CN(CC(=O)N)S(=O)(=O)c1c(C)n(c(C)c1S(=O)(=O)N(C)CC(=O)N)c2ccc(F)cc2',
    'Fc1ccc(cc1)n2cc(COC(=O)CBr)nn2',
    'CCOC(=O)COCc1cn(nn1)c2ccc(F)cc2',
    'COC(=O)COCc1cn(nn1)c2ccc(F)cc2',
    'Fc1ccc(cc1)n2cc(COCC(=O)OCc3cn(nn3)c4ccc(F)cc4)nn2',
]
mol_list = [Chem.MolFromSmiles(smi) for smi in smiles_list]

# Pass the parsed molecules to the loader builder. The earlier call passed no
# arguments and left mol_list unused.
# NOTE(review): batch_size=3 follows the "batch size was 3" remark in the
# fingerprint demo elsewhere in this file; confirm it against
# prepare_dataloader's signature.
dloader, dlist = prepare_dataloader(mol_list, batch_size=3)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_atom_features(mol):
    """Return a per-atom feature tensor for ``mol``.

    Args:
        mol: Object exposing ``GetAtoms()``, whose items provide
            ``GetAtomicNum()`` and ``GetTotalNumHs(includeNeighbors=...)``
            (presumably an RDKit molecule; confirm against callers).

    Returns:
        A tensor with one row per atom and two columns: the atomic number and
        the total hydrogen count, in that order.
    """
    atoms = list(mol.GetAtoms())
    atomic_numbers = [atom.GetAtomicNum() for atom in atoms]
    hydrogen_counts = [
        atom.GetTotalNumHs(includeNeighbors=True) for atom in atoms
    ]
    # Stacked as 2 x n_atoms, then transposed so each row describes one atom.
    stacked = torch.tensor([atomic_numbers, hydrogen_counts])
    return stacked.t()
NewerOlder