@aced125
aced125 / extract_drug_candidates_from_sec_filings.py
Created September 23, 2023 18:41
extract_drug_candidates_from_sec_filings
# extract_drug_candidates_from_sec_filings
# This gist extracts product candidates from SEC filings using LLMs. The code was mostly generated by an LLM.
import os

import pandas as pd
import requests
import torch
import torch.nn as nn
import torch.nn.functional as F
from langchain.chat_models import ChatAnthropic
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from transformers import AutoModel
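
# The extraction chain itself is truncated from this gist. Below is a minimal
# sketch of how the imports above are typically wired together; the
# DrugCandidates schema and the prompt wording are hypothetical, not the
# author's code.
from typing import List
from pydantic import BaseModel, Field

class DrugCandidates(BaseModel):
    candidates: List[str] = Field(description="Product candidates named in the filing")

parser = PydanticOutputParser(pydantic_object=DrugCandidates)
prompt = PromptTemplate(
    template=("Extract every drug or product candidate from this SEC filing excerpt.\n"
              "{format_instructions}\n\nFiling text:\n{filing_text}"),
    input_variables=["filing_text"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
llm = ChatAnthropic(temperature=0)
# candidates = parser.parse(llm.predict(prompt.format(filing_text=filing_chunk)))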

class Aggregation(nn.Module):
    """
    Helper class to perform aggregation (default mean) over the word embeddings.
    """
    def __init__(self, aggr='mean'):
        super(Aggregation, self).__init__()
        self.aggr = aggr
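
    # The rest of the class is not shown in the gist; a plausible forward (an
    # assumption) that mean-pools token embeddings over the sequence dimension:
    def forward(self, token_embeddings):
        # token_embeddings: (batch_size, seq_len, hidden_dim)
        if self.aggr == 'mean':
            return token_embeddings.mean(dim=1)
        raise ValueError(f'Unsupported aggregation: {self.aggr}')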

# Training loop for the neural-fingerprint regressor. MLP_Regressor, train_fn
# and valid_fn are defined further down.
reg = MLP_Regressor(atom_features=2, fp_size=2048, hidden_size=100)
optimizer = torch.optim.SGD(reg.parameters(), lr=0.001, weight_decay=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=100)

total_epochs = 1000
for epoch in range(1, total_epochs + 1):
    train_loss = train_fn(train_loader, train_labels_loader, reg, opt=optimizer)
    valid_loss = valid_fn(valid_loader, valid_labels_loader, reg)
    scheduler.step(valid_loss)  # reduce LR when the validation loss plateaus

def train_step(batch, labels, reg):
    out = reg(batch)
    loss = F.mse_loss(out, labels.to(torch.float), reduction='mean')
    loss.backward()  # the optimizer step is taken by the caller
    return loss

def valid_step(batch, labels, reg):
    out = reg(batch)
    loss = F.mse_loss(out, labels.to(torch.float), reduction='mean')
    return loss
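
# train_fn and valid_fn are called in the loop above but not defined in the
# gist; a minimal sketch (an assumption) that drives train_step/valid_step
# over paired batch/label loaders:
def train_fn(loader, labels_loader, reg, opt):
    reg.train()
    total = 0.0
    for batch, labels in zip(loader, labels_loader):
        opt.zero_grad()
        loss = train_step(batch, labels, reg)  # calls loss.backward()
        opt.step()
        total += loss.item()
    return total / len(loader)

def valid_fn(loader, labels_loader, reg):
    reg.eval()
    total = 0.0
    with torch.no_grad():
        for batch, labels in zip(loader, labels_loader):
            total += valid_step(batch, labels, reg).item()
    return total / len(loader)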

class MLP_Regressor(nn.Module):
    def __init__(self, atom_features=2, fp_size=2048, hidden_size=100):
        super(MLP_Regressor, self).__init__()
        self.neural_fp = neural_fp  # module-level NeuralFP instance created below
        self.lin1 = nn.Linear(fp_size, hidden_size)
        self.lin2 = nn.Linear(hidden_size, 1)
        self.dropout = nn.Dropout(p=0.3)

    # forward reconstructed from the layers above: fingerprint -> hidden -> scalar
    def forward(self, batch):
        fp = self.neural_fp(batch)
        hidden = F.relu(self.dropout(self.lin1(fp)))
        return self.lin2(hidden)

# Load the BACE regression set from MoleculeNet as raw RDKit mols.
import deepchem as dc

_, (train, valid, test), _ = dc.molnet.load_bace_regression(featurizer='Raw')

bs = 32
train_loader, _ = prepare_dataloader(list(train.X), batch_size=bs)
valid_loader, _ = prepare_dataloader(valid.X, batch_size=bs)
test_loader, _ = prepare_dataloader(test.X, batch_size=bs)
train_labels_loader = torch.utils.data.DataLoader(train.y, batch_size=bs)

neural_fp = NeuralFP(atom_features=2, fp_size=2048)
fps = neural_fp(batch)  # remember, batch size was 3
print(fps.shape)
# torch.Size([3, 2048])
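
# NeuralFP is not defined in this snippet. A minimal sketch, assuming it
# stacks NeuralLoop layers (see models.py below) and sums each atom's
# fingerprint contribution into one per-molecule fingerprint; not the
# author's exact code:
from torch_scatter import scatter_add

class NeuralFP(nn.Module):
    def __init__(self, atom_features=2, fp_size=2048):
        super(NeuralFP, self).__init__()
        self.loops = nn.ModuleList([
            NeuralLoop(atom_features=atom_features, fp_size=fp_size),
            NeuralLoop(atom_features=atom_features, fp_size=fp_size),
        ])

    def forward(self, batch):
        x, fingerprint = batch.x.float(), 0
        for loop in self.loops:
            x, fp = loop(x, batch.edge_index)
            fingerprint = fingerprint + fp  # accumulate one contribution per radius
        # sum atom-level fingerprints into a single vector per molecule
        return scatter_add(fingerprint, batch.batch, dim=0)
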
@aced125
aced125 / models.py
Last active July 3, 2020 18:45
GC models
import torch
import torch.nn as nn
from torch_geometric.nn import MessagePassing
from torch_scatter import scatter_add
from torch_geometric.utils import add_self_loops, degree

class NeuralLoop(MessagePassing):
    def __init__(self, atom_features, fp_size):
        super(NeuralLoop, self).__init__(aggr='add')  # sum incoming messages
        self.H = nn.Linear(atom_features, atom_features)
        self.W = nn.Linear(atom_features, fp_size)
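
    # The class is truncated in the gist; a sketch of the usual Duvenaud-style
    # neural-fingerprint loop (an assumption, not necessarily the author's code):
    def forward(self, x, edge_index):
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
        return self.propagate(edge_index, x=x)

    def message(self, x_j):
        return x_j  # pass neighbour atom features through unchanged

    def update(self, aggr_out):
        atom_features = torch.sigmoid(self.H(aggr_out))             # smooth atom-state update
        fingerprint = torch.softmax(self.W(atom_features), dim=-1)  # soft hash into fingerprint bins
        return atom_features, fingerprint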

# A small example set of molecules to featurize.
from rdkit import Chem

smiles_list = ['Cc1cc(c(C)n1c2ccc(F)cc2)S(=O)(=O)NCC(=O)N',
               'CN(CC(=O)N)S(=O)(=O)c1c(C)n(c(C)c1S(=O)(=O)N(C)CC(=O)N)c2ccc(F)cc2',
               'Fc1ccc(cc1)n2cc(COC(=O)CBr)nn2',
               'CCOC(=O)COCc1cn(nn1)c2ccc(F)cc2',
               'COC(=O)COCc1cn(nn1)c2ccc(F)cc2',
               'Fc1ccc(cc1)n2cc(COCC(=O)OCc3cn(nn3)c4ccc(F)cc4)nn2']
mol_list = [Chem.MolFromSmiles(smi) for smi in smiles_list]
dloader, dlist = prepare_dataloader(mol_list)
@aced125
aced125 / data_prep.py
Last active December 20, 2019 14:53
GCBlog_dataprep
import torch

def get_atom_features(mol):
    # Two features per atom: atomic number and total hydrogen count.
    atomic_number = []
    num_hs = []
    for atom in mol.GetAtoms():
        atomic_number.append(atom.GetAtomicNum())
        num_hs.append(atom.GetTotalNumHs(includeNeighbors=True))
    return torch.tensor([atomic_number, num_hs]).t()  # shape (num_atoms, 2)
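
# prepare_dataloader is used in the snippets above but not shown; a minimal
# sketch (assumption: hypothetical helpers, not the author's exact code) that
# wraps RDKit mols into torch_geometric Data objects, with bonds as edges:
from torch_geometric.data import Data, DataLoader

def get_edge_index(mol):
    row, col = [], []
    for bond in mol.GetBonds():
        i, j = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        row += [i, j]  # add both directions so the graph is undirected
        col += [j, i]
    return torch.tensor([row, col], dtype=torch.long)

def prepare_dataloader(mol_list, batch_size=32):
    data_list = [Data(x=get_atom_features(mol), edge_index=get_edge_index(mol))
                 for mol in mol_list]
    return DataLoader(data_list, batch_size=batch_size), data_list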