Skip to content

Instantly share code, notes, and snippets.

View Sulstice's full-sized avatar
🎯
Focusing

Sul Sulstice

🎯
Focusing
View GitHub Profile
{
"metadata": {
"title": "Cannabis Sativa Volatile Sulfur Compounds Analysis",
"date": "2025-01-21",
"source": "Hellma Protein Target Analysis",
"total_compounds": 7,
"total_pdb_targets": 195
},
"column_definitions": {
"chemical_name": "IUPAC chemical name",
# Imports
# -------
import time
import requests
import pandas as pd
def cas_to_smiles(cas_number):
url = f"https://cactus.nci.nih.gov/chemical/structure/{cas_number}/smiles"
from transformers import RobertaTokenizer, RobertaModel
benzene = 'C1=CC=CC=C1'
chembert = RobertaModel.from_pretrained("seyonec/ChemBERTa-zinc-base-v1")
chembert_tokenizer = RobertaTokenizer.from_pretrained("seyonec/ChemBERTa-zinc-base-v1")
inputs = chembert_tokenizer(
benzene, return_tensors="pt", padding=True, truncation=True
)
from global_chem import GlobalChem
from STOUT import translate_forward, translate_reverse
if __name__ == '__main__':
gc = GlobalChem()
gc.build_global_chem_network(print_output=False, debugger=False)
names = gc.get_all_names()
smiles = gc.get_all_smiles()
@Sulstice
Sulstice / mol2_to_sdf.py
Created March 10, 2023 10:34
save a copy
def mol2_to_sdf(mol2_file, sdf_file=None):
"""
Method to convert a multi-mol2 file to sdf format with an extra property that stores the partial charges.
It invokes PyBel's mol2 file loader because RDKit's mol2 file loader does not read the partial charges.
"""
if sdf_file == None:
sdf_file = os.path.splitext(mol2_file)[0] + ".sdf"
largeSDfile = Outputfile("sdf", sdf_file, overwrite=True)
for mymol in readfile("mol2", mol2_file):
import cirpy
from global_chem import GlobalChem
gc = GlobalChem()
gc.build_global_chem_network()
smiles_list = gc.get_node_smiles('emerging_perfluoroalkyls')
import time
from global_chem import GlobalChem
gc = GlobalChem()
gc.build_global_chem_network()
smiles_list = list(gc.get_node_smiles('constituents_of_cannabis_sativa').values())
print (f"First SMILES: {smiles_list[0]}")
from rdkit import Chem
from rdkit import Chem
import rdkit.Chem.Descriptors as Descriptors
if __name__ == '__main__':
molecule = 'O'
rdkit_molecule = Chem.MolFromSmiles(molecule)
molecular_weight = Descriptors.ExactMolWt(rdkit_molecule)
logp = Descriptors.MolLogP(rdkit_molecule)
A chemical dataset spread with filters.
<?xml version='1.0' encoding='UTF-8'?>
<R_replacements><center SMILES="*O" degree="13627"><first_layer SMILES="*OC" edge_weight="2273"><second_layer SMILES="*Cl" edge_weight="3455" /><second_layer SMILES="*OCC" edge_weight="788" /></first_layer><first_layer SMILES="*N" edge_weight="958"><second_layer SMILES="*NC" edge_weight="305" /><second_layer SMILES="*NC(C)=O" edge_weight="300" /></first_layer><first_layer SMILES="*CO" edge_weight="301"><second_layer SMILES="*C(C)O" edge_weight="75" /><second_layer SMILES="*C(C)(C)O" edge_weight="53" /></first_layer><first_layer SMILES="*OC(C)=O" edge_weight="200" /><first_layer SMILES="*C(=O)NO" edge_weight="89"><second_layer SMILES="*CC(=O)NO" edge_weight="43" /></first_layer></center><center SMILES="*OC" degree="12408"><first_layer SMILES="*Cl" edge_weight="3455"><second_layer SMILES="*F" edge_weight="4152" /><second_layer SMILES="*Br" edge_weight="2208" /></first_layer><first_layer SMILES="*O" edge_weight="2273"><second_layer SMILES="*N" edge_weight="958" /><second_la