Skip to content

Instantly share code, notes, and snippets.

View marcosfelt's full-sized avatar

Kobi Felton marcosfelt

View GitHub Profile
@marcosfelt
marcosfelt / wikipedia_chemcial_properties.csv
Created April 9, 2024 14:11
Chemical properties scraped from Wikipedia
url smiles melting_point_K name density_g_cm3 dipole_moment_Debye boiling_point_K cas_number
https://en.wikipedia.org/wiki/Actinium(III)_oxide [O--].[O--].[O--].[Ac+3].[Ac+3] 327 Actinium(III) oxide
https://en.wikipedia.org/wiki/Silver_tetrafluoroborate [Ag+].F[B-](F)(F)F 71.5 Silver tetrafluoroborate 4.16
https://en.wikipedia.org/wiki/Silver_bromide [Ag]Br 430 Silver bromide 6.47 5.62 502
https://en.wikipedia.org/wiki/Silver_bromate [Ag+].[O-]Br(=O)=O 309 Silver bromate 5.206
https://en.wikipedia.org/wiki/Silver_chloride Cl[Ag] 455 Silver chloride 547
https://en.wikipedia.org/wiki/Silver_chlorate [Ag+].[O-]Cl(=O)=O 230 Silver chlorate 4.443 250
https://en.wikipedia.org/wiki/Silver_perchlorate [Ag+].[O-]Cl(=O)(=O)=O 486 Silver perchlorate 2.806
https://en.wikipedia.org/wiki/Silver_cyanide [C-]#N.[Ag+] 335 Silver cyanide 3.943
https://en.wikipedia.org/wiki/Silver_fulminate [C-]#[N+][O-].[Ag+] Silver fulminate 3.938
from typing import Dict, List, Literal
from scipy.stats import linregress, spearmanr
from sklearn import metrics
SCORE_NAMES = ["mae", "mse", "rmse", "mape", "r2", "maxe", "expl_var"]
def calculate_metrics(
y_true, y_pred, scores: List[str] = SCORE_NAMES
) -> Dict[str, float]:
"""Calculate metrics on a given dataset."""
@marcosfelt
marcosfelt / wiki.py
Created March 16, 2023 20:53
Wikipedia Chemical Data Scraper
import logging
import re
from typing import Tuple, Union
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
wikipedia_base = "https://en.wikipedia.org"
@marcosfelt
marcosfelt / pareto_front.py
Last active March 10, 2023 18:27
Flow chemistry data visualization based on data from Schweidtmann et al.
import numpy as np
def pareto_efficient(data, maximize=True):
"""
Copied from Summit, which in turn was probably copied from Stackoverflow
Find the pareto-efficient points
Parameters
---------
data: array-like
An (n_points, n_data) array
from typing import Dict, List, Optional
import matplotlib as mpl
from matplotlib.axes import Axes
import matplotlib.pyplot as plt
import numpy as np
def parity_plot(
y,
yhat,
ax: Optional[Axes] = None,
@marcosfelt
marcosfelt / parallel_plot.py
Last active March 10, 2023 16:44
Matplotlib examples
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm, ticker
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from scipy.interpolate import make_interp_spline
from typing import Callable, Dict, List, Optional, Union
@marcosfelt
marcosfelt / groups.py
Created February 22, 2023 12:39
Group identification (from thermo)
from rdkit import Chem
"""The code below is directly copied from the thermo group_contribution code (MIT license)
https://github.com/CalebBell/thermo/blob/516dee4ceda8e100918c7645e393a42fdfdc4bef/thermo/group_contribution/
I made changes to the SMARTS strings to match the ones used in FeOs
"""
J_BIGGS_JOBACK_SMARTS = [
["Methyl", "CH3", "[CX4H3]"],
["Secondary acyclic", "CH2", "[!R;CX4H2]"],
@marcosfelt
marcosfelt / batch_stats.py
Created January 17, 2023 12:31
Calculate mean and standard deviation using batch updates
import numpy as np
x = np.arange(100)
N = 0
mean = 0
std = 0
for i in range(11):
batch = x[10*i:10*(i+1)]
k = len(batch)
N += k
old_mean = mean
// Copyright (C) 2003 Rational Discovery LLC
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
// NOTES:
// - larger functional groups should come before smaller ones involving
// the same core atom
// - the first atom in the SMARTS query is not part of the group itself
// - it is essential that the fields in this file be separated by tabs
@marcosfelt
marcosfelt / fit_nrtl_ray.py
Created May 16, 2022 09:55
Fitting the NRTL model to data using Ray
import pandas as pd
import numpy as np
from lmfit import minimize, Parameters
import ray
from ray.exceptions import GetTimeoutError
from tqdm.auto import tqdm, trange
import typer