Sergey Kolchenko azkalot1

  • Cellarity
  • Chicago
callbacks = [
    # Each criterion is calculated separately.
    CriterionCallback(
        input_key="mask",
        prefix="loss_dice",
        criterion_key="dice"
    ),
    CriterionCallback(
        input_key="mask",
        prefix="loss_bce",
        criterion_key="bce"  # assumed: completes the truncated callback by analogy with the one above
    ),
]
import torch
import segmentation_models_pytorch as smp
import numpy as np
import matplotlib.pyplot as plt
from catalyst import dl, metrics, core, contrib, utils
import torch.nn as nn
from skimage.io import imread
import os
from sklearn.model_selection import train_test_split
from catalyst.dl import CriterionCallback, MetricAggregationCallback
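These imports cover the whole training stack; a hedged sketch of how they typically fit together under the Catalyst 20.x API (the model choice, criterion keys, optimizer settings, loaders, logdir, and epoch count are all illustrative assumptions):

model = smp.Unet("resnet34", encoder_weights="imagenet", classes=1, activation=None)
criterion = {
    "dice": smp.utils.losses.DiceLoss(),  # assumed dice implementation
    "bce": nn.BCEWithLogitsLoss(),
}
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

runner = dl.SupervisedRunner(input_key="image", input_target_key="mask")
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders={"train": train_loader, "valid": valid_loader},  # hypothetical loaders
    callbacks=callbacks,
    num_epochs=20,
    logdir="./logs",
    verbose=True,
)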
azkalot1 / augs_dataset.py
Last active December 18, 2020 05:27
Augs_dataset
import albumentations as A
from torch.utils.data import Dataset, DataLoader
from collections import OrderedDict


class ChestXRayDataset(Dataset):
    def __init__(
            self,
            images,
            masks,
            transforms):
        # Store the image/mask arrays and the albumentations pipeline.
        self.images = images
        self.masks = masks
        self.transforms = transforms
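The preview stops at the constructor, so the rest of the Dataset protocol is sketched below under assumptions: images are H x W x C numpy arrays with matching 2-D masks, ToTensorV2 handles the channel reordering, and the returned keys match the input_key="mask" used by the callbacks above. The subclass name, transform list, and batch size are hypothetical.

from albumentations.pytorch import ToTensorV2

train_transforms = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Normalize(),
    ToTensorV2(),
])


class ChestXRayDatasetSketch(ChestXRayDataset):
    # Hypothetical completion of the truncated class above.
    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        augmented = self.transforms(image=self.images[idx], mask=self.masks[idx])
        return {
            "image": augmented["image"].float(),
            "mask": augmented["mask"].float().unsqueeze(0),  # add channel dimension
        }


train_loader = DataLoader(
    ChestXRayDatasetSketch(train_images, train_masks, train_transforms),  # hypothetical arrays
    batch_size=8,
    shuffle=True,
)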
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.optim import Optimizer
from torch.utils import data
import pretrainedmodels
class DataGenerator(data.Dataset):
    # Minimal map-style dataset skeleton (assumed structure).
    def __init__(self, samples, labels):
        self.samples = samples
        self.labels = labels

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        return self.samples[index], self.labels[index]
# Basically like https://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_comparison.html,
# but applied to our real data instead of the generated toy datasets.
from sklearn.cluster import estimate_bandwidth
from sklearn.neighbors import kneighbors_graph

# Prepare the clustering parameters.
params = {'quantile': .3,
          'eps': .3,
          'damping': .9,
          'preference': -200,
          'n_neighbors': 10,
          'n_clusters': 5}
bandwidth = estimate_bandwidth(embedding, quantile=params['quantile'])
connectivity = kneighbors_graph(
    embedding, n_neighbors=params['n_neighbors'], include_self=False)  # assumed: same call as in the sklearn example
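The preview ends before any estimator is fitted; a minimal sketch of plugging the prepared parameters into two of the estimators from the referenced scikit-learn example (the choice of these two, and fitting on embedding, are assumptions):

from sklearn import cluster

# Mean shift uses the estimated bandwidth; Ward clustering uses the kNN connectivity graph.
ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
ward = cluster.AgglomerativeClustering(
    n_clusters=params['n_clusters'], linkage='ward', connectivity=connectivity)

ms_labels = ms.fit_predict(embedding)
ward_labels = ward.fit_predict(embedding)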
from sklearn.decomposition import PCA
import umap

# Reduce with PCA first, then embed into 2-D with UMAP.
pca = PCA(n_components=100)
pca.fit(mat)
mat_reduce = pca.transform(mat)
embedding = umap.UMAP(n_neighbors=5,
                      min_dist=0.5,
                      metric='euclidean').fit_transform(mat_reduce)
plt.figure(figsize=(15, 15))
plt.scatter(embedding[:, 0], embedding[:, 1], s=0.2);
plt.title('Naive clustering');
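To go beyond the uncolored "naive" scatter, the labels from a fitted estimator (ward_labels from the hypothetical sketch after the parameter block) can color the embedding:

plt.figure(figsize=(15, 15))
plt.scatter(embedding[:, 0], embedding[:, 1], c=ward_labels, cmap='tab10', s=0.2);  # ward_labels is hypothetical
plt.title('UMAP embedding colored by cluster assignment');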
# Keep cells with at least 100 total counts, log-transform,
# then keep only high-variability genes (CV is computed in the snippet further down).
cells_expression = mat.sum(axis=1)
mat = mat[cells_expression >= 100, :]
mat = np.log(mat + 1)
mat = mat[:, CV >= 10]
f, ax = plt.subplots(1, 2, figsize=(15, 5))
per_cell_sum = mat.sum(axis=1)
ax[0].hist(np.log10(per_cell_sum + 1));
ax[0].set_title('Distribution of #UMIs per cell\n min {}, max {}, mean {} +- {}'.format(
    min(per_cell_sum), max(per_cell_sum),
    np.mean(per_cell_sum), np.std(per_cell_sum)));
per_gene_sum = mat.sum(axis=0)
ax[1].hist(np.log10(per_gene_sum + 1));
ax[1].set_title('Distribution of #UMIs per gene\n min {}, max {}, mean {} +- {}'.format(
    min(per_gene_sum), max(per_gene_sum),
    np.mean(per_gene_sum), np.std(per_gene_sum)));
low_expr_thr = 100
high_expr_thr = 100000
mat = mat[:, (per_gene_sum >= low_expr_thr) & (per_gene_sum <= high_expr_thr)]  # just remove extreme outliers
# Coefficient of variation (std / mean) per gene.
mean_exp = mat.mean(axis=0)
std_exp = mat.std(axis=0)
CV = std_exp / mean_exp
plt.hist(CV);
plt.title('Distribution of CV, mean {} sd {}'.format(np.mean(CV), np.std(CV)));
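This CV vector is what the earlier mat = mat[:, CV >= 10] filter depends on; a small sketch of checking how many genes a given cutoff would keep before applying it (the cutoff is the value from the filtering snippet above, the rest is an assumption):

cv_thr = 10  # cutoff used in the filtering snippet above
n_keep = np.sum(CV >= cv_thr)
print('Keeping {} of {} genes with CV >= {}'.format(n_keep, CV.size, cv_thr))
mat_hvg = mat[:, CV >= cv_thr]  # highly variable genes only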