Skip to content

Instantly share code, notes, and snippets.

@PlethoraChutney
Created March 28, 2023 18:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PlethoraChutney/fa6166e8da4448f01ed40c8cfb74797e to your computer and use it in GitHub Desktop.
Save PlethoraChutney/fa6166e8da4448f01ed40c8cfb74797e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import numpy as np
import pandas as pd
import umap
import sys
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from cryosparc.dataset import Dataset
sns.set(style='white')

# Upper bound on the number of rows kept when 'subset' mode is requested.
SUBSET_SIZE = 10000

# Command line: <script> <dataset_path> [subset]
#   dataset_path  file handed to cryosparc's Dataset.load
#   subset        optional literal 'subset'; embeds a random sample of at
#                 most SUBSET_SIZE rows instead of every row
if len(sys.argv) < 2:
    sys.exit(f'usage: {sys.argv[0]} <dataset_path> [subset]')

# Load the dataset and keep only the UID plus every field whose name
# contains both 'component' and 'value'.
particles = Dataset.load(sys.argv[1])
components = [x for x in particles.fields() if 'component' in x and 'value' in x]
components.insert(0, 'uid')
df = pd.DataFrame({x: particles[x] for x in components})

# The second argument is optional, so check the length before indexing;
# otherwise running without it dies with IndexError.
if len(sys.argv) > 2 and sys.argv[2] == 'subset':
    # DataFrame.sample (without replacement) raises ValueError when asked
    # for more rows than exist, so clamp to the number of rows available.
    df = df.sample(min(SUBSET_SIZE, len(df)))

# Drop the leading 'uid' column: only the component values are embedded.
just_components = df[components[1:]].values

# Standardize each component column before handing it to UMAP.
scaled_components = StandardScaler().fit_transform(just_components)

# Fit UMAP with its default settings; the first two embedding columns are
# plotted below.
reducer = umap.UMAP()
embedding = reducer.fit_transform(scaled_components)

plt.scatter(
    embedding[:, 0],
    embedding[:, 1],
)
# Equal scaling on both axes so distances in the embedding are not distorted.
plt.gca().set_aspect('equal', 'datalim')
plt.title('UMAP projection of 3DVA dataset', fontsize=24)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment