Last active
November 28, 2023 07:28
-
-
Save wjiang/acdb7f2f0ad4c20643988ca467cd30b3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# by Wen Jiang @ Purdue University | |
import sys
import argparse

try:
    import pandas as pd
    import numpy as np
    import scipy
except ImportError:
    # Best-effort bootstrap: install the missing scientific packages into the
    # user site-packages of the interpreter that is running this script
    # (sys.executable), rather than whatever "python" happens to be on PATH.
    import importlib
    import site
    import subprocess

    subprocess.call(
        [sys.executable, "-m", "pip", "install", "--user",
         "pandas", "scipy", "numpy"]
    )
    # Re-run site initialisation so a user site directory created by the
    # install is added to sys.path; the builtin reload() used previously
    # does not exist on Python 3.
    importlib.reload(site)
    importlib.invalidate_caches()
    # If the install failed, these imports raise ImportError as before.
    import pandas as pd
    import numpy as np
    import scipy
def main():
    """Convert the .cs file named on the command line into a star file.

    Parses the command line, loads the particle metadata (optionally joined
    with a passthrough file), maps it to RELION column names, writes the
    result, and reports the row counts on stdout.
    """
    options = parse_command_line()

    particles = cs2dataframe(options.csFile, passthrough=options.passthrough)
    print("Read in %d particles from %s" % (len(particles), options.csFile))

    star_table = dataframe_cryosparc2ToRelion(particles)
    dataframe2star(star_table, options.starFile)
    print("%d images saved to %s" % (len(star_table), options.starFile))
def _load_cs(path):
    """Load a NumPy structured-array file into a DataFrame, one column per field."""
    records = np.load(path)
    return pd.DataFrame.from_records(records.tolist(), columns=records.dtype.names)


def cs2dataframe(csFile, passthrough=None):
    """Read CryoSPARC v2.x metadata into a single DataFrame.

    Args:
        csFile: path to the particle file, readable by ``np.load`` as a
            structured array.
        passthrough: optional path to a second structured-array file whose
            extra fields are appended as additional columns.

    Returns:
        A DataFrame with one row per record of ``csFile``. When a passthrough
        file is given, only the columns not already present in ``csFile`` are
        added, so every column name stays unique and ``data[name]`` is always
        a Series.

    Notes:
        If both files carry a ``uid`` column the passthrough rows are matched
        to the particle rows by ``uid`` (missing uids yield NaN) instead of by
        row position, so a differently ordered or larger passthrough file no
        longer mis-pairs metadata. Without a shared ``uid`` the files are
        joined positionally, as before.
    """
    data = _load_cs(csFile)
    if passthrough:
        extra = _load_cs(passthrough)
        new_columns = [c for c in extra.columns if c not in data.columns]
        if "uid" in data.columns and "uid" in extra.columns:
            # Align passthrough rows to the particle order via uid.
            extra = extra.drop_duplicates(subset="uid").set_index("uid")
            extra = extra.reindex(data["uid"])[new_columns]
            extra.index = data.index
        else:
            extra = extra[new_columns]
        data = pd.concat([data, extra], axis=1)
    return data
def _decode_bytes(series):
    """Return *series* with any bytes entries decoded to str (UTF-8)."""
    return series.map(lambda v: v.decode("utf-8") if isinstance(v, bytes) else v)


def dataframe_cryosparc2ToRelion(data):
    """Map cryoSPARC metadata columns to their RELION (``rln*``) equivalents.

    Args:
        data: DataFrame whose column names are cryoSPARC field names such as
            ``blob/path`` or ``ctf/df1_A``. Every group of columns is
            optional; a group that is absent is simply skipped.

    Returns:
        A new DataFrame sharing ``data``'s index and holding only the
        ``rln*`` columns that could be derived. Angles given in radians are
        converted to degrees, 0-based indices/classes/subsets become 1-based,
        and fractional particle centres are scaled by the micrograph shape.

    Notes:
        * Path columns may hold ``bytes`` (NumPy byte-string fields); they
          are decoded so the output contains plain text rather than failing
          on ``str + bytes`` or emitting ``b'...'``.
        * 3D alignment columns are processed after the 2D ones, so when both
          exist the 3D class, psi angle and shifts win.
    """
    # Start from data's index so scalar assignments broadcast to every row
    # even when they are the first column to be created.
    ret = pd.DataFrame(index=data.index)

    if "blob/idx" in data and "blob/path" in data:
        slice_number = (data["blob/idx"].astype(int) + 1).map("{:06d}".format)
        ret["rlnImageName"] = slice_number + "@" + _decode_bytes(data["blob/path"])
    if "location/micrograph_path" in data:
        ret["rlnMicrographName"] = _decode_bytes(data["location/micrograph_path"])
    if "movie_blob/path" in data:
        ret["rlnMovieName"] = _decode_bytes(data["movie_blob/path"])

    # CTF parameters
    if "ctf/accel_kv" in data:
        ret["rlnVoltage"] = data["ctf/accel_kv"]
    if "ctf/cs_mm" in data:
        ret["rlnSphericalAberration"] = data["ctf/cs_mm"]
    if "ctf/amp_contrast" in data:
        ret["rlnAmplitudeContrast"] = data["ctf/amp_contrast"]
    if "ctf/df1_A" in data and "ctf/df2_A" in data and "ctf/df_angle_rad" in data:
        ret["rlnDefocusU"] = data["ctf/df1_A"]
        ret["rlnDefocusV"] = data["ctf/df2_A"]
        ret["rlnDefocusAngle"] = np.rad2deg(data["ctf/df_angle_rad"])
    if "ctf/phase_shift_rad" in data:
        ret["rlnPhaseShift"] = np.rad2deg(data["ctf/phase_shift_rad"])

    if "blob/psize_A" in data:
        # The detector pixel size is a fixed nominal value; the magnification
        # is derived from it so that their ratio reproduces blob/psize_A.
        detector_pixel_size = 5.0
        ret["rlnDetectorPixelSize"] = detector_pixel_size
        ret["rlnMagnification"] = (detector_pixel_size * 1e4 / data["blob/psize_A"]).round(1)

    if "alignments3D/split" in data:
        ret["rlnRandomSubset"] = data["alignments3D/split"] + 1

    # 2D class assignments
    if "alignments2D/class" in data:
        ret["rlnClassNumber"] = data["alignments2D/class"].astype(int) + 1
    if "alignments2D/shift" in data:
        shifts = pd.DataFrame(data["alignments2D/shift"].tolist(), index=data.index).round(2)
        ret["rlnOriginX"] = shifts.iloc[:, 0]
        ret["rlnOriginY"] = shifts.iloc[:, 1]
    if "alignments2D/pose" in data:
        ret["rlnAnglePsi"] = np.rad2deg(data["alignments2D/pose"]).round(2)

    # 3D class assignments
    if "alignments3D/class" in data:
        ret["rlnClassNumber"] = data["alignments3D/class"].astype(int) + 1
    if "alignments3D/cross_cor" in data:
        ret["rlnLogLikeliContribution"] = data["alignments3D/cross_cor"]  # not a good pair relationship. other better choice?
    if "alignments3D/pose" in data:
        # cryosparc rotation r vector is in the rotvec format
        from scipy.spatial.transform import Rotation as R
        rotvecs = list(data["alignments3D/pose"].values)
        r = R.from_rotvec(rotvecs)
        e = r.as_euler('zyz', degrees=True)
        ret["rlnAngleRot"] = e[:, 2]
        ret["rlnAngleTilt"] = e[:, 1]
        ret["rlnAnglePsi"] = e[:, 0]
    if "alignments3D/shift" in data:
        shifts = pd.DataFrame(data["alignments3D/shift"].tolist(), index=data.index).round(2)
        ret["rlnOriginX"] = shifts.iloc[:, 0]
        ret["rlnOriginY"] = shifts.iloc[:, 1]

    if "location/center_x_frac" in data and "location/center_y_frac" in data and "location/micrograph_shape" in data:
        # micrograph_shape entries are indexed as (rows, columns) = (y, x).
        locations = pd.DataFrame(data["location/micrograph_shape"].tolist(), index=data.index)
        my = locations.iloc[:, 0]
        mx = locations.iloc[:, 1]
        ret["rlnCoordinateX"] = (data["location/center_x_frac"] * mx).astype(float).round(1)
        ret["rlnCoordinateY"] = (data["location/center_y_frac"] * my).astype(float).round(1)
    return ret
def _star_column(series):
    """Return *series* as a list of strings, rounding float64 values to 6 decimals."""
    if series.dtype == np.float64:
        series = series.round(6)
    return series.astype(str).tolist()


def dataframe2star(data, starFile):
    """Write the ``rln*`` columns of *data* to a star file.

    The output is a single unnamed ``data_`` block with one ``loop_``: a
    numbered ``_<label> #<n>`` header line per column, followed by one
    tab-separated line per row.

    Args:
        data: DataFrame to export; columns whose name does not start with
            ``"rln"`` are ignored.
        starFile: path of the file to create (overwritten if it exists).

    Notes:
        The file is opened in a ``with`` block so it is flushed and closed
        even if formatting fails. A frame without any ``rln*`` column yields
        a header-only file instead of raising ``IndexError``. float64 columns
        are rounded to 6 decimals uniformly, including the first one.
    """
    keys = [k for k in data.columns if k.startswith("rln")]
    columns = [_star_column(data[k]) for k in keys]
    # zip(*columns) transposes the per-column string lists into rows.
    rows = ["\t".join(fields) for fields in zip(*columns)]

    with open(starFile, "wt") as fp:
        fp.write("\ndata_\n\nloop_ \n")
        for ki, k in enumerate(keys, start=1):
            fp.write("_%s #%d \n" % (k, ki))
        fp.write("\n".join(rows))
        fp.write("\n")
def parse_command_line():
    """Parse ``sys.argv`` and return the resulting argparse namespace.

    The namespace carries ``csFile`` and ``starFile`` (required positionals)
    and ``passthrough`` (optional, ``None`` when not supplied).
    """
    credits = "".join([
        "Author: Wen Jiang (jiang12 at purdue.edu)\n",
        "Copyright (c) 2019 Purdue University\n",
    ])
    cli = argparse.ArgumentParser(
        description="convert a cryoSPARC cs file to a Relion star file",
        epilog=credits,
    )
    cli.add_argument("csFile", help="input cs file")
    cli.add_argument("starFile", help="output star file")
    cli.add_argument(
        "--passthrough",
        default=None,
        type=str,
        metavar="<filename>",
        help="input cryoSPARC passthrough file",
    )
    return cli.parse_args()
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment