Skip to content

Instantly share code, notes, and snippets.

@wjiang
Last active November 28, 2023 07:28
Show Gist options
  • Save wjiang/acdb7f2f0ad4c20643988ca467cd30b3 to your computer and use it in GitHub Desktop.
Save wjiang/acdb7f2f0ad4c20643988ca467cd30b3 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# by Wen Jiang @ Purdue University
import sys, argparse
try:
    import pandas as pd
    import numpy as np
    import scipy
except ImportError:
    # Best-effort bootstrap: install the missing packages into the user site
    # of the interpreter that is running this script, then retry the imports.
    import importlib
    import site
    import subprocess

    # Use sys.executable (not a bare "python") so the packages land in the
    # environment that will actually import them; no shell is involved.
    subprocess.call(
        [sys.executable, "-m", "pip", "install", "--user", "pandas", "scipy", "numpy"]
    )
    # Re-run site initialisation so that a user site-packages directory that
    # did not exist at start-up is added to sys.path, and drop stale finder
    # caches before importing again.
    importlib.reload(site)
    importlib.invalidate_caches()
    import pandas as pd
    import numpy as np
    import scipy
def main():
    """Convert the cryoSPARC .cs file named on the command line to a star file.

    Reads the parsed command-line options, loads the particle metadata
    (optionally combined with a passthrough file), maps it to RELION column
    names, writes the star file, and prints the particle/image counts.
    """
    args = parse_command_line()

    data_cryosparc = cs2dataframe(args.csFile, passthrough=args.passthrough)
    print("Read in %d particles from %s" % (len(data_cryosparc), args.csFile))

    data_relion = dataframe_cryosparc2ToRelion(data_cryosparc)

    dataframe2star(data_relion, args.starFile)
    print("%d images saved to %s" % (len(data_relion), args.starFile))
def _read_cs(path):
    """Load one .cs (NumPy structured array) file into a DataFrame.

    Fields holding ``bytes`` values are decoded to ``str`` (UTF-8) so they can
    be concatenated with ordinary strings later on.
    """
    records = np.load(path)
    frame = pd.DataFrame.from_records(records.tolist(), columns=records.dtype.names)
    for name in frame.columns:
        column = frame[name]
        if column.dtype == object and any(isinstance(v, bytes) for v in column):
            frame[name] = [
                v.decode("utf-8") if isinstance(v, bytes) else v for v in column
            ]
    return frame


def cs2dataframe(csFile, passthrough=None):
    """Read CryoSPARC v2.x meta data into a single DataFrame.

    Args:
        csFile: path of the main .cs file.
        passthrough: optional path of the matching passthrough .cs file.

    Returns:
        A DataFrame with one row per record of ``csFile``. When a passthrough
        file is given, its columns that are not already present are appended.
        If both files carry a ``uid`` column the passthrough rows are matched
        to the main rows by ``uid`` (rows without a match are filled with
        missing values); otherwise they are combined by position.
    """
    data = _read_cs(csFile)
    if not passthrough:
        return data

    extra = _read_cs(passthrough)
    # Columns present in both files would become duplicate labels, and
    # selecting a duplicated label returns a frame instead of a column.
    new_columns = [name for name in extra.columns if name not in data.columns]

    if "uid" in data and "uid" in extra:
        # Match on plain Python ints so large unsigned uids are never pushed
        # through a lossy float conversion.
        first_row = {}
        for position, uid in enumerate(extra["uid"].tolist()):
            first_row.setdefault(uid, position)
        wanted = [first_row.get(uid, -1) for uid in data["uid"].tolist()]
        # ``extra`` has a 0..n-1 index, so -1 never matches and yields a
        # missing-value row for uids absent from the passthrough file.
        aligned = extra[new_columns].reindex(wanted)
        aligned.index = data.index
    else:
        aligned = extra[new_columns]

    return pd.concat([data, aligned], axis=1)
def _as_text(column):
    """Return *column* with any ``bytes`` entries decoded to ``str`` (UTF-8)."""
    return column.map(lambda v: v.decode("utf-8") if isinstance(v, bytes) else v)


def _vector_components(column):
    """Expand a column of per-row sequences into a frame sharing *column*'s index."""
    return pd.DataFrame(column.tolist(), index=column.index)


def dataframe_cryosparc2ToRelion(data):
    """Map cryoSPARC metadata columns to RELION ``rln*`` columns.

    Args:
        data: DataFrame of cryoSPARC fields (for example ``blob/path``,
            ``ctf/df1_A``, ``alignments3D/pose``). Any subset may be present;
            only the fields that exist are converted.

    Returns:
        A new DataFrame with the same index as ``data`` holding the converted
        ``rln*`` columns. 3D alignment fields, when present, overwrite the
        class/angle/origin values derived from the 2D alignment fields.
    """
    # Start from data's index so scalar assignments broadcast to every row and
    # every later column is aligned with the input rows.
    ret = pd.DataFrame(index=data.index)

    if "blob/idx" in data and "blob/path" in data:
        # cryoSPARC indices are shifted by one to form the <index>@<stack> name.
        image_index = (data['blob/idx'].astype(int) + 1).map('{:06d}'.format)
        ret["rlnImageName"] = image_index + "@" + _as_text(data['blob/path'])
    if "location/micrograph_path" in data:
        ret["rlnMicrographName"] = _as_text(data["location/micrograph_path"])
    if "movie_blob/path" in data:
        ret["rlnMovieName"] = _as_text(data["movie_blob/path"])

    # Fields that are copied through unchanged.
    for cs_name, rln_name in (
        ("ctf/accel_kv", "rlnVoltage"),
        ("ctf/cs_mm", "rlnSphericalAberration"),
        ("ctf/amp_contrast", "rlnAmplitudeContrast"),
    ):
        if cs_name in data:
            ret[rln_name] = data[cs_name]

    if "ctf/df1_A" in data and "ctf/df2_A" in data and "ctf/df_angle_rad" in data:
        ret["rlnDefocusU"] = data["ctf/df1_A"]
        ret["rlnDefocusV"] = data["ctf/df2_A"]
        ret["rlnDefocusAngle"] = np.rad2deg(data["ctf/df_angle_rad"])
    if "ctf/phase_shift_rad" in data:
        ret["rlnPhaseShift"] = np.rad2deg(data['ctf/phase_shift_rad'])
    if "blob/psize_A" in data:
        # The detector pixel size is a fixed nominal 5.0; the magnification is
        # derived from it so that detector_size * 1e4 / magnification gives
        # back blob/psize_A.
        ret["rlnDetectorPixelSize"] = 5.0
        ret["rlnMagnification"] = (ret["rlnDetectorPixelSize"] * 1e4 / data["blob/psize_A"]).round(1)
    if "alignments3D/split" in data:
        ret["rlnRandomSubset"] = data["alignments3D/split"] + 1

    # 2D class assignments
    if "alignments2D/class" in data:
        ret["rlnClassNumber"] = data["alignments2D/class"].astype(int) + 1
    if "alignments2D/shift" in data:
        shifts = _vector_components(data["alignments2D/shift"]).round(2)
        ret["rlnOriginX"] = shifts.iloc[:, 0]
        ret["rlnOriginY"] = shifts.iloc[:, 1]
    if "alignments2D/pose" in data:
        ret["rlnAnglePsi"] = np.rad2deg(data["alignments2D/pose"]).round(2)

    # 3D class assignments
    if "alignments3D/class" in data:
        ret["rlnClassNumber"] = data["alignments3D/class"].astype(int) + 1
    if "alignments3D/cross_cor" in data:
        ret["rlnLogLikeliContribution"] = data["alignments3D/cross_cor"]  # not a good pair relationship. other better choice?
    if "alignments3D/pose" in data:
        # cryosparc rotation r vector is in the rotvec format
        from scipy.spatial.transform import Rotation

        rotations = Rotation.from_rotvec(list(data["alignments3D/pose"].values))
        euler = rotations.as_euler('zyz', degrees=True)
        # The three 'zyz' angles are stored in reverse order: rot, tilt, psi.
        ret["rlnAngleRot"] = euler[:, 2]
        ret["rlnAngleTilt"] = euler[:, 1]
        ret["rlnAnglePsi"] = euler[:, 0]
    if "alignments3D/shift" in data:
        shifts = _vector_components(data["alignments3D/shift"]).round(2)
        ret["rlnOriginX"] = shifts.iloc[:, 0]
        ret["rlnOriginY"] = shifts.iloc[:, 1]

    if "location/center_x_frac" in data and "location/center_y_frac" in data and "location/micrograph_shape" in data:
        shape = _vector_components(data["location/micrograph_shape"])
        # micrograph_shape is read as (ny, nx); fractional centres are scaled
        # by the matching dimension.
        ny = shape.iloc[:, 0]
        nx = shape.iloc[:, 1]
        ret["rlnCoordinateX"] = (data["location/center_x_frac"] * nx).astype(float).round(1)
        ret["rlnCoordinateY"] = (data["location/center_y_frac"] * ny).astype(float).round(1)

    return ret
def _star_column(values):
    """Render one column as strings; float64 values are rounded to 6 decimals."""
    if values.dtype == np.float64:
        values = values.round(6)
    return values.astype(str)


def dataframe2star(data, starFile):
    """Write the ``rln*`` columns of *data* to *starFile* as a single loop_ block.

    Args:
        data: DataFrame whose columns starting with ``rln`` are written, in
            their existing order, one tab-separated row per DataFrame row.
        starFile: path of the output file (overwritten if it exists).

    Raises:
        ValueError: if ``data`` has no column whose name starts with ``rln``.
    """
    keys = [k for k in data.columns if k.startswith("rln")]
    if not keys:
        # Fail before touching the output so no header-only file is left behind.
        raise ValueError("no rln* columns to write to %s" % starFile)

    # Every column, including the first, goes through the same formatting.
    columns = [_star_column(data[k]) for k in keys]

    with open(starFile, "wt") as fp:
        fp.write("\ndata_\n\nloop_ \n")
        for ki, k in enumerate(keys):
            fp.write("_%s #%d \n" % (k, ki + 1))
        fp.write('\n'.join('\t'.join(row) for row in zip(*columns)))
        fp.write('\n')
def parse_command_line(argv=None):
    """Parse the converter's command-line options.

    Args:
        argv: optional list of argument strings; when None, argparse reads
            the process arguments.

    Returns:
        The argparse namespace with ``csFile``, ``starFile`` and
        ``passthrough`` (None when the option is not given).
    """
    description = "convert a cryoSPARC cs file to a Relion star file"
    epilog = "Author: Wen Jiang (jiang12 at purdue.edu)\n"
    epilog += "Copyright (c) 2019 Purdue University\n"
    # The raw formatter keeps the line break between the two epilog lines,
    # which the default help formatter would reflow into one paragraph.
    parser = argparse.ArgumentParser(
        description=description,
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("csFile", help="input cs file")
    parser.add_argument("starFile", help="output star file")
    parser.add_argument("--passthrough", metavar="<filename>", type=str, help="input cryoSPARC passthrough file", default=None)
    args = parser.parse_args(argv)
    return args
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment