Skip to content

Instantly share code, notes, and snippets.

@wjiang
Created April 9, 2019 15:07
Show Gist options
  • Save wjiang/4cb9f9fb72783e91c0ebc4d34bdd0da3 to your computer and use it in GitHub Desktop.
Save wjiang/4cb9f9fb72783e91c0ebc4d34bdd0da3 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# by Wen Jiang @ Purdue University
import sys, argparse
import pandas as pd
import numpy as np
def main():
args = parse_command_line()
data_relion = star2dataframe(args.starFile)
print("Read in %d particles from %s" % (len(data_relion), args.starFile))
data_jspr = dataframe_relion2jspr(data_relion)
dataframe2lst(data_jspr, args.lstFile)
print("%d images saved to %s" % (len(data_jspr), args.lstFile))
def star2dataframe(starFile):
headers = []
foundheader = False
ln = 0
with open(starFile, 'rU') as f:
for l in f:
if l.startswith("_rln"):
foundheader = True
lastheader = True
head = l.split('#')[0].rstrip().lstrip('_')
headers.append(head)
else:
lastheader = False
if foundheader and not lastheader:
break
ln += 1
data = pd.read_csv(starFile, skiprows=ln, delimiter='\s+', header=None)
nans = data.iloc[:len(headers)+2, :].isnull().any(axis=1)
if nans.sum() > 0:
data = data.iloc[len(headers)+2:, :].reset_index(drop=True)
if len(data.columns) != len(headers):
print("ERROR: star file %s is invalid: %d columns were found but %d were defined in the header" % (starFile, len(data.columns), len(headers)))
sys.exit(-1)
data.columns = headers
nans = data.isnull().any(axis=1)
if nans.sum()>0:
print("WARNING: %s: %d/%d particle rows are corrupted and thus ignored" % (starFile, nans.sum(), len(data)))
print(" Corrupted particle indices: %s" % (nans.to_numpy().nonzero()[0]))
data = data[nans==False]
return data
def dataframe_relion2jspr(data):
ret = pd.DataFrame()
if "rlnImageName" in data:
tmp = data["rlnImageName"].str.split("@", expand=True)
indices, filenames = tmp.iloc[:,0], tmp.iloc[:, -1]
indices = indices.astype(int)-1
ret["pid"] = indices
ret["filename"] = filenames
if "rlnMicrographName" in data:
if "@" in data["rlnMicrographName"].iloc[0]: # movie frames
tmp = data["rlnMicrographName"].str.split("@", expand=True)
ret["frame"] = tmp.iloc[:, 0].astype(int)-1
ret["micrograph"] = tmp.iloc[:, 1]
else:
ret["micrograph"] = data["rlnMicrographName"]
if "rlnMicrographName" in data and "rlnImageName" not in data:
ret["filename"] = ret["micrograph"]
ret["pid"] = 0
elif "rlnMicrographName" not in data and "rlnImageName" not in data:
msg = "ERROR: rlnMicrographName or rlnMicrographName must be in the input star file. Available parameters are: %s" % (' '.join(data.columns))
raise AttributeError(msg)
if "rlnCoordinateX" in data and "rlnCoordinateY" in data:
ret["location"] = data["rlnCoordinateX"].astype(float).astype(str) + "," + data["rlnCoordinateY"].astype(float).astype(str)
if "rlnVoltage" in data:
ret['voltage'] = data["rlnVoltage"].astype(float)
if "rlnSphericalAberration" in data:
ret['cs'] = data["rlnSphericalAberration"].astype(float)
if "rlnAmplitudeContrast" in data:
ret['ampcont'] = data["rlnAmplitudeContrast"].astype(float) * 100.
if "rlnPhaseShift" in data:
ret["vps"] = data['rlnPhaseShift']
if "rlnDefocusU" in data and "rlnDefocusV" in data and "rlnDefocusAngle" in data:
# see Figure 1 and Eq 5 of https://doi.org/10.1016/j.jsb.2015.08.008
rlnDefocusU = data["rlnDefocusU"].astype(float)
rlnDefocusV = data["rlnDefocusV"].astype(float)
rlnDefocusAngle = data["rlnDefocusAngle"].astype(float)
# check ctffind3.f function CTF(CS,WL,WGH1,WGH2,DFMID1,DFMID2,ANGAST,THETATR, IX, IY)
ret["defocus"] = (rlnDefocusU + rlnDefocusV) / 2. / 1e4
ret["dfdiff"] = (rlnDefocusU - rlnDefocusV).abs() / 2. / 1e4 # rlnDefocusU can be larger or smaller than rlnDefocusV
ret["dfang"] = ret["dfdiff"] * 0.0
subset = rlnDefocusU > rlnDefocusV
ret.loc[subset, "dfang"] = np.fmod(rlnDefocusAngle[subset] + 360. + 90., 360.) # largest defocus direction to smallest defocus direction
ret.loc[~subset, "dfang"] = rlnDefocusAngle[~subset] # already at smallest defocus direction
if "rlnBeamTiltX" in data and "rlnBeamTiltY" in data:
ret["btamp"] = np.hypot(data["rlnBeamTiltX"], data["rlnBeamTiltY"])
ret["btang"] = np.fmod(np.rad2deg(np.arctan2(-data["rlnBeamTiltY"], -data["rlnBeamTiltX"]))+360, 360)
if "rlnDetectorPixelSize" in data and "rlnMagnification" in data:
ret["apix"] = data["rlnDetectorPixelSize"].astype(float) * 1e4 / data["rlnMagnification"]
if "rlnClassNumber" in data:
ret["class"] = data["rlnClassNumber"].astype(int) - 1
if "rlnRandomSubset" in data:
ret["set"] = data["rlnRandomSubset"].astype(int) - 1
if "rlnAngleRot" in data or "rlnAngleTilt" in data or "rlnAnglePsi" in data or "rlnAngleRotPrior" in data or "rlnAnglePsiPrior" in data or "rlnAngleTiltPrior" in data:
if "rlnAngleRot" in data and "rlnAngleTilt" in data and "rlnAnglePsi" in data:
rlnAngleRot = "rlnAngleRot"
rlnAngleTilt = "rlnAngleTilt"
rlnAnglePsi = "rlnAnglePsi"
elif "rlnAngleRotPrior" in data and "rlnAnglePsiPrior" in data and "rlnAngleTiltPrior" in data:
rlnAngleRot = "rlnAngleRotPrior"
rlnAngleTilt = "rlnAngleTiltPrior"
rlnAnglePsi = "rlnAnglePsiPrior"
else: # mixed match of the two sets of euler angles
if "rlnAngleRot" in data:
rlnAngleRot = "rlnAngleRot"
elif "rlnAngleRotPrior" in data:
rlnAngleRot = "rlnAngleRotPrior"
else:
rlnAngleRot = "rlnAngleRot"
data[rlnAngleRot] = 0.0
if "rlnAngleTilt" in data:
rlnAngleTilt = "rlnAngleTilt"
elif "rlnAngleTiltPrior" in data:
rlnAngleTilt = "rlnAngleTiltPrior"
else:
rlnAngleTilt = "rlnAngleTilt"
data[rlnAngleTilt] = 0.0
if "rlnAnglePsi" in data:
rlnAnglePsi = "rlnAnglePsi"
elif "rlnAnglePsiPrior" in data:
rlnAnglePsi = "rlnAnglePsiPrior"
else:
rlnAnglePsi = "rlnAnglePsi"
data[rlnAnglePsi] = 0.0
az = data[rlnAngleRot].astype(float) + 90.0
alt = data[rlnAngleTilt].astype(float)
phi = data[rlnAnglePsi].astype(float) - 90.0
alt0_indices = alt==0
phi[alt0_indices] += az[alt0_indices]
az[alt0_indices]=0
az[az>360] = az[az>360]-360
phi[phi<0] = phi[phi<0] + 360
ret['euler'] = alt.round(3).astype(str) + "," + az.round(3).astype(str) + "," + phi.round(3).astype(str)
if "rlnOriginX" in data and "rlnOriginY" in data:
import os
nx = ny = 0
imageFileName = str(ret.iloc[0]["filename"])
if os.path.exists(imageFileName):
try:
import mrcfile
except:
print("ERROR: failed to load mrcfile library. Please install it and rerun this command")
sys.exit(-1)
try:
with mrcfile.open(imageFileName) as mrc:
ny, nx = mrc.data.shape[1:3]
except:
print("WARNING: failed to obtain image size (nx, ny) info from %s. You should add nx/2,ny/2 to the \"center\" parameter in the output lst file" % (imageFileName))
else:
print("WARNING: cannot find image file %s to obtain image size info. Make sure that you run this script in the top level folder of your Relion project. Otherwise, you should add nx/2,ny/2 to the \"center\" parameter in the output lst file" % (imageFileName))
ret["center"] = (nx//2-data["rlnOriginX"]).astype(str) + "," + (ny//2-data["rlnOriginY"]).astype(str)
if "rlnHelicalTubeID" in data:
ret["helicaltube"] = data["rlnHelicalTubeID"].astype(int)-1
if "rlnLogLikeliContribution" in data:
if data["rlnLogLikeliContribution"].max():
ret["score"] = -1*data["rlnLogLikeliContribution"]/data["rlnLogLikeliContribution"].max()
else:
ret["score"] = data["rlnLogLikeliContribution"]
return ret
def dataframe2lst(data, lstFile):
keys = list(data)
keys.remove("pid")
keys.remove("filename")
lines = data['pid'].astype(str) + '\t' + data['filename']
for k in keys:
mask = data[k].notnull()
if data[k].dtype == np.float64:
lines[mask] += '\t' + k + '=' + data[k][mask].round(6).astype(str)
else:
lines[mask] += '\t' + k + '=' + data[k][mask].astype(str)
lines = lines.str.strip()
maxlen = max(lines.str.len())
lines = lines.str.pad(maxlen, side='right')
with open(lstFile, "w") as lstfp:
lstfp.write("#LSX\n#If you edit this file, you MUST rerun lstfast.py on it before using it!\n# %d\n" % (maxlen+1))
lstfp.write('\n'.join(lines))
lstfp.write('\n')
def parse_command_line():
description = "convert a Relion star file into a JSPR lst file"
epilog = "Author: Wen Jiang (jiang12 at purdue.edu)\n"
epilog += "Copyright (c) 2019 Purdue University\n"
parser = argparse.ArgumentParser(description=description, epilog=epilog)
parser.add_argument("starFile", help="input star file")
parser.add_argument("lstFile", help="output lst file")
args = parser.parse_args()
return args
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment