Created
April 9, 2019 15:07
-
-
Save wjiang/4cb9f9fb72783e91c0ebc4d34bdd0da3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# by Wen Jiang @ Purdue University | |
import sys, argparse | |
import pandas as pd | |
import numpy as np | |
def main(): | |
args = parse_command_line() | |
data_relion = star2dataframe(args.starFile) | |
print("Read in %d particles from %s" % (len(data_relion), args.starFile)) | |
data_jspr = dataframe_relion2jspr(data_relion) | |
dataframe2lst(data_jspr, args.lstFile) | |
print("%d images saved to %s" % (len(data_jspr), args.lstFile)) | |
def star2dataframe(starFile): | |
headers = [] | |
foundheader = False | |
ln = 0 | |
with open(starFile, 'rU') as f: | |
for l in f: | |
if l.startswith("_rln"): | |
foundheader = True | |
lastheader = True | |
head = l.split('#')[0].rstrip().lstrip('_') | |
headers.append(head) | |
else: | |
lastheader = False | |
if foundheader and not lastheader: | |
break | |
ln += 1 | |
data = pd.read_csv(starFile, skiprows=ln, delimiter='\s+', header=None) | |
nans = data.iloc[:len(headers)+2, :].isnull().any(axis=1) | |
if nans.sum() > 0: | |
data = data.iloc[len(headers)+2:, :].reset_index(drop=True) | |
if len(data.columns) != len(headers): | |
print("ERROR: star file %s is invalid: %d columns were found but %d were defined in the header" % (starFile, len(data.columns), len(headers))) | |
sys.exit(-1) | |
data.columns = headers | |
nans = data.isnull().any(axis=1) | |
if nans.sum()>0: | |
print("WARNING: %s: %d/%d particle rows are corrupted and thus ignored" % (starFile, nans.sum(), len(data))) | |
print(" Corrupted particle indices: %s" % (nans.to_numpy().nonzero()[0])) | |
data = data[nans==False] | |
return data | |
def dataframe_relion2jspr(data): | |
ret = pd.DataFrame() | |
if "rlnImageName" in data: | |
tmp = data["rlnImageName"].str.split("@", expand=True) | |
indices, filenames = tmp.iloc[:,0], tmp.iloc[:, -1] | |
indices = indices.astype(int)-1 | |
ret["pid"] = indices | |
ret["filename"] = filenames | |
if "rlnMicrographName" in data: | |
if "@" in data["rlnMicrographName"].iloc[0]: # movie frames | |
tmp = data["rlnMicrographName"].str.split("@", expand=True) | |
ret["frame"] = tmp.iloc[:, 0].astype(int)-1 | |
ret["micrograph"] = tmp.iloc[:, 1] | |
else: | |
ret["micrograph"] = data["rlnMicrographName"] | |
if "rlnMicrographName" in data and "rlnImageName" not in data: | |
ret["filename"] = ret["micrograph"] | |
ret["pid"] = 0 | |
elif "rlnMicrographName" not in data and "rlnImageName" not in data: | |
msg = "ERROR: rlnMicrographName or rlnMicrographName must be in the input star file. Available parameters are: %s" % (' '.join(data.columns)) | |
raise AttributeError(msg) | |
if "rlnCoordinateX" in data and "rlnCoordinateY" in data: | |
ret["location"] = data["rlnCoordinateX"].astype(float).astype(str) + "," + data["rlnCoordinateY"].astype(float).astype(str) | |
if "rlnVoltage" in data: | |
ret['voltage'] = data["rlnVoltage"].astype(float) | |
if "rlnSphericalAberration" in data: | |
ret['cs'] = data["rlnSphericalAberration"].astype(float) | |
if "rlnAmplitudeContrast" in data: | |
ret['ampcont'] = data["rlnAmplitudeContrast"].astype(float) * 100. | |
if "rlnPhaseShift" in data: | |
ret["vps"] = data['rlnPhaseShift'] | |
if "rlnDefocusU" in data and "rlnDefocusV" in data and "rlnDefocusAngle" in data: | |
# see Figure 1 and Eq 5 of https://doi.org/10.1016/j.jsb.2015.08.008 | |
rlnDefocusU = data["rlnDefocusU"].astype(float) | |
rlnDefocusV = data["rlnDefocusV"].astype(float) | |
rlnDefocusAngle = data["rlnDefocusAngle"].astype(float) | |
# check ctffind3.f function CTF(CS,WL,WGH1,WGH2,DFMID1,DFMID2,ANGAST,THETATR, IX, IY) | |
ret["defocus"] = (rlnDefocusU + rlnDefocusV) / 2. / 1e4 | |
ret["dfdiff"] = (rlnDefocusU - rlnDefocusV).abs() / 2. / 1e4 # rlnDefocusU can be larger or smaller than rlnDefocusV | |
ret["dfang"] = ret["dfdiff"] * 0.0 | |
subset = rlnDefocusU > rlnDefocusV | |
ret.loc[subset, "dfang"] = np.fmod(rlnDefocusAngle[subset] + 360. + 90., 360.) # largest defocus direction to smallest defocus direction | |
ret.loc[~subset, "dfang"] = rlnDefocusAngle[~subset] # already at smallest defocus direction | |
if "rlnBeamTiltX" in data and "rlnBeamTiltY" in data: | |
ret["btamp"] = np.hypot(data["rlnBeamTiltX"], data["rlnBeamTiltY"]) | |
ret["btang"] = np.fmod(np.rad2deg(np.arctan2(-data["rlnBeamTiltY"], -data["rlnBeamTiltX"]))+360, 360) | |
if "rlnDetectorPixelSize" in data and "rlnMagnification" in data: | |
ret["apix"] = data["rlnDetectorPixelSize"].astype(float) * 1e4 / data["rlnMagnification"] | |
if "rlnClassNumber" in data: | |
ret["class"] = data["rlnClassNumber"].astype(int) - 1 | |
if "rlnRandomSubset" in data: | |
ret["set"] = data["rlnRandomSubset"].astype(int) - 1 | |
if "rlnAngleRot" in data or "rlnAngleTilt" in data or "rlnAnglePsi" in data or "rlnAngleRotPrior" in data or "rlnAnglePsiPrior" in data or "rlnAngleTiltPrior" in data: | |
if "rlnAngleRot" in data and "rlnAngleTilt" in data and "rlnAnglePsi" in data: | |
rlnAngleRot = "rlnAngleRot" | |
rlnAngleTilt = "rlnAngleTilt" | |
rlnAnglePsi = "rlnAnglePsi" | |
elif "rlnAngleRotPrior" in data and "rlnAnglePsiPrior" in data and "rlnAngleTiltPrior" in data: | |
rlnAngleRot = "rlnAngleRotPrior" | |
rlnAngleTilt = "rlnAngleTiltPrior" | |
rlnAnglePsi = "rlnAnglePsiPrior" | |
else: # mixed match of the two sets of euler angles | |
if "rlnAngleRot" in data: | |
rlnAngleRot = "rlnAngleRot" | |
elif "rlnAngleRotPrior" in data: | |
rlnAngleRot = "rlnAngleRotPrior" | |
else: | |
rlnAngleRot = "rlnAngleRot" | |
data[rlnAngleRot] = 0.0 | |
if "rlnAngleTilt" in data: | |
rlnAngleTilt = "rlnAngleTilt" | |
elif "rlnAngleTiltPrior" in data: | |
rlnAngleTilt = "rlnAngleTiltPrior" | |
else: | |
rlnAngleTilt = "rlnAngleTilt" | |
data[rlnAngleTilt] = 0.0 | |
if "rlnAnglePsi" in data: | |
rlnAnglePsi = "rlnAnglePsi" | |
elif "rlnAnglePsiPrior" in data: | |
rlnAnglePsi = "rlnAnglePsiPrior" | |
else: | |
rlnAnglePsi = "rlnAnglePsi" | |
data[rlnAnglePsi] = 0.0 | |
az = data[rlnAngleRot].astype(float) + 90.0 | |
alt = data[rlnAngleTilt].astype(float) | |
phi = data[rlnAnglePsi].astype(float) - 90.0 | |
alt0_indices = alt==0 | |
phi[alt0_indices] += az[alt0_indices] | |
az[alt0_indices]=0 | |
az[az>360] = az[az>360]-360 | |
phi[phi<0] = phi[phi<0] + 360 | |
ret['euler'] = alt.round(3).astype(str) + "," + az.round(3).astype(str) + "," + phi.round(3).astype(str) | |
if "rlnOriginX" in data and "rlnOriginY" in data: | |
import os | |
nx = ny = 0 | |
imageFileName = str(ret.iloc[0]["filename"]) | |
if os.path.exists(imageFileName): | |
try: | |
import mrcfile | |
except: | |
print("ERROR: failed to load mrcfile library. Please install it and rerun this command") | |
sys.exit(-1) | |
try: | |
with mrcfile.open(imageFileName) as mrc: | |
ny, nx = mrc.data.shape[1:3] | |
except: | |
print("WARNING: failed to obtain image size (nx, ny) info from %s. You should add nx/2,ny/2 to the \"center\" parameter in the output lst file" % (imageFileName)) | |
else: | |
print("WARNING: cannot find image file %s to obtain image size info. Make sure that you run this script in the top level folder of your Relion project. Otherwise, you should add nx/2,ny/2 to the \"center\" parameter in the output lst file" % (imageFileName)) | |
ret["center"] = (nx//2-data["rlnOriginX"]).astype(str) + "," + (ny//2-data["rlnOriginY"]).astype(str) | |
if "rlnHelicalTubeID" in data: | |
ret["helicaltube"] = data["rlnHelicalTubeID"].astype(int)-1 | |
if "rlnLogLikeliContribution" in data: | |
if data["rlnLogLikeliContribution"].max(): | |
ret["score"] = -1*data["rlnLogLikeliContribution"]/data["rlnLogLikeliContribution"].max() | |
else: | |
ret["score"] = data["rlnLogLikeliContribution"] | |
return ret | |
def dataframe2lst(data, lstFile): | |
keys = list(data) | |
keys.remove("pid") | |
keys.remove("filename") | |
lines = data['pid'].astype(str) + '\t' + data['filename'] | |
for k in keys: | |
mask = data[k].notnull() | |
if data[k].dtype == np.float64: | |
lines[mask] += '\t' + k + '=' + data[k][mask].round(6).astype(str) | |
else: | |
lines[mask] += '\t' + k + '=' + data[k][mask].astype(str) | |
lines = lines.str.strip() | |
maxlen = max(lines.str.len()) | |
lines = lines.str.pad(maxlen, side='right') | |
with open(lstFile, "w") as lstfp: | |
lstfp.write("#LSX\n#If you edit this file, you MUST rerun lstfast.py on it before using it!\n# %d\n" % (maxlen+1)) | |
lstfp.write('\n'.join(lines)) | |
lstfp.write('\n') | |
def parse_command_line(): | |
description = "convert a Relion star file into a JSPR lst file" | |
epilog = "Author: Wen Jiang (jiang12 at purdue.edu)\n" | |
epilog += "Copyright (c) 2019 Purdue University\n" | |
parser = argparse.ArgumentParser(description=description, epilog=epilog) | |
parser.add_argument("starFile", help="input star file") | |
parser.add_argument("lstFile", help="output lst file") | |
args = parser.parse_args() | |
return args | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment