Skip to content

Instantly share code, notes, and snippets.

@timdecode
Last active April 26, 2023 22:24
Show Gist options
  • Save timdecode/a00e2ee1e5fd9f643d7656dee51a9baa to your computer and use it in GitHub Desktop.
Save timdecode/a00e2ee1e5fd9f643d7656dee51a9baa to your computer and use it in GitHub Desktop.
cellPACK C++ Parser (for an Objective-C project, hence the Objective-C++ code)
// (Objective-C header)
// CellPackParser.h
// Frameworks
//
// Created by Timothy Davison on 2022-05-11.
//
#pragma once
#import <Foundation/Foundation.h>
#import <simd/simd.h>
NS_ASSUME_NONNULL_BEGIN
typedef NS_ENUM(NSUInteger, CellPackType) {
lipid,
protein
};
@interface CellPackAssembly: NSObject
@property NSString * name;
@property NSString * entityName;
@property NSString * entityDescription;
@property CellPackType type;
- (instancetype) initWithName: (NSString*) name;
- (simd_float4*) atoms;
- (size_t) atomsCount;
- (simd_float4x4*) instanceTransforms;
- (size_t) instanceTransformsCount;
@end
@interface CellPackParser: NSObject
- (instancetype) initWithCIFPath:(NSString*) path;
- (NSArray<CellPackAssembly*>*) assemblies;
@end
NS_ASSUME_NONNULL_END
//
// CellPackParser.mm
// PDBParser
//
// Created by Timothy Davison on 2022-05-11.
//
#import <Foundation/Foundation.h>
#import "CellPackParser.h"
#include <gemmi/cif.hpp>
#include <gemmi/numb.hpp> // for as_number
#include <gemmi/mmread.hpp>
#include <gemmi/gz.hpp>
#include <gemmi/select.hpp>
#include <vector>
#include <fstream>
#include <map>
#include <optional>
namespace cif = gemmi::cif;
@interface CellPackAssembly ()
{
std::vector<simd_float4> _atoms;
std::vector<simd_float4x4> _instanceTransforms;
}
@property std::vector<simd_float4> cppAtoms;
@property std::vector<simd_float4x4> cppTransforms;
@end
@implementation CellPackAssembly
- (instancetype) initWithName:(NSString *)name {
if( self = [super init] ) {
self.name = name;
self.entityName = @"";
self.entityDescription = @"";
self.type = CellPackType::protein;
}
return self;
}
- (simd_float4*) atoms {
return &_cppAtoms[0];
}
- (size_t) atomsCount {
return _cppAtoms.size();
}
- (simd_float4x4*) instanceTransforms {
return &_cppTransforms[0];
}
- (size_t) instanceTransformsCount {
return _cppTransforms.size();
}
@end
@interface CellPackParser ()
+ (NSString*) fixupForGemmi:(NSString*) path;
@end;
@implementation CellPackParser
{
cif::Document doc;
gemmi::Structure structure;
}
- (instancetype)initWithCIFPath:(NSString*) path {
self = [super init];
NSString * fixedFile = [CellPackParser fixupForGemmi:path];
NSLog(@"%@", fixedFile);
std::string stdPath = std::string([fixedFile UTF8String]);
doc = cif::read_file(stdPath);
structure = gemmi::make_structure(doc);
const auto selector = gemmi::Selection();
return self;
}
+ (NSString*) fixupForGemmi:(NSString*)path {
/*
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_asym_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
comp_id seq_id
| asym_id | comp_id
| | entity_id | | asym_id
| | | sed_id | | |
| | | | | | |
ATOM 1 N N . GLN NG 683 . . -11.289 60.909 45.412 1.00 0.00 . GLN NG 683 N 1
| |
Should be: | |
ATOM 1 N N . GLN NG 683 . . -11.289 60.909 45.412 1.00 0.00 . . GLN NG N 1
We need to rewrite:
auth_seq_id to .
auth_comp_id to label_comp_id
auth_asym_id to label_asym_id
*/
enum class atomSiteKeys: int {
label_comp_id = 0,
label_asym_id,
auth_seq_id,
auth_comp_id,
auth_asym_id,
count
};
std::array<int, size_t(atomSiteKeys::count)> atomSiteIndices;
std::string stdPath = std::string([path UTF8String]);
std::ifstream infile(stdPath);
std::string line;
// search for
// loop_
// _atom_site
bool inAtomSite = false;
int column = 0;
NSUUID * uuid = [NSUUID UUID];
NSString * outPath = [NSTemporaryDirectory() stringByAppendingPathComponent: uuid.description];
std::string stdOutPath = std::string([outPath UTF8String]);
std::ofstream outfile(stdOutPath);
std::vector<std::string> lineTokens;
while( std::getline(infile, line) ) {
if( line.rfind("_atom_site") == 0 && !inAtomSite ) {
inAtomSite = true;
column = 0;
}
if( line.rfind("_atom_site") != 0 ) {
inAtomSite = false;
}
// Rewrite atoms
if( inAtomSite ) {
if( line.rfind("_atom_site.label_comp_id") == 0 ) {
atomSiteIndices[int(atomSiteKeys::label_comp_id)] = column;
}
else if( line.rfind("_atom_site.label_asym_id") == 0 ) {
atomSiteIndices[int(atomSiteKeys::label_asym_id)] = column;
}
else if( line.rfind("_atom_site.auth_seq_id") == 0 ) {
atomSiteIndices[int(atomSiteKeys::auth_seq_id)] = column;
}
else if( line.rfind("_atom_site.auth_comp_id") == 0 ) {
atomSiteIndices[int(atomSiteKeys::auth_comp_id)] = column;
}
else if( line.rfind("_atom_site.auth_asym_id") == 0 ) {
atomSiteIndices[int(atomSiteKeys::auth_asym_id)] = column;
}
column += 1;
}
// Rewrite the atoms
if( line.rfind("ATOM") == 0 ) {
// tokenize and rewrite the line
{
auto lineStream = std::istringstream{line};
auto token = std::string{};
lineTokens.clear();
while( lineStream >> token ) {
lineTokens.push_back(token);
}
// rewrite the line
lineTokens[atomSiteIndices[int(atomSiteKeys::auth_seq_id)]] = ".";
lineTokens[atomSiteIndices[int(atomSiteKeys::auth_comp_id)]] = lineTokens[atomSiteIndices[int(atomSiteKeys::label_comp_id)]];
lineTokens[atomSiteIndices[int(atomSiteKeys::auth_asym_id)]] = lineTokens[atomSiteIndices[int(atomSiteKeys::label_asym_id)]];
}
// output the line
{
auto outLine = std::ostringstream{};
for( auto& token : lineTokens ) {
outLine << token << " ";
}
outfile << outLine.str() << std::endl;
}
}
// Append the line unadulaterated
else {
outfile << line << std::endl;
}
}
outfile.close();
return outPath;
}
- (NSArray<CellPackAssembly *> *)assemblies {
// Ludo's Python Gemmi code for CellPack
// import gemmi
// f = "C:\\Users\\ludov\\Downloads\\cellpack_atom_instances_test.cif";
// st = gemmi.read_structure(f);st
// cellpack_assembly = st.assemblies[0]
// for gen in cellpack_assembly.generators :
// print (gen.subchains)
// print (len(gen.operators))
// #selct model 1 with joined subchain
// #/models/chains/residues/atoms
// sel = gemmi.Selection("/1/"+",".join(gen.subchains)+"/")
// print (sel)
// model = sel.first(st)[0]
// print('Model', model.name)
// chains = sel.chains(model)
// for chain in chains:
// print('-', chain.name)
// for residue in chain:
// for atom in residue:
// print (residue.name, atom.name, atom.pos)
// for oper in gen.operators :
// print(oper.transform.vec) #position
// print(oper.transform.mat) #rotation
NSMutableArray<CellPackAssembly*>* result = [[NSMutableArray<CellPackAssembly*> alloc] init];
// Entity columns so we can set the assemblies CellPack name
auto entityIds = doc.sole_block().find_loop("_entity.id");
auto entityDescriptions = doc.sole_block().find_loop("_entity.pdbx_description");
// Create our string > entity lookup table
std::map<std::string, int> entities;
for( int i = 0; i < entityIds.length(); ++i ) {
auto entityId = entityIds[i];
entities[entityId] = i;
}
// Parse the assemblies
for( auto& assembly : structure.assemblies ) {
for( auto& generator : assembly.generators ) {
std::vector<simd_float4> atomsOut;
std::vector<simd_float4x4> transformsOut;
// Build the structure (the regular Gemmi model code doesn't work)
std::string query = "/1/";
for( int i = 0; i < generator.subchains.size(); ++i ) {
const auto name = generator.subchains[i];
if( i == 0 ) {
query += name;
} else {
query += "," + name;
}
}
const auto selection = gemmi::Selection(query);
CellPackType type { ::protein };
std::optional<int> entityIndex;
auto chains = selection.chains(*selection.first(structure).first);
// Get the atoms
for( auto& chain : chains ) {
for( auto& residue : chain.residues ) {
type = residue.name == "LIP" ? CellPackType::lipid : CellPackType::protein;
entityIndex = entities[residue.entity_id];
for( auto& atom : residue.atoms ) {
float radius = gemmi::Element(atom.element).vdw_r();
simd_float4 simAtom = simd_make_float4(float(atom.pos.x),
float(atom.pos.y),
float(atom.pos.z),
radius);
atomsOut.push_back(simAtom);
}
}
}
// Get the transforms
for( auto op : generator.operators ) {
// The matrices are row-major
const auto rotation = op.transform.mat;
const auto translation = op.transform.vec;
// 2. Convert to column-major
simd_float4x4 transform;
// column-0
transform.columns[0][0] = rotation[0][0];
transform.columns[0][1] = rotation[1][0];
transform.columns[0][2] = rotation[2][0];
// column-1
transform.columns[1][0] = rotation[0][1];
transform.columns[1][1] = rotation[1][1];
transform.columns[1][2] = rotation[2][1];
// column-2
transform.columns[2][0] = rotation[0][2];
transform.columns[2][1] = rotation[1][2];
transform.columns[2][2] = rotation[2][2];
// translation-column-3
transform.columns[3][0] = translation.x;
transform.columns[3][1] = translation.y;
transform.columns[3][2] = translation.z;
transformsOut.push_back(transform);
}
NSString * nsName = [NSString stringWithUTF8String:assembly.name.c_str()];
CellPackAssembly * cellPackAssembly = [[CellPackAssembly alloc] initWithName:nsName];
cellPackAssembly.cppAtoms = atomsOut;
cellPackAssembly.cppTransforms = transformsOut;
cellPackAssembly.type = type;
if( entityIndex.has_value() ) {
const auto name = entityIds[entityIndex.value()];
auto description = entityDescriptions[entityIndex.value()];
// unquote our description
{
auto first = description.find("'");
if( first != std::string::npos ) {
description.erase(0, 1);
}
auto last = description.rfind("'");
if( last != std::string::npos ) {
description.erase(description.end() - 1);
}
}
cellPackAssembly.entityName = [NSString stringWithUTF8String:name.c_str()];
cellPackAssembly.entityDescription = [NSString stringWithUTF8String:description.c_str()];
}
[result addObject:cellPackAssembly];
}
}
return result;
}
@end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment