Skip to content

Instantly share code, notes, and snippets.

@JoaoRodrigues
Created February 21, 2014 17:13
Show Gist options
  • Save JoaoRodrigues/9138606 to your computer and use it in GitHub Desktop.
Save JoaoRodrigues/9138606 to your computer and use it in GitHub Desktop.
CNS Topology to PDB file - loads charges into b-factor column and parses bonds to CONECT statements
#!/usr/bin/env python
"""
Replaces the bfactor column of a PDB file
with the charge information present in a
CNS topology file.
Atoms missing from the topology are automatically
assigned a set charge (default 0.0)
If reading a cns top file, parses bond information
to connect records at the end of the output file.
"""
DEFAULT_CHARGE=0.0
## DO NOT EDIT BELOW ##
import sys
import os
import re
USAGE = "USAGE: %s file.pdb cns_file.top" %sys.argv[0]
def _write_error_message(message, show_usage=False):
"""Handles error messages"""
sys.stderr.write(message+"\n")
if show_usage:
sys.stderr.write('\n'+USAGE+'\n')
sys.exit(1)
try:
from Bio.PDB import PDBParser
from Bio.PDB import PDBIO
except ImportError:
_write_error_message("Biopython PDB module could not be imported. Check your PYTHONPATH.")
def _parse_cns_psf(psf_data):
charge_dict = {}
for line in psf_data:
fields = line.split()
if len(fields) == 8 and fields[0].isdigit():
atom_name = fields[4][1:-1].strip()
atom_charge = float(fields[6])
charge_dict[atom_name] = atom_charge
return charge_dict
def _parse_cns_topology(top_data):
"""
Parses a CNS topology file, retrieving only the lines
that contain CHARGE= fields and BONDs.
"""
charge_re = re.compile('CHARGE=([\-0-9\. ]+)')
charge_dict = {}
bonds_list = []
for line in top_data:
line = line.upper()
if 'CHARGE=' in line:
atom_name = line.split()[1].strip()
s_atom_charge = re.search(charge_re, line).group(1)
atom_charge = float(s_atom_charge)
charge_dict[atom_name] = atom_charge
elif line.startswith('BOND'):
atom_pair = tuple(line.strip().split()[1:])
bonds_list.append(atom_pair)
return (bonds_list, charge_dict)
def _replace_bfactor(structure, mapping):
"""
Uses an atom name -> number mapping to
replace the bfactor column with.
"""
for atom in structure.get_atoms():
try:
atom.bfactor = mapping[atom.name]
except KeyError:
print "Atom not found in topology: %s" %atom.name
atom.bfactor = DEFAULT_CHARGE
return structure
if __name__ == '__main__':
args = sys.argv[1:]
if len(args) != 2:
_write_error_message('Incorrect number of arguments.', True)
for f in args:
if not os.path.exists(f):
_write_error_message('File not found: %s' %f)
pdb_path = args[0]
top_path = args[1]
try:
P = PDBParser(QUIET=1)
except AttributeError:
P = PDBParser()
top_file = open(top_path)
if top_path.endswith('psf'):
bonds = None # We could parse it from the PSF too..
charges = _parse_cns_psf(top_file)
elif top_path.endswith('top') or top_path.endswith('TOP'):
bonds, charges = _parse_cns_topology(top_file)
else:
sys.exit(1)
top_file.close()
struct = P.get_structure('structure', pdb_path)
new_struct = _replace_bfactor(struct, charges)
nname = "%s.pdb" %pdb_path.replace('.pdb', '_top')
try:
io = PDBIO()
io.set_structure(new_struct)
io.save(nname, write_end=False)
except Exception, e:
_write_error_message('There was an error saving your output:\n\n%s\n\n' %e)
if bonds:
atom_to_serial = dict([ (atom.name, atom.serial_number) for atom in new_struct.get_atoms()])
pdb_handle = open(nname, 'a')
for i, j in bonds:
if i in atom_to_serial and j in atom_to_serial:
pdb_handle.write('CONECT%5i%5i\n' %(atom_to_serial[i], atom_to_serial[j]))
pdb_handle.close()
print "File saved at: %s" %nname
print "Use 'spectrum b, red_white_blue' in Pymol to visualize the charges"
if bonds:
print "Use set connect_mode, 1 to visualize only the bonds from the topology"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment