# hkilter/XPLOR-2-XYZ.py

## XPLOR-2-XYZ.py
## This script will convert PDB file saved by NIH XPLOR into XYZ format.
## Will also work on other PDB files, but does not process HETATM entries - only ATOM. Can be easily extended though.
## Usage:
## xplo2xyz.py [options] <file.pdb> [<file.xyz>]
## options:
##  --version        show program's version number and exit
##  -h, --help       show this help message and exit
##  -o, --overwrite  overwrite output file, if it exists
##  -v, --verbose    print info about files being processed
## Create file xplo2xyz.py, copy-paste into the content below and save.
## URL: http://nmrwiki.org/wiki/index.php?title=Script_xplo2xyz.py_-_convert_PDB_files_created_by_NIH-XPLOR_to_XYZ_file_format

#!/usr/bin/python
from optparse import OptionParser
import re
import os.path
import sys

class PDBAtom(object):
    """Atom parsed from one fixed-column ATOM line of a PDB file.

    Attributes set by the constructor:
        name     -- atom name, columns 13-16 of the record (stripped)
        x, y, z  -- coordinates as floats, columns 31-38, 39-46 and 47-54
        element  -- element symbol, columns 77-78 when present, otherwise
                    guessed from the atom name
        warnings -- list of human-readable notes about guesses that were made

    Raises ValueError when a coordinate field is not a number or when the
    element has to be guessed but the atom name contains no letter.
    """

    def __init__(self, string):
        #this is what we need to parse
        #ATOM      1  CA  ORN     1       4.935   1.171   7.983  1.00  0.00      sega
        #XPLOR pdb files do not fully agree with the PDB conventions
        self.name = string[12:16].strip()
        # float() tolerates the blank padding of the fixed-width fields.
        self.x = float(string[30:38])
        self.y = float(string[38:46])
        self.z = float(string[46:54])
        self.warnings = []
        # Slicing past the end of a short line yields '' (or just the line
        # terminator), so one test covers both "line too short" and
        # "element columns present but blank".
        self.element = string[76:78].strip()
        if not self.element:
            self.element = self._guess_element(self.name)
            self.warnings.append('Chemical element name guessed ' +
                                 'to be %s from atom name %s' % (self.element, self.name))

    @staticmethod
    def _guess_element(name):
        """Return the first letter of *name*, skipping leading digits (e.g. 1HB -> H)."""
        for char in name:
            if char.isalpha():
                return char
        raise ValueError('cannot guess chemical element from atom name %r' % name)

def _build_option_parser():
    """Return the OptionParser describing this script's command line."""
    usage = 'usage: %prog [options] <file.pdb> [<file.xyz>]\n\n' \
            + '\tConvert protein data bank PDB file created by NIH-XPLOR to XYZ file\n' \
            + '\tReferences: http://www.wwpdb.org/docs.html, ' \
            + 'http://en.wikipedia.org/wiki/XYZ_file_format\n\n' \
            + '\tto get help type: python %prog -h'
    version = '%prog 0.1 - convert pdb file to xyz'
    parser = OptionParser(usage=usage, version=version)
    parser.add_option('-o', '--overwrite', action='store_true', dest='overwrite',
                      default=False, help='overwrite output file, if it exists')
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose',
                      default=False, help='print info about files being processed')
    return parser


def _resolve_file_names(parser, args):
    """Validate the positional arguments and return (infile, outfile).

    Any problem is reported through parser.error(), which prints the usage
    text and exits with status 2.
    """
    narg = len(args)
    if narg == 0:
        parser.error('must provide name of pdb file')
    if narg > 2:
        parser.error('too many no-option arguments should be either one or two (second - name of xyz file)')

    infile = args[0]
    # The dot is escaped so that only a real ".pdb" extension is accepted
    # (an unescaped dot would also let e.g. "modelxpdb" through).
    m = re.search(r'^(.+)\.pdb$', infile, re.IGNORECASE)
    if not m:
        parser.error('input file (first argument) must have .pdb extension - case insensitive')

    if narg == 1:
        return infile, m.group(1) + '.xyz'

    outfile = args[1]
    # Compare in lower case so the check is case insensitive, as the message says.
    if not outfile.lower().endswith('.xyz'):
        parser.error('output file (second argument) must have .xyz extension - case insensitive')
    return infile, outfile


def _read_atoms(infile):
    """Parse every ATOM line of *infile* into a PDBAtom; exit(1) on a malformed line."""
    atoms = []
    with open(infile, 'r') as pdb_file:
        for lineno, line in enumerate(pdb_file, 1):
            if not line.startswith('ATOM'):
                continue
            try:
                atoms.append(PDBAtom(line))
            except (ValueError, IndexError):
                # Only parse failures are reported here; Ctrl-C and
                # SystemExit must not be mistaken for a bad ATOM line.
                sys.stderr.write('\nProblem parsing line %d in file %s\n' % (lineno, infile))
                sys.stderr.write(line)
                sys.stderr.write('Probably ATOM entry is formatted incorrectly?\n')
                sys.stderr.write('Please refer to - http://www.wwpdb.org/documentation/format32/sect9.html#ATOM\n\n')
                sys.exit(1)
    return atoms


def _write_xyz(outfile, infile, atoms, verbose):
    """Write *atoms* to *outfile* in XYZ format; return the number of unshown warnings."""
    num_hidden_warnings = 0
    with open(outfile, 'w') as xyz_file:
        xyz_file.write('%d\n' % len(atoms))
        xyz_file.write('xyz file converted from %s\n' % infile)
        # Atom records start on line 3, after the count and the comment line.
        for lineno, atom in enumerate(atoms, 3):
            # The first XYZ column is the element symbol; fall back to the
            # atom name only if no element is available.
            label = atom.element or atom.name
            xyz_file.write('%s\t%f\t%f\t%f\n' % (label, atom.x, atom.y, atom.z))
            if not atom.warnings:
                continue
            if verbose:
                sys.stderr.write('Possible issue on line %d in %s\n' % (lineno, outfile))
                sys.stderr.write('\n'.join(atom.warnings))
                sys.stderr.write('\n')
            else:
                num_hidden_warnings += 1
    return num_hidden_warnings


def main(argv=None):
    """Convert the PDB file named on the command line to an XYZ file.

    argv -- list of command-line arguments without the program name;
            None means sys.argv[1:].
    Returns 0 on success. Usage errors exit with status 2 (via optparse),
    a malformed ATOM line exits with status 1.
    """
    parser = _build_option_parser()
    options, args = parser.parse_args(argv)
    infile, outfile = _resolve_file_names(parser, args)

    if os.path.exists(outfile) and not options.overwrite:
        parser.error('file %s exists, use -o or --overwrite otion to overwrite the file' % outfile)
    if not os.path.isfile(infile):
        parser.error('file %s does not exist' % infile)

    if options.verbose:
        sys.stderr.write('converting %s --> %s\n' % (infile, outfile))

    atoms = _read_atoms(infile)
    num_hidden_warnings = _write_xyz(outfile, infile, atoms, options.verbose)

    if not options.verbose and num_hidden_warnings > 0:
        sys.stderr.write('file %s saved\n' % outfile)
        sys.stderr.write('%d warnings were not shown, ' % num_hidden_warnings)
        sys.stderr.write('please rerun with option -v to see them\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())
	## This script will convert PDB file saved by NIH XPLOR into XYZ format.
	## Will also work on other PDB files, but does not process HETATM entries - only ATOM. Can be easily extended though.
	## Usage:
	## xplo2xyz.py [options] <file.pdb> [<file.xyz>]
	## options:
	## --version show program's version number and exit
	## -h, --help show this help message and exit
	## -o, --overwrite overwrite output file, if it exists
	## -v, --verbose print info about files being processed
	## Create file xplo2xyz.py, copy-paste into the content below and save.
	## URL: http://nmrwiki.org/wiki/index.php?title=Script_xplo2xyz.py_-_convert_PDB_files_created_by_NIH-XPLOR_to_XYZ_file_format

	#/usr/bin/python
# NOTE: this is a re-indented copy of the script that already appears earlier
# in this file (the paste had lost all of its indentation). The earlier copy
# runs the conversion, so this copy only defines PDBAtom and main() and
# deliberately does not invoke main() a second time - a second run would trip
# over the output file written by the first one.
from optparse import OptionParser
import re
import os.path
import sys


class PDBAtom(object):
    """Atom parsed from one fixed-column ATOM line of a PDB file.

    Attributes set by the constructor:
        name     -- atom name, columns 13-16 of the record (stripped)
        x, y, z  -- coordinates as floats, columns 31-38, 39-46 and 47-54
        element  -- element symbol, columns 77-78 when present, otherwise
                    guessed from the atom name
        warnings -- list of human-readable notes about guesses that were made

    Raises ValueError when a coordinate field is not a number or when the
    element has to be guessed but the atom name contains no letter.
    """

    def __init__(self, string):
        #this is what we need to parse
        #ATOM 1 CA ORN 1 4.935 1.171 7.983 1.00 0.00 sega
        #XPLOR pdb files do not fully agree with the PDB conventions
        self.name = string[12:16].strip()
        # float() tolerates the blank padding of the fixed-width fields.
        self.x = float(string[30:38])
        self.y = float(string[38:46])
        self.z = float(string[46:54])
        self.warnings = []
        # Slicing past the end of a short line yields '' (or just the line
        # terminator), so one test covers both "line too short" and
        # "element columns present but blank".
        self.element = string[76:78].strip()
        if not self.element:
            self.element = self._guess_element(self.name)
            self.warnings.append('Chemical element name guessed ' +
                                 'to be %s from atom name %s' % (self.element, self.name))

    @staticmethod
    def _guess_element(name):
        """Return the first letter of *name*, skipping leading digits (e.g. 1HB -> H)."""
        for char in name:
            if char.isalpha():
                return char
        raise ValueError('cannot guess chemical element from atom name %r' % name)


def _build_option_parser():
    """Return the OptionParser describing this script's command line."""
    usage = 'usage: %prog [options] <file.pdb> [<file.xyz>]\n\n' \
            + '\tConvert protein data bank PDB file created by NIH-XPLOR to XYZ file\n' \
            + '\tReferences: http://www.wwpdb.org/docs.html, ' \
            + 'http://en.wikipedia.org/wiki/XYZ_file_format\n\n' \
            + '\tto get help type: python %prog -h'
    version = '%prog 0.1 - convert pdb file to xyz'
    parser = OptionParser(usage=usage, version=version)
    parser.add_option('-o', '--overwrite', action='store_true', dest='overwrite',
                      default=False, help='overwrite output file, if it exists')
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose',
                      default=False, help='print info about files being processed')
    return parser


def _resolve_file_names(parser, args):
    """Validate the positional arguments and return (infile, outfile).

    Any problem is reported through parser.error(), which prints the usage
    text and exits with status 2.
    """
    narg = len(args)
    if narg == 0:
        parser.error('must provide name of pdb file')
    if narg > 2:
        parser.error('too many no-option arguments should be either one or two (second - name of xyz file)')

    infile = args[0]
    # The dot is escaped so that only a real ".pdb" extension is accepted
    # (an unescaped dot would also let e.g. "modelxpdb" through).
    m = re.search(r'^(.+)\.pdb$', infile, re.IGNORECASE)
    if not m:
        parser.error('input file (first argument) must have .pdb extension - case insensitive')

    if narg == 1:
        return infile, m.group(1) + '.xyz'

    outfile = args[1]
    # Compare in lower case so the check is case insensitive, as the message says.
    if not outfile.lower().endswith('.xyz'):
        parser.error('output file (second argument) must have .xyz extension - case insensitive')
    return infile, outfile


def _read_atoms(infile):
    """Parse every ATOM line of *infile* into a PDBAtom; exit(1) on a malformed line."""
    atoms = []
    with open(infile, 'r') as pdb_file:
        for lineno, line in enumerate(pdb_file, 1):
            if not line.startswith('ATOM'):
                continue
            try:
                atoms.append(PDBAtom(line))
            except (ValueError, IndexError):
                # Only parse failures are reported here; Ctrl-C and
                # SystemExit must not be mistaken for a bad ATOM line.
                sys.stderr.write('\nProblem parsing line %d in file %s\n' % (lineno, infile))
                sys.stderr.write(line)
                sys.stderr.write('Probably ATOM entry is formatted incorrectly?\n')
                sys.stderr.write('Please refer to - http://www.wwpdb.org/documentation/format32/sect9.html#ATOM\n\n')
                sys.exit(1)
    return atoms


def _write_xyz(outfile, infile, atoms, verbose):
    """Write *atoms* to *outfile* in XYZ format; return the number of unshown warnings."""
    num_hidden_warnings = 0
    with open(outfile, 'w') as xyz_file:
        xyz_file.write('%d\n' % len(atoms))
        xyz_file.write('xyz file converted from %s\n' % infile)
        # Atom records start on line 3, after the count and the comment line.
        for lineno, atom in enumerate(atoms, 3):
            # The first XYZ column is the element symbol; fall back to the
            # atom name only if no element is available.
            label = atom.element or atom.name
            xyz_file.write('%s\t%f\t%f\t%f\n' % (label, atom.x, atom.y, atom.z))
            if not atom.warnings:
                continue
            if verbose:
                sys.stderr.write('Possible issue on line %d in %s\n' % (lineno, outfile))
                sys.stderr.write('\n'.join(atom.warnings))
                sys.stderr.write('\n')
            else:
                num_hidden_warnings += 1
    return num_hidden_warnings


def main(argv=None):
    """Convert the PDB file named on the command line to an XYZ file.

    argv -- list of command-line arguments without the program name;
            None means sys.argv[1:].
    Returns 0 on success. Usage errors exit with status 2 (via optparse),
    a malformed ATOM line exits with status 1.
    """
    parser = _build_option_parser()
    options, args = parser.parse_args(argv)
    infile, outfile = _resolve_file_names(parser, args)

    if os.path.exists(outfile) and not options.overwrite:
        parser.error('file %s exists, use -o or --overwrite otion to overwrite the file' % outfile)
    if not os.path.isfile(infile):
        parser.error('file %s does not exist' % infile)

    if options.verbose:
        sys.stderr.write('converting %s --> %s\n' % (infile, outfile))

    atoms = _read_atoms(infile)
    num_hidden_warnings = _write_xyz(outfile, infile, atoms, options.verbose)

    if not options.verbose and num_hidden_warnings > 0:
        sys.stderr.write('file %s saved\n' % outfile)
        sys.stderr.write('%d warnings were not shown, ' % num_hidden_warnings)
        sys.stderr.write('please rerun with option -v to see them\n')
    return 0