Skip to content

Instantly share code, notes, and snippets.

@joemarct
Created March 28, 2013 14:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joemarct/5263462 to your computer and use it in GitHub Desktop.
Save joemarct/5263462 to your computer and use it in GitHub Desktop.
A simple Newick tree parser
class Newick(object):
    """Parse a Newick tree file into a flat dictionary of nodes.

    After construction the following attributes are available:

    * ``t`` -- the tree text up to (not including) the first ``;``, with
      double quotes converted to single quotes and line breaks removed.
    * ``node_counter`` -- number of nodes numbered so far.
    * ``tree`` -- ``{node_id: {'name', 'length', 'type', 'parent'}}``.
      ``node_id`` values are integers handed out in pre-order (the root of
      a fresh instance is 1), ``length`` is the branch length kept as a
      string ('' when absent), ``type`` is ``'internal'`` or
      ``'terminal'`` and ``parent`` is the parent's id (``None`` for the
      root).
    """

    def __init__(self, newick_file):
        """Read ``newick_file`` and parse the first tree it contains.

        Args:
            newick_file: path of a text file holding a Newick tree.

        Raises:
            IOError/OSError: if the file cannot be opened or read.
        """
        # The context manager guarantees the handle is closed even if
        # reading fails.
        with open(newick_file, 'r') as f:
            t = f.read().split(';')[0]
        # str.replace returns a new string; the result has to be kept so
        # that double-quoted labels are handled like single-quoted ones.
        t = t.replace('"', "'")
        self.t = ''.join(t.splitlines())
        self.node_counter = 0
        self.tree = {}
        self.parseNewick(self.t)

    @staticmethod
    def _outerParens(t):
        """Return ``(first, last)``: the index of the first unquoted '('
        and of the last unquoted ')' in ``t`` (-1 for whichever is absent).
        """
        first = -1
        last = -1
        in_quote = False
        for i, ch in enumerate(t):
            if ch == "'":
                in_quote = not in_quote
            elif not in_quote:
                if ch == '(':
                    if first == -1:
                        first = i
                elif ch == ')':
                    last = i
        return first, last

    @staticmethod
    def _splitLabel(node):
        """Split a ``name:length`` label into ``(name, length)``.

        Only colons outside single quotes count as separators.  When the
        label does not contain exactly one such colon the whole label is
        returned as the name and the length is ''.
        """
        colons = []
        in_quote = False
        for i, ch in enumerate(node):
            if ch == "'":
                in_quote = not in_quote
            elif ch == ':' and not in_quote:
                colons.append(i)
        if len(colons) == 1:
            cut = colons[0]
            return node[:cut], node[cut + 1:]
        return node, ''

    def _listChildren(self, nt):
        """Split the text found between a node's parentheses into siblings.

        Args:
            nt: the comma-separated child descriptions (outer parentheses
                already removed).

        Returns:
            A list with one string per direct child, in order.  Commas
            nested inside parentheses or inside single quotes do not
            split.  An empty ``nt`` yields an empty list.
        """
        children = []
        sibling = []
        depth = 0
        in_quote = False
        for s in nt:
            if s == "'":
                in_quote = not in_quote
            elif not in_quote:
                # Structural characters only matter outside quoted labels.
                if s == '(':
                    depth += 1
                elif s == ')':
                    depth -= 1
                elif s == ',' and depth == 0:
                    children.append(''.join(sibling))
                    sibling = []
                    continue
            sibling.append(s)
        if nt:
            # The text after the last top-level comma is the final child.
            children.append(''.join(sibling))
        return children

    def parseNewick(self, t, parent=None):
        """Recursively parse the subtree ``t`` and record it in ``self.tree``.

        Args:
            t: Newick text of one node (without the trailing ';').
            parent: id of the parent node, or ``None`` for the root.

        The node receives the next id from ``self.node_counter`` before
        its children are parsed, so ids follow pre-order; the node's own
        entry is stored after its children have been stored.
        """
        self.node_counter += 1
        node_id = self.node_counter
        fpstart, fpend = self._outerParens(t)
        if fpstart != -1 and fpend > fpstart:
            # Everything after the closing parenthesis is this node's own
            # "name:length" label; everything inside describes children.
            node = t[fpend + 1:]
            node_type = 'internal'
            nt = t[fpstart + 1:fpend]
            for child in self._listChildren(nt):
                self.parseNewick(child, parent=node_id)
        else:
            node = t
            node_type = 'terminal'
        name, length = self._splitLabel(node)
        self.tree[node_id] = {
            'name': name.replace("'", ""),
            'length': length,
            'type': node_type,
            'parent': parent
        }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment