Skip to content

Instantly share code, notes, and snippets.

@benjeffery
Created March 29, 2022 17:31
Show Gist options
  • Save benjeffery/e6e37fbf36b63403fda36a0391f2f891 to your computer and use it in GitHub Desktop.
Save benjeffery/e6e37fbf36b63403fda36a0391f2f891 to your computer and use it in GitHub Desktop.
import time
import numpy as np
import numba
import tskit
#@numba.njit(cache=True)
def decode(s):
stack = [0]*10000
stack_p = 0
need_to_write_popped = False
node_num = 0
parents = [-1] * 3000000
children = [-1] * 3000000
node_depth = [0] * 3000000
edge_count = 0
for c in s:
if c == " ":
continue
elif c == "(":
stack_p += 1
stack[stack_p] = node_num
node_num+=1
elif c == ")":
if need_to_write_popped:
parents[edge_count] = stack[stack_p]
children[edge_count] = stack[stack_p + 1]
node_depth[stack[stack_p + 1]] = stack_p
else:
parents[edge_count] = stack[stack_p]
children[edge_count] = node_num
node_depth[node_num] = stack_p
node_num += 1
edge_count += 1
stack_p-=1
need_to_write_popped = True
elif c == ",":
if need_to_write_popped:
parents[edge_count] = stack[stack_p]
children[edge_count] = stack[stack_p + 1]
node_depth[stack[stack_p + 1]] = stack_p
need_to_write_popped = False
else:
parents[edge_count] = stack[stack_p]
children[edge_count] = node_num
node_depth[node_num] = stack_p
node_num += 1
edge_count += 1
else: #In label
pass
return node_num, children[:node_num-1], parents[:node_num-1], node_depth[:node_num]
def from_newick(string):
num_nodes, children, parents, node_depth = decode(string)
max_depth = np.max(node_depth)
node_depth = np.array(node_depth, dtype=np.uint32)
node_depth = (-node_depth) + max_depth
tc = tskit.TableCollection(1)
tc.nodes.set_columns(flags=np.zeros(num_nodes,dtype=np.uint32), time=node_depth)
tc.edges.set_columns(
left=np.zeros_like(children,dtype=np.uint32),
right=np.ones_like(children,dtype=np.uint32),
child=children,
parent=parents
)
tc.sort()
return tc.tree_sequence()
with open("big.newick","r") as f:
string = f.read()
t = time.perf_counter()
from_newick(string)
print(time.perf_counter()-t)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment