Skip to content

Instantly share code, notes, and snippets.

@mmore500
Created January 10, 2023 09:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mmore500/7e9b85f005882d1e4b0206a9ef6d1de2 to your computer and use it in GitHub Desktop.
Save mmore500/7e9b85f005882d1e4b0206a9ef6d1de2 to your computer and use it in GitHub Desktop.
backtrack database
import collections
import typing
class PerfectBacktrackDatabase:
"""Breadcrumb for perfectly backtracking a phylogenetic chain of descent.
Because only backwards references are held, Python garbage collection takes
care of pruning away extinct lineages.
"""
# Achieves 60% the performance of C++ shared_ptr-based implementation in
# tracking a unbranching linked list and outperforms the C++ implementation
# by 30% in tracking a phylogenetic tree with random selection
#
# p = [impl() for __ in range(100)]
# for __ in tqdm(range(10000000)):
# p[random.randrange(len(p))] = random.choice(p).CreateDescendant()
#
# see https://gist.github.com/mmore500/d6ee9011e38355f1be6c02a70db5b785
# (didn't complete debugging, but is representative of speed)
record = pd.DataFrame
next_uid: int
def __init__(
self: "PerfectBacktrackDatabase",
) -> None:
self.next_uid = 0
self.record = collections.Counter()
self.count = collections.Counter()
def CreateDescendant(
self: "PerfectBacktrackDatabase", parent_uid: typing.Optional[int]
) -> int:
descendant_uid = self.next_uid
self.next_uid += 1
self.record[descendant_uid] = parent_uid
self.count[parent_uid] += 1
self.count[descendant_uid] = 1
return descendant_uid
def Free(
self: "PerfectBacktrackDatabase",
uid: int,
) -> None:
while True:
self.count[uid] -= 1
if self.count[uid]:
break
del self.count[uid]
uid = self.record.pop(uid)
def IterLineage(
self: "PerfectBacktrackDatabase",
uid: int,
) -> None:
while uid is not None:
yield uid
uid = self.record[uid]
import collections
import typing
class PerfectBacktrackDatabase:
"""Breadcrumb for perfectly backtracking a phylogenetic chain of descent.
Because only backwards references are held, Python garbage collection takes
care of pruning away extinct lineages.
"""
# Achieves 60% the performance of C++ shared_ptr-based implementation in
# tracking a unbranching linked list and outperforms the C++ implementation
# by 30% in tracking a phylogenetic tree with random selection
#
# p = [impl() for __ in range(100)]
# for __ in tqdm(range(10000000)):
# p[random.randrange(len(p))] = random.choice(p).CreateDescendant()
#
# see https://gist.github.com/mmore500/d6ee9011e38355f1be6c02a70db5b785
# (didn't complete debugging, but is representative of speed)
record = pd.DataFrame
next_uid: int
def __init__(
self: "PerfectBacktrackDatabase",
) -> None:
self.next_uid = 0
self.record = collections.Counter()
self.count = collections.Counter()
def CreateDescendant(
self: "PerfectBacktrackDatabase", parent_uid: typing.Optional[int]
) -> int:
descendant_uid = self.next_uid
self.next_uid += 1
self.record[descendant_uid] = parent_uid
self.count[parent_uid] += 1
self.count[descendant_uid] = 1
return descendant_uid
def Free(
self: "PerfectBacktrackDatabase",
uid: int,
) -> None:
while True:
self.count[uid] -= 1
if self.count[uid]:
break
del self.count[uid]
uid = self.record.pop(uid)
def IterLineage(
self: "PerfectBacktrackDatabase",
uid: int,
) -> None:
while uid is not None:
yield uid
uid = self.record[uid]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment