Skip to content

Instantly share code, notes, and snippets.

@matthewoliver
Last active September 2, 2020 23:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matthewoliver/bb52d5ea6f125064d26535223141b68a to your computer and use it in GitHub Desktop.
Save matthewoliver/bb52d5ea6f125064d26535223141b68a to your computer and use it in GitHub Desktop.
Random untested very hacky version of a range scanner, best path picker and gap filler code.
from swift.common.utils import Timestamp, ShardRange
from swift.container.sharder import DEFAULT_SHARD_CONTAINER_THRESHOLD, \
DEFAULT_SHARD_SHRINK_POINT
from itertools import chain
# This doesn't have to be in a class, was just trying to brainstorm and write something.
class RangesScanner(object):
def __init__(self, shard_account=None, root_conatiner=None,
ranges=None, shard_threshold=None, shrink_size=None):
# These are needed for building shardrange paths
self.shard_account = shard_account
self.root_container = root_conatiner
# This assumes just a list of ranges. I'd assume we'd be smart, and if we can't find a range with active shardranges
# we'd scan again including ranges that are FOUND or CLEAVED, to make sure we do have a valid complete range before
# we attempt to fix.
self.ranges = ranges
# These are used for the weight algorithm, because we don't like ranges that are too small or too big as an overlap
# might just be due to an cleave or shrink.
self.shard_threshold = shard_threshold or \
DEFAULT_SHARD_CONTAINER_THRESHOLD
self.shrink_size = shrink_size or \
DEFAULT_SHARD_CONTAINER_THRESHOLD * DEFAULT_SHARD_SHRINK_POINT
self.reset()
if ranges:
self.scan()
def reset(self):
self.complete = list()
self.frags = list()
self.lowers = defaultdict(list)
self.uppers = defaultdict(list)
self._scanned = False
def has_complete(self):
return self._scanned and len(self.complete) > 0
def has_frags(self):
return self._scanned and len(self.frags) > 0
def walk_range(self, lower, l=None, ranges=None, complete=True):
if not l:
l = []
if not ranges:
ranges = list(self.ranges)
if lower in self.lowers:
multiple = len(self.lowers[lower]) > 1
for sr in self.lowers[lower]:
if sr in ranges:
ranges.remove(sr)
if sr.upper_str == "":
l.append(sr)
if complete:
self.complete.append(l)
else:
self.fragments.append(l)
return
if multiple:
new_l = list(l)
new_l.append(sr)
self.walk_range(sr.upper_str, new_l, ranges, complete)
else:
l.append(sr)
self.walk_range(sr.upper_str, l, ranges, complete)
elif l:
self.fragments.append(l)
return
def scan(self, ranges=None):
self.reset()
if ranges:
self.ranges = ranges
overlaps = False
gaps = False
ranges_left = list(self.ranges)
for sr in self.ranges:
self.lowers[sr.lower_str].append(sr)
self.uppers[sr.upper_str].append(sr)
if len(self.lowers[sr.lower_str]) > 1 or len(self.uppers[sr.upper_str]) > 1:
overlaps = True
# Quick check for gaps
if set(self.lowers.keys()).difference(set(self.uppers.keys())):
gaps = True
# Short cut
if not overlaps and not gaps:
self.complete.append(self.ranges)
return
self.walk_range('', ranges=ranges_left)
while ranges_left:
ranges_left.sort()
self.walk_range(ranges_left[0].lower, ranges=ranges_left, complete=False)
self._scanned = True
def _ranges_to_remove(self, good_path):
remove_set = set()
good_set = set(good_path)
for path in chain(self.complete, self.frags):
path = set(path)
path.difference_update(good_set)
remove_set.update(path)
return list(remove_set)
def _calc_range_weight(self, shard_range):
# Ideally we want to choose a path with the least movement, so we
# and to choose a set of shard ranges that contain the most objects.
# But it isn't that simple. What if we've just cleaved, we don't want
# to pick and old un-cleaved range just to cleave it again.
# Also what if we've just shrunk? Then a range that is too small isn't
# one we actually want to pick either, because then we'd need to shrink
# again.
# Attempt 1:
# weight == object_count / shard_threshold
# Which should give us close to a percentage "full" anything over
# 1 really needs to shard.
# But, what if something has already sharded, or has just shrunk, then
# we don't want to pick that complete path, otherwise we need to shard
# or shrink again. So if it's > shard_threshold or smaller then shrink_size,
# we'll negate the sign.
#
# We do this because the we want to pick the path with the biggest weight
# as we want to cleave of the paths with less object (because data movement)
# so by giving them a negative they should become less attractive.
result = shard_range.object_count / float(self.shard_threshold)
if shard_range.object_count < self.shrink_size \
or shard_range.object_count > self.shard_threshold:
return result * -1
return result
def _find_best_complete_path(self):
if len(self.complete) == 1:
return self.complete[0]
# We have more then 1 complete range. This could be because:
# 1: We're in split brain
# 2: We've recently cleaved
# 3: We've recently started shrinking.
# In either case, let's walk and calulate the best path, noting
# that it's isn't just object count.
weighted_paths = [(path, sum(map(self._calc_range_weight, path)))
for path in self.complete]
return max(weighted_paths, key=lambda x: x[-1])[0]
def _find_and_build_best_fragment_path(self):
# we have 3 different types of fragments, mins, maxes and floating.
# That is, by definition, a fragment is a path that doesn't go from
# MIN to MAX. So:
# - MINS: fragment paths that start at MIN
# - MAXES: fragment paths that end at MAX
# - FLOATING: fragments that dont start or end at MIN or MAX
mins, maxes, floating = list(), list(), list()
for frag in self.frags:
if frag[0].lower == ShardRange.MIN:
mins.append(frag)
elif frag[-1].upper == ShardRange.MAX:
maxes.append(frag)
else:
floating.append(frag)
if not mins and not maxes and not floating:
return None
# When finding the best fragment path, let's make it simple, we want to
# build a complete path by inserting a new shard range. So the best path
# is a shard range that will fill the smallest gap.
# So let's make an assumption that we want to find the smallest gap
# in which case, let's start by finding the frag from mins that reaches
# the furthest.. or the max(mins).
# Then do the same for maxes, so the min(maxes).
def max_shard_range(frag):
return frag[-1].upper_str
def min_shard_range(frag):
return frag[0].lower_str
min_frag = max(mins, key=max_shard_range) if mins else None
max_frag = min(maxes, key=min_shard_range) if maxes else None
# Let's attempt to connect the frags up.
min_path = min_frag if min_frag else []
while floating:
if not min_path:
min_floating = min(floating, key=min_shard_range)
min_path += self._make_new_range('', min_floating[0].lower_str)
min_path += min_floating
floating.remove(min_floating)
else:
# filter out everything < min_path
floating = [f for f in floating if f[-1] > min_path[-1]]
if not floating:
continue
min_floating = min(floating, key=min_shard_range)
# _connect_frags connects the 2, deals with overlaps and
# removes/add not required frags to self.frags so we can remove
# them later.
min_path = self._connect_frags(min_path, min_floating)
floating.remove(min_floating)
# Now we just need to connect the min_path to the max_frag
if not min_path:
min_path += self._make_new_range('', max_frag[0].lower_str)
min_path += max_frag
else:
min_path = self._connect_frags(min_path, max_frag)
return min_path
def _connect_frags(self, left_frag, right_frag):
# We've already filtered out fragments so the last one on the left,
# is < the last one on the right. Meaning the last range on left must
# either overlap or is well clear of the whole right range. (I think
# that logic makes sense)
overlap_map = map(left_frag[-1].overlaps, right_frag)
if any(overlap_map):
# we have overlaps so we want to find the last overlap.
overlap_map.reverse()
reverse_index = overlap_map.index(True)
last_match_index = len(right_frag) - 1 - reverse_index
# OK before me modify a SR, let's remove the existing frag from frags.
self.frags.remove(right_frag)
# and add the new smaller one
pre_overlap_frag = right_frag[:last_match_index]
if pre_overlap_frag:
self.frags.append(pre_overlap_frag)
right_frag[last_match_index].lower = left_frag[-1].upper
right_frag[last_match_index].state_timestamp = Timestamp.now()
left_frag += right_frag[last_match_index:]
else:
# just connect the 2 with a new shardrange.
left_frag += self._make_new_range(left_frag[-1].upper_str,
right_frag[0].lower_str)
left_frag += right_frag
return left_frag
def _make_new_range(self, lower, upper):
ts = Timestamp.now()
path = ShardRange.make_path(self.shard_account, self.root_container,
self.root_container, ts, 0)
return ShardRange(path, ts, lower, upper, state=ShardRange.FOUND)
def find_best_path(self):
ranges_to_remove = list()
# Rule 1: If there are complete paths these _always_ win over fragment
# Paths
if self.complete:
# Rule 1.1: find all the fragment ranges that aren't a part of
# the completed ranges to remove.
path = self._find_best_complete_path()
else:
# Rule 2: There are no completed paths, so we need to build
# the best path we can with the fragments we have. This
# will involve adding a shardrange(s) :S
path = self._find_and_build_best_fragment_path()
ranges_to_remove = self._ranges_to_remove(path)
return path, ranges_to_remove
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment