Skip to content

Instantly share code, notes, and snippets.

@miracle2k
Created October 26, 2013 23:22
Show Gist options
  • Save miracle2k/7175789 to your computer and use it in GitHub Desktop.
Save miracle2k/7175789 to your computer and use it in GitHub Desktop.
Aborted insane attempt to shorten a url by removing the least interesting parts first.
def pairwise(iterable):
"s -> (s0,s1), (s1,s2), (s2, s3), ..."
a, b = itertools.tee(iterable)
next(b, None)
return zip(a, b)
def shorten_url(url, maxlength):
"""Will shorten a url to the given maximum length by removing the
(hopefully) least interesting parts - the ones in the middle.
"""
# TODO: It might be ok to remove http: for example without a dot.
def prioritizer(url):
parsed = urlparse.urlsplit(url)
# parsed with order information
counter = itertools.count(0)
pwo = []
def append(fmt, value, where=pwo):
if not value:
where.append((next(counter), ''))
else:
where.append((next(counter), fmt.format(value)))
append('{}:', parsed.scheme)
append('//{}/', parsed.netloc)
pwo.append([])
for part in parsed.path.split('/'):
if part:
append('{}/', part, pwo[-1])
append('?{}', parsed.query)
append('#{}', parsed.fragment)
parsed = SplitResult(*pwo)
# yield in the desired order. the easy customizability of this is
# the fruit of our labor above.
yield parsed.scheme
yield parsed.fragment
yield parsed.query
def x():
while parsed.path:
yield parsed.path.pop()
if parsed.path:
yield parsed.path.pop(0)
yield from reversed(list(x()))
yield parsed.netloc
blocks_by_prio = list(prioritizer(url))
blocks = [block for id, block in sorted(
[(idx, (priority, block))
for (priority, (idx, block)) in enumerate(blocks_by_prio) if block])]
length = sum([len(block[1]) for block in blocks])
required_to_shave = length - maxlength
def try_to_solve_in(blocks, count):
best_score = None
best_solution = None
for idxlist in itertools.combinations(range(len(blocks)), count):
# Calculate how many ... need to be inserted if we remove
# these group of blocks.
gaps = sum([1 for x, y in pairwise(idxlist) if y-x>1])
if idxlist[0] > 0:
gaps += 1
if idxlist[-1] < len(blocks)-1:
gaps += 1
# This will be higher if we remove more important blocks
priority_score = sum([blocks[i][0] for i in idxlist])
score = gaps*40 + priority_score*1
#if True:
# ccount = sum([len(blocks[i][1]) for i in idxlist])
# if ccount - 3*gaps < required_to_shave:
# continue
# print(idxlist, gaps, sum([blocks[i][0] for i in idxlist]), score)
if best_score and best_score <= score:
continue
ccount = sum([len(blocks[i][1]) for i in idxlist])
if ccount - 3*gaps >= required_to_shave:
best_score = score
best_solution = idxlist
return best_solution
for x in range(1, len(blocks)+1):
solution = try_to_solve_in(blocks, x)
if solution:
# XXX: actually insert the ... parts
return ('<{}>'.format(required_to_shave), solution,
[block[1] for id, block in enumerate(blocks) if not id in solution])
break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment