miracle2k/truncate_url.py

## truncate_url.py
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)


def shorten_url(url, maxlength):
    """Will shorten a url to the given maximum length by removing the
    (hopefully) least interesting parts - the ones in the middle.
    """
    # TODO: It might be ok to remove http: for example without a dot.

    def prioritizer(url):
        parsed = urlparse.urlsplit(url)

        # parsed with order information
        counter = itertools.count(0)
        pwo = []
        def append(fmt, value, where=pwo):
            if not value:
                where.append((next(counter), ''))
            else:
                where.append((next(counter), fmt.format(value)))

        append('{}:', parsed.scheme)
        append('//{}/', parsed.netloc)
        pwo.append([])
        for part in parsed.path.split('/'):
            if part:
                append('{}/', part, pwo[-1])
        append('?{}', parsed.query)
        append('#{}', parsed.fragment)

        parsed = SplitResult(*pwo)

        # yield in the desired order. the easy customizability of this is
        # the fruit of our labor above.
        yield parsed.scheme
        yield parsed.fragment
        yield parsed.query
        def x():
            while parsed.path:
                yield parsed.path.pop()
                if parsed.path:
                    yield parsed.path.pop(0)
        yield from reversed(list(x()))
        yield parsed.netloc

    blocks_by_prio = list(prioritizer(url))
    blocks = [block for id, block in sorted(
                [(idx, (priority, block))
                 for (priority, (idx, block)) in enumerate(blocks_by_prio) if block])]


    length = sum([len(block[1]) for block in blocks])
    required_to_shave = length - maxlength

    def try_to_solve_in(blocks, count):
        best_score = None
        best_solution = None
        for idxlist in itertools.combinations(range(len(blocks)), count):
            # Calculate how many ... need to be inserted if we remove
            # these group of blocks.
            gaps = sum([1 for x, y in pairwise(idxlist) if y-x>1])
            if idxlist[0] > 0:
                gaps += 1
            if idxlist[-1] < len(blocks)-1:
                gaps += 1

            # This will be higher if we remove more important blocks
            priority_score = sum([blocks[i][0] for i in idxlist])

            score = gaps*40 + priority_score*1

            #if True:
            #    ccount = sum([len(blocks[i][1]) for i in idxlist])
            #    if ccount - 3*gaps < required_to_shave:
            #        continue
            #    print(idxlist, gaps, sum([blocks[i][0] for i in idxlist]), score)

            if best_score and best_score <= score:
                continue

            ccount = sum([len(blocks[i][1]) for i in idxlist])
            if ccount - 3*gaps >= required_to_shave:
                best_score = score
                best_solution = idxlist

        return best_solution

    for x in range(1, len(blocks)+1):
        solution = try_to_solve_in(blocks, x)
        if solution:
            # XXX: actually insert the ... parts
            return ('<{}>'.format(required_to_shave), solution,
                    [block[1] for id, block in enumerate(blocks) if not id in solution])
            break
	def pairwise(iterable):
	"s -> (s0,s1), (s1,s2), (s2, s3), ..."
	a, b = itertools.tee(iterable)
	next(b, None)
	return zip(a, b)


	def shorten_url(url, maxlength):
	"""Will shorten a url to the given maximum length by removing the
	(hopefully) least interesting parts - the ones in the middle.
	"""
	# TODO: It might be ok to remove http: for example without a dot.

	def prioritizer(url):
	parsed = urlparse.urlsplit(url)

	# parsed with order information
	counter = itertools.count(0)
	pwo = []
	def append(fmt, value, where=pwo):
	if not value:
	where.append((next(counter), ''))
	else:
	where.append((next(counter), fmt.format(value)))

	append('{}:', parsed.scheme)
	append('//{}/', parsed.netloc)
	pwo.append([])
	for part in parsed.path.split('/'):
	if part:
	append('{}/', part, pwo[-1])
	append('?{}', parsed.query)
	append('#{}', parsed.fragment)

	parsed = SplitResult(*pwo)

	# yield in the desired order. the easy customizability of this is
	# the fruit of our labor above.
	yield parsed.scheme
	yield parsed.fragment
	yield parsed.query
	def x():
	while parsed.path:
	yield parsed.path.pop()
	if parsed.path:
	yield parsed.path.pop(0)
	yield from reversed(list(x()))
	yield parsed.netloc

	blocks_by_prio = list(prioritizer(url))
	blocks = [block for id, block in sorted(
	[(idx, (priority, block))
	for (priority, (idx, block)) in enumerate(blocks_by_prio) if block])]


	length = sum([len(block[1]) for block in blocks])
	required_to_shave = length - maxlength

	def try_to_solve_in(blocks, count):
	best_score = None
	best_solution = None
	for idxlist in itertools.combinations(range(len(blocks)), count):
	# Calculate how many ... need to be inserted if we remove
	# these group of blocks.
	gaps = sum([1 for x, y in pairwise(idxlist) if y-x>1])
	if idxlist[0] > 0:
	gaps += 1
	if idxlist[-1] < len(blocks)-1:
	gaps += 1

	# This will be higher if we remove more important blocks
	priority_score = sum([blocks[i][0] for i in idxlist])

	score = gaps40 + priority_score1

	#if True:
	# ccount = sum([len(blocks[i][1]) for i in idxlist])
	# if ccount - 3*gaps < required_to_shave:
	# continue
	# print(idxlist, gaps, sum([blocks[i][0] for i in idxlist]), score)

	if best_score and best_score <= score:
	continue

	ccount = sum([len(blocks[i][1]) for i in idxlist])
	if ccount - 3*gaps >= required_to_shave:
	best_score = score
	best_solution = idxlist

	return best_solution

	for x in range(1, len(blocks)+1):
	solution = try_to_solve_in(blocks, x)
	if solution:
	# XXX: actually insert the ... parts
	return ('<{}>'.format(required_to_shave), solution,
	[block[1] for id, block in enumerate(blocks) if not id in solution])
	break