@sebastiano-barrera
Created February 27, 2017 17:53
import sys
from collections import namedtuple
import numpy as np
Video = namedtuple('Video', 'size')
Endpoint = namedtuple('Endpoint', 'dc_latency conns')
ReqBundle = namedtuple('ReqBundle', 'video_id endpoint_id count')
class Problem(namedtuple('Problem', 'n_caches cache_cap videos endpoints reqs')):
    @staticmethod
    def read_file(f):
        if isinstance(f, str):
            f = open(f, 'r')
        # header: #videos, #endpoints, #request descriptions, #caches, capacity per cache
        header = f.readline()
        n_videos, n_endpoints, n_reqs, n_caches, capacity = [int(tok) for tok in header.split()]
        videos = [Video(size=int(tok)) for tok in f.readline().split()]
        endpoints = []
        for i in range(n_endpoints):
            dc_latency, n_local_caches = map(int, f.readline().split())
            conns = {}
            for k in range(n_local_caches):
                cache_id, latency = map(int, f.readline().split())
                conns[cache_id] = latency
            endpoints.append(Endpoint(dc_latency, conns))
        reqs = []
        for i in range(n_reqs):
            video_id, endpoint_id, count = map(int, f.readline().split())
            reqs.append(ReqBundle(video_id=video_id,
                                  endpoint_id=endpoint_id,
                                  count=count))
        return Problem(n_caches=n_caches,
                       cache_cap=capacity,
                       videos=videos,
                       endpoints=endpoints,
                       reqs=reqs)
# prob = Problem.read_file(sys.argv[1])
#prob = Problem.read_file('me_at_the_zoo.in')
#prob = Problem.read_file('test.in')
#prob = Problem.read_file(file_name)
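# Illustration (not from the original gist): read_file expects the Hash Code 2017
# "Streaming Videos" text format -- a header "V E R C X" (videos, endpoints,
# request descriptions, caches, capacity per cache), a line of V video sizes,
# then for each endpoint its datacenter latency and K "cache_id latency" lines,
# then R "video_id endpoint_id count" lines. A tiny hand-made sample:
#
# from io import StringIO
# sample = StringIO('3 1 2 2 100\n'   # 3 videos, 1 endpoint, 2 requests, 2 caches of 100 MB
#                   '50 50 80\n'      # video sizes
#                   '1000 2\n'        # endpoint 0: 1000 ms to the datacenter, 2 caches
#                   '0 100\n'         # ... 100 ms to cache 0
#                   '1 300\n'         # ... 300 ms to cache 1
#                   '0 0 1500\n'      # 1500 requests for video 0 from endpoint 0
#                   '1 0 800\n')
# prob = Problem.read_file(sample)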
def gen_cache_allocs(prob):
    # one [free_capacity, [video ids]] pair per cache server
    cache_allocs = [[prob.cache_cap, []] for i in range(prob.n_caches)]
    # greedily serve the largest request bundles first
    sorted_reqs = sorted(prob.reqs, key=lambda r: -r.count)
    for req in sorted_reqs:
        video = prob.videos[req.video_id]
        endpoint = prob.endpoints[req.endpoint_id]
        conns = endpoint.conns.items()
        best_latency = endpoint.dc_latency
        best_cache = None
        for conn, latency in conns:
            # pick the lowest-latency connected cache that still has room
            # (>= so that an exact fit is allowed)
            if cache_allocs[conn][0] >= video.size and latency < best_latency:
                best_cache = conn
                best_latency = latency
        if best_cache is not None and req.video_id not in cache_allocs[best_cache][1]:
            cache_allocs[best_cache][0] -= video.size
            cache_allocs[best_cache][1].append(req.video_id)
        # print(req, best_latency, endpoint.dc_latency, best_cache)
    return cache_allocs
# Solution here!
# n_used = sum(len(alloc[1]) > 0 for alloc in cache_allocs)
# print(n_used)
# for alloc in cache_allocs:
#     if len(alloc[1]) > 0:
#         print(len(alloc[1]), alloc[1], 'free', alloc[0])
files = ['me_at_the_zoo.in', 'trending_today.in', 'videos_worth_spreading.in', 'kittens.in']
for f in files:
    print(f)
    prob = Problem.read_file(f)
    cache_allocs = gen_cache_allocs(prob)
    out = open(f.replace('.in', '.out'), 'w')
    # first line: number of cache servers that actually hold at least one video
    n_used = sum(1 for free, vids in cache_allocs if vids)
    out.write(str(n_used) + '\n')
    for cache_id in range(len(cache_allocs)):
        if len(cache_allocs[cache_id][1]) > 0:
            out.write(str(cache_id) + ' ' + ' '.join(map(str, cache_allocs[cache_id][1])) + '\n')
    out.close()
# evaluation
"""
tot_saved = 0
tot_videos = 0
for req in prob.reqs:
    dc_latency = prob.endpoints[req.endpoint_id].dc_latency
    min_latency = dc_latency
    for cache_id, latency in prob.endpoints[req.endpoint_id].conns.items():
        if req.video_id in cache_allocs[cache_id][1] and latency < min_latency:
            min_latency = latency
    tot_videos += req.count
    tot_saved += (dc_latency - min_latency) * req.count
print(1000 * tot_saved // tot_videos)
"""
import sys
from collections import namedtuple
import numpy as np
import pandas as pd
Video = namedtuple('Video', 'size')
Endpoint = namedtuple('Endpoint', 'dc_latency conns')
ReqBundle = namedtuple('ReqBundle', 'video_id endpoint_id count')
class Problem(namedtuple('Problem', 'n_caches cache_cap videos endpoints reqs')):
    @staticmethod
    def read_file(f):
        if isinstance(f, str):
            f = open(f, 'r')
        header = f.readline()
        n_videos, n_endpoints, n_reqs, n_caches, capacity = [int(tok) for tok in header.split()]
        videos = [Video(size=int(tok)) for tok in f.readline().split()]
        endpoints = []
        for i in range(n_endpoints):
            dc_latency, n_local_caches = map(int, f.readline().split())
            conns = {}
            for k in range(n_local_caches):
                cache_id, latency = map(int, f.readline().split())
                conns[cache_id] = latency
            endpoints.append(Endpoint(dc_latency, conns))
        reqs = []
        for i in range(n_reqs):
            video_id, endpoint_id, count = map(int, f.readline().split())
            reqs.append(ReqBundle(video_id=video_id,
                                  endpoint_id=endpoint_id,
                                  count=count))
        return Problem(n_caches=n_caches,
                       cache_cap=capacity,
                       videos=videos,
                       endpoints=endpoints,
                       reqs=reqs)
def make_gains(prob):
    # gains[v, c] = total latency saved by putting video v into cache c,
    # summed over all request bundles that could be served from c
    gains = np.zeros((len(prob.videos), prob.n_caches))
    for req in prob.reqs:
        endpoint = prob.endpoints[req.endpoint_id]
        for cache_id, latency in endpoint.conns.items():
            saving = endpoint.dc_latency - latency
            if saving > 0:
                gains[req.video_id, cache_id] += saving * req.count
    return gains
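# Illustration (made-up numbers): a bundle of 1500 requests for video 0, coming
# from an endpoint with a 1000 ms datacenter latency and a 100 ms link to
# cache 0, contributes (1000 - 100) * 1500 = 1,350,000 to gains[0, 0].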
def gains_to_df(mat):
    # flatten the (video, cache) matrix into one row per pair
    df = pd.DataFrame(mat).stack().reset_index()
    df.rename(inplace=True, columns={
        'level_0': 'Video',
        'level_1': 'Cache',
        0: 'Gain'
    })
    df.sort_values('Gain', ascending=False, inplace=True)
    return df
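# The frame has one row per (Video, Cache) pair, sorted by descending Gain so
# the most valuable placements come first, e.g. (hypothetical values):
#
#        Video  Cache       Gain
#   12       0      3  1350000.0
#    7       2      1   900000.0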
MAX_CONSECUTIVE_FAILURES = 100
def simulate_allocations(prob, alloc_ops):
    cap = prob.cache_cap
    free = cap * np.ones(prob.n_caches)
    allocs = [[] for i in range(prob.n_caches)]
    consecutive_failures = 0
    for row in alloc_ops.itertuples():
        video_id, cache_id = row.Video, row.Cache
        size = prob.videos[video_id].size
        if free[cache_id] < size:
            print('not enough space to put video #{} into cache #{}'
                  .format(video_id, cache_id))
            consecutive_failures += 1
            if consecutive_failures > MAX_CONSECUTIVE_FAILURES:
                break
        else:
            free[cache_id] -= size
            allocs[cache_id].append(video_id)
    return allocs, free
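# Rough scoring sketch (not part of the original gist), adapted from the
# commented-out evaluation block in the first script: average latency saved
# per request, scaled by 1000 and floored as in the Hash Code 2017 scoring.
def score_allocations(prob, allocs):
    tot_saved = 0
    tot_requests = 0
    for req in prob.reqs:
        endpoint = prob.endpoints[req.endpoint_id]
        # best latency available for this request given the allocations
        best = endpoint.dc_latency
        for cache_id, latency in endpoint.conns.items():
            if req.video_id in allocs[cache_id] and latency < best:
                best = latency
        tot_requests += req.count
        tot_saved += (endpoint.dc_latency - best) * req.count
    return 1000 * tot_saved // tot_requests if tot_requests else 0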
def write_solution(cache_allocs, file=sys.stdout):
    n_used = sum(len(alloc) > 0 for alloc in cache_allocs)
    print(n_used, file=file)
    for cache_id, alloc in enumerate(cache_allocs):
        if len(alloc) > 0:
            print(cache_id, *alloc, file=file)
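# Example of the output produced by write_solution (hypothetical values):
#   2          <- two caches hold at least one video
#   0 4 17     <- cache 0 stores videos 4 and 17
#   3 2        <- cache 3 stores video 2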
def main():
    if len(sys.argv) == 1:
        print('usage: solution.py input_1.in input_2.in ...')
        return
    for filename in sys.argv[1:]:
        print('---', filename)
        print('  parsing')
        problem = Problem.read_file(filename)
        print('  computing gains matrix')
        gains = make_gains(problem)
        print('  converting to DF')
        alloc_ops = gains_to_df(gains)
        print('  simulating allocations')
        allocs, free = simulate_allocations(problem, alloc_ops)
        print('  writing output')
        out_filename = filename + '_sol.txt'
        with open(out_filename, 'w') as f:
            write_solution(allocs, file=f)
if __name__ == '__main__':
    main()
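# Invocation example, using the input files listed in the first script:
#   python3 solution.py me_at_the_zoo.in trending_today.in videos_worth_spreading.in kittens.in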