Skip to content

Instantly share code, notes, and snippets.

@wenfahu
Created November 17, 2017 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wenfahu/9766641df18f403fa51c9fbc7f19b5bb to your computer and use it in GitHub Desktop.
Save wenfahu/9766641df18f403fa51c9fbc7f19b5bb to your computer and use it in GitHub Desktop.
from collections import defaultdict
import numpy as np
import pdb
from IPython import embed
sky = defaultdict(list)
def find_tuple(pts, tgts):
idx = []
for t in tgts:
idx.append(np.where((pts == t).all(axis=1))[0])
return idx
def input_pruning(pts, k):
'''
remove the k-dominated points, input preprocessing
pts: n x m ndarray, n points each has m attributes
k: k value in k group skyline
'''
pairwise = np.logical_and(np.all(pts[:, None] <= pts, axis=-1), np.any(pts[:, None] < pts, axis=-1))
num_of_dominance = np.sum(pairwise, axis=1)
return pts[num_of_dominance < k]
def is_pareto_efficient(costs):
'''
the indices of skyline points
costs: n x m ndarray
n : number of input points
m : number of attributes per point
'''
is_efficient = np.ones(costs.shape[0], dtype = bool)
for i, c in enumerate(costs):
if is_efficient[i]:
is_efficient[is_efficient] = np.any(costs[is_efficient]<=c, axis=1) # Remove dominated points
return is_efficient
def pareto_frontier(pts):
unit_g = np.logical_and(np.all(pts[:, None] <= pts, axis=-1), np.any(pts[:, None] < pts, axis=-1))
dominated = np.any(unit_g, axis=1)
return np.logical_not(dominated)
def compute_skyline(groups):
'''
groups: g x k x m ndarray, g specifies the number of candidate k-skyline groups,
k is the k value of k-skyline, m is the number of attributes of each point
return the k-skyline groups
'''
# aggregate the groups using SUM function
# aggregated_groups is g x m ndarray
aggregated_groups = np.sum(groups, axis=1)
return groups[pareto_frontier(aggregated_groups)]
def join_groups(*args):
'''
group union of 2d array a and b
'''
return np.unique(np.vstack(args) , axis=0)
# sky is global dict that serves as the table for dynamic programming algorithm
# for computing k-skyline group. the key of sky is a tuple (k, n) where k is
# the key value of k-skyline, and n is number of points. the value of sky is the
# corresponding k-skyline in n points
def sky_group(pts, k, n):
'''
recursive method to get the k-skyline in the n points
returns a p x k x m ndarray,
p : number of groups
k : k-skyline
m : number of attributes of each point
'''
# S2 : g x k x m matrix , sky^(n-1)_(k-1) union t_n
# S1 : g x k x m matrix , sky^(n-1)_k
m = pts.shape[-1]
tail_pt = pts[n-1]
if np.any(sky[(k, n)]):
return sky[(k, n)]
if k==1:
S2 = np.reshape(tail_pt, (1, k, m))
else:
S2 = np.empty([0, k, m])
pre_k_sky = sky_group(pts, k-1, n-1)
for g in pre_k_sky:
# g is a single k-skyline of shape K x M
candidate_group = join_groups(g, tail_pt[np.newaxis, :])
S2 = join_groups(S2, candidate_group[np.newaxis, :])
if k < n:
S1 = sky_group(pts, k, n-1)
else:
S1 = np.empty([0, k, m])
candidates = join_groups(S1, S2)
# candidates is the group of candidates of k-skyline of shape GxKxM
res = compute_skyline(candidates)
sky[(k, n)] = res
return res
if __name__ == "__main__":
# pts = np.random.randn(10 ,3)
pts = np.array([ [3, 3], [0, 4], [4, 1], [2, 3],
[2, 2], [2, 1] ])
pts_processed = input_pruning(pts, 3)
pdb.set_trace()
solution = sky_group(pts_processed, 3, pts_processed.shape[0])
embed()
print(solution)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment