Skip to content

Instantly share code, notes, and snippets.

@lmiq
Last active August 17, 2021 01:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lmiq/828133335a83088e63f21152416c7f7c to your computer and use it in GitHub Desktop.
Save lmiq/828133335a83088e63f21152416c7f7c to your computer and use it in GitHub Desktop.
apricot
# --- Python (numba/apricot-style) benchmark: greedy feature selection on covtype ---
import seaborn; seaborn.set_style('whitegrid')
import time
import scipy
import numpy as np
from numba import njit
from numba import prange
from sklearn.datasets import fetch_covtype
# Forest covertype dataset. abs() keeps every entry nonnegative so the
# sqrt-based concave objective used below never sees a negative argument.
digits_data = fetch_covtype()
X_digits = np.abs(digits_data.data)
def calculate_gains(func, parallel, fastmath, cache):
    """Build a numba-compiled kernel that scores candidate rows.

    The returned kernel computes, for each candidate index in ``idxs``, the
    value of the concave objective ``func`` applied to the running feature
    sums plus that row, then subtracts the current objective total so the
    entries of ``gains`` become marginal gains.

    Parameters
    ----------
    func : callable
        Concave scalar function applied element-wise (e.g. ``np.sqrt``).
    parallel, fastmath, cache : bool
        Passed straight through to ``numba.njit``.

    Returns
    -------
    callable
        ``calculate_gains_(X, gains, current_values, idxs,
        current_concave_values_sum) -> gains`` (mutates ``gains`` in place).
    """
    # NOTE: the source this was recovered from had lost its indentation;
    # structure restored here (gains subtraction sits outside the loop,
    # matching the Julia implementations of the same algorithm).
    @njit(parallel=parallel, fastmath=fastmath, cache=cache)
    def calculate_gains_(X, gains, current_values, idxs, current_concave_values_sum):
        # One independent candidate per parallel iteration.
        for i in prange(idxs.shape[0]):
            idx = idxs[i]
            gains[i] = func(current_values + X[idx, :]).sum()
        # Convert objective totals into marginal gains.
        gains -= current_concave_values_sum
        return gains
    return calculate_gains_
def fit(X, k):
    """Naive greedy selection maximizing sum(sqrt(column sums of X)).

    Repeatedly picks the row whose addition to the running feature sums
    yields the largest marginal gain, until the accumulated (unit) cost
    reaches the budget ``k``.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Nonnegative data matrix, one sample per row.
    k : number
        Selection budget; with unit costs this is the number of picks.

    Returns
    -------
    (ranking, total_gains) : (list of int, list of float)
        Selected row indices in pick order and their marginal gains.
    """
    calculate_gains_ = calculate_gains(np.sqrt, True, True, False)
    n, d = X.shape
    cost = 0.0
    ranking = []
    total_gains = []
    mask = np.zeros(n)
    current_values = np.zeros(d)
    current_concave_values = np.sqrt(current_values)
    current_concave_values_sum = current_concave_values.sum()
    idxs = np.arange(n)
    gains = np.zeros(idxs.shape[0], dtype='float64')
    while cost < k:
        gains = calculate_gains_(X, gains, current_values, idxs, current_concave_values_sum)
        # Only the first len(idxs) entries were refreshed this round (idxs
        # shrinks while gains keeps length n); restrict argmax to the live
        # prefix so stale tail values can never be selected.
        idx = np.argmax(gains[:idxs.shape[0]])
        best_idx = idxs[idx]
        curr_cost = 1.
        if cost + curr_cost > k:
            break
        cost += curr_cost
        # Marginal gain actually realized by this pick.
        gain = gains[idx] * curr_cost
        # Fold the chosen row into the running sums and refresh the total.
        current_values += X[best_idx, :]
        current_concave_values = np.sqrt(current_values)
        current_concave_values_sum = current_concave_values.sum()
        ranking.append(best_idx)
        total_gains.append(gain)
        mask[best_idx] = 1
        idxs = np.where(mask == 0)[0]
    return ranking, total_gains
# Run the numba-based naive greedy on covtype and report wall time plus
# checksums of the selection (index sum, gain sum) for cross-comparison
# with the Julia versions.
k = 1000
start = time.time()
ranking0, gains0 = fit(X=X_digits, k=k)
toc0 = time.time() - start
print("-------------------------------------------------")
print("Fitting time: ", toc0)
print(sum(ranking0), sum(gains0))
print("-------------------------------------------------")
# --- Julia benchmark 1: lazy greedy backed by a priority queue ---
# (originally a separate script; expects a local project named "apricot")
import Pkg
Pkg.activate("apricot")
using BenchmarkTools
using ScikitLearn
using LoopVectorization
using DataStructures
using TimerOutputs
# Global timer for the optional @timeit instrumentation left commented in fit.
const tmr = TimerOutput()
# Optimizer tag for the lazy-greedy strategy. X is stored feature-major:
# each column X[:, i] holds the feature vector of sample i.
struct LazyGreedy
X::Matrix{Float64}
end
# Output of `fit`: selected sample indices in pick order and the marginal
# gain realized by each pick.
struct Result
ranking::Vector{Int32}
gains::Vector{Float64}
end
# Bulk-load a priority queue: key k[i] is inserted with priority v[i].
# (Shadows Base.fill! for PriorityQueue arguments in this script.)
function fill!(pq::PriorityQueue, k, v)
    foreach(i -> enqueue!(pq, k[i], v[i]), eachindex(k))
end;
# Refresh `gains` in place: gains[i] becomes the concave objective total
# Σ_j sqrt(current_values[j] + X[j, idxs[i]]) for candidate column idxs[i].
# Candidates are independent, so the outer loop is threaded.
function get_gains!(X, current_values, idxs, gains)
    Threads.@threads for i in eachindex(idxs)
        col = idxs[i]
        acc = 0.0
        for j in eachindex(current_values)
            @inbounds acc += @fastmath sqrt(current_values[j] + X[j, col])
        end
        gains[i] = acc
    end
end;
# Recompute all candidate scores, then convert objective totals into
# marginal gains by removing the current selection's objective value.
# Mutates and returns `gains`.
function calculate_gains!(X, gains, current_values, idxs, current_concave_value_sum)
    get_gains!(X, current_values, idxs, gains)
    @. gains = gains - current_concave_value_sum
    return gains
end;
# Marginal gain of adding the single column `idxs` to the current selection.
function calculate_gains(X, current_values, idxs::Int, current_concave_value_sum)
    total = get_gains(X, current_values, idxs)
    return total - current_concave_value_sum
end;
# Objective total Σ_j sqrt(current_values[j] + X[j, idxs]) for one column,
# SIMD-vectorized with LoopVectorization's @tturbo.
function get_gains(X, current_values, idxs::Int)
    acc = 0.0
    @tturbo for j in eachindex(current_values)
        acc += sqrt(current_values[j] + X[j, idxs])
    end
    return acc
end;
# Lazy (accelerated) greedy selection: keeps possibly-stale upper bounds on
# each candidate's marginal gain in a max-priority queue and only
# re-evaluates the queue's top element. By submodularity (gains only shrink
# as the selection grows), a candidate that resurfaces on top with an
# up-to-date gain is the true argmax. Selects until cost reaches budget `k`.
function fit(optimizer::LazyGreedy, k)
d, n = size(optimizer.X)
cost = 0.0
# curr_cost = 1.
# Uniform per-sample cost of 1 (knapsack-style hooks are in place).
sample_cost = ones(Float64, n)
ranking = Int32[]
total_gains = Float64[]
mask = zeros(Int8, n)
current_values = zeros(Float64, d)
current_concave_values = sqrt.(current_values)
current_concave_values_sum = sum(current_concave_values)
idxs = 1:n
gains = zeros(Float64, size(idxs)[1])
# Seed the queue with exact initial gains for every candidate.
calculate_gains!(optimizer.X, gains, current_values, idxs, current_concave_values_sum)
gains ./= sample_cost[idxs]
pq = PriorityQueue{Int64, Float64}(Base.Order.Reverse)
fill!(pq, idxs, gains)
while cost < k
if length(pq) == 0
# NOTE(review): bare `return` yields `nothing`, unlike the final
# `return Result(...)` — callers doing `r.ranking` would fail here.
return
end
best_gain = -Inf
best_idx = -1
while true
idx = dequeue!(pq)
# Unaffordable under the remaining budget: dropped permanently
# (not re-enqueued) — harmless while all costs are equal.
if cost + sample_cost[idx] > k
continue
end
# Current best resurfaced on top with a fresh gain: it is the true
# argmax, select it (it stays dequeued, i.e. removed from the pool).
if best_idx == idx
break
end
# @timeit tmr "hot loop" gain = calculate_gains(optimizer.X, current_values, idx, current_concave_values_sum)
# Refresh this candidate's gain against the current selection and
# push it back with its updated priority.
gain = calculate_gains(optimizer.X, current_values, idx, current_concave_values_sum)
gain /= sample_cost[idx]
enqueue!(pq, idx, gain)
if gain > best_gain
best_gain = gain
best_idx = idx
elseif gain == best_gain && best_gain == 0.0
# A zero gain cannot be beaten (gains of this monotone objective
# are nonnegative), so stop scanning early.
best_gain = gain
best_idx = idx
break
end
end
cost += sample_cost[best_idx]
best_gain *= sample_cost[best_idx]
# Select next
current_values += view(optimizer.X, :, best_idx)
current_concave_values .= sqrt.(current_values)
current_concave_values_sum = sum(current_concave_values)
push!(ranking, best_idx)
push!(total_gains, best_gain)
mask[best_idx] = 1
#idxs = findall(==(0), mask)
end
return Result(ranking, total_gains)
end;
# Driver: load covtype through ScikitLearn, make it feature-major, run the
# fit once (pays compilation, gives results), then time a second run.
k = 1000;
@sk_import datasets: (fetch_covtype)
digits_data = fetch_covtype();
X_digits = abs.(digits_data["data"]);
# Transpose so each sample is a column, matching X[j, sample] access in the kernels.
X_digits = transpose(X_digits);
opt2 = LazyGreedy(X_digits);
r = fit(opt2,k)
println("---------------------------------------------")
println(" Fitting time: ")
# Second call is the timed one — compilation already happened above.
@time fit(opt2,k)
println(sum(r.ranking)," ",sum(r.gains))
println("---------------------------------------------")
# @show tmr
# --- Julia benchmark 2: naive greedy with an @tturbo gains kernel ---
# NOTE(review): this segment repeats the setup of the previous one (these
# were separate gist files); running them in one session redefines `tmr`
# and the structs.
import Pkg
Pkg.activate("apricot")
using BenchmarkTools
using ScikitLearn
using LoopVectorization
using DataStructures
using TimerOutputs
const tmr = TimerOutput()
# Optimizer tag for the naive (exhaustive re-scoring) greedy strategy.
# X is feature-major: each column X[:, i] is the feature vector of sample i.
struct NaiveGreedy
X::Matrix{Float64}
end
# Output of `fit`: selected sample indices in pick order and the marginal
# gain realized by each pick.
struct Result
ranking::Vector{Int32}
gains::Vector{Float64}
end
# Bulk-load a priority queue with parallel key/priority arrays.
# (Shadows Base.fill! for PriorityQueue arguments in this script.)
function fill!(pq::PriorityQueue, k, v)
    foreach(i -> enqueue!(pq, k[i], v[i]), eachindex(k))
end;
# Refresh `gains` in place with @tturbo (LoopVectorization):
# gains[i] = Σ_j sqrt(current_values[j] + X[j, idxs[i]]).
function get_gains!(X, current_values, idxs, gains)
    @tturbo for i in eachindex(idxs)
        total = 0.0
        for j in eachindex(current_values)
            total += sqrt(current_values[j] + X[j, idxs[i]])
        end
        gains[i] = total
    end
end;
# Recompute all candidate scores, then shift by the current objective total
# so the entries become marginal gains. Mutates and returns `gains`.
function calculate_gains!(X, gains, current_values, idxs, current_concave_value_sum)
    get_gains!(X, current_values, idxs, gains)
    @. gains = gains - current_concave_value_sum
    return gains
end;
# Naive greedy selection maximizing Σ_j sqrt(Σ_selected X[j, i]): every
# round re-scores all remaining candidates and picks the best, until the
# accumulated (unit) cost reaches the budget `k`.
# Returns a Result(ranking, gains) with picks in selection order.
function fit(optimizer::NaiveGreedy, k)
    d, n = size(optimizer.X)
    cost = 0.0
    ranking = Int32[]
    total_gains = Float64[]
    current_values = zeros(Float64, d)
    # Objective of the empty selection (sqrt.(0) sums to 0.0).
    current_concave_values_sum = sum(sqrt, current_values)
    idxs = collect(1:n)
    gains = zeros(Float64, n)
    while cost < k
        # `idxs` shrinks while `gains` keeps length n; only the first
        # length(idxs) entries are refreshed each round, so score and
        # argmax over that live prefix — stale tail values can never win.
        live = view(gains, 1:length(idxs))
        calculate_gains!(optimizer.X, live, current_values, idxs, current_concave_values_sum)
        idx = argmax(live)
        best_idx = idxs[idx]
        curr_cost = 1.
        if cost + curr_cost > k
            break
        end
        cost += curr_cost
        # Marginal gain actually realized by this pick.
        gain = live[idx] * curr_cost
        # Fold the chosen column into the running sums (in place) and
        # refresh the objective total.
        current_values .+= view(optimizer.X, :, best_idx)
        current_concave_values_sum = sum(sqrt, current_values)
        push!(ranking, best_idx)
        push!(total_gains, gain)
        # `idxs` stays sorted, so binary search finds the slot to remove
        # instead of a linear findfirst scan.
        popat!(idxs, searchsortedfirst(idxs, best_idx))
    end
    return Result(ranking, total_gains)
end;
# Driver: same protocol as the lazy-greedy benchmark — load covtype,
# transpose to feature-major, run once for compilation/results, time the
# second run.
k = 1000;
@sk_import datasets: (fetch_covtype)
digits_data = fetch_covtype();
X_digits = abs.(digits_data["data"]);
# Each sample becomes a column, matching X[j, sample] access in the kernels.
X_digits = transpose(X_digits);
opt2 = NaiveGreedy(X_digits);
r = fit(opt2,k)
println("---------------------------------------------")
println(" Fitting time: ")
# Second call is the timed one — compilation already happened above.
@time fit(opt2,k)
println(sum(r.ranking)," ",sum(r.gains))
println("---------------------------------------------")
#@show tmr
# --- Julia benchmark 3: naive greedy with a Threads.@threads + @simd kernel ---
# NOTE(review): repeats the previous segment's setup (originally separate
# gist files); re-running in one session redefines `tmr` and the structs.
import Pkg
Pkg.activate("apricot")
using BenchmarkTools
using ScikitLearn
using LoopVectorization
using DataStructures
using TimerOutputs
const tmr = TimerOutput()
# Optimizer tag for the naive (exhaustive re-scoring) greedy strategy.
# X is feature-major: each column X[:, i] is the feature vector of sample i.
struct NaiveGreedy
X::Matrix{Float64}
end
# Output of `fit`: selected sample indices in pick order and the marginal
# gain realized by each pick.
struct Result
ranking::Vector{Int32}
gains::Vector{Float64}
end
# Bulk-load a priority queue with parallel key/priority arrays.
# (Shadows Base.fill! for PriorityQueue arguments in this script.)
function fill!(pq::PriorityQueue, k, v)
    foreach(i -> enqueue!(pq, k[i], v[i]), eachindex(k))
end;
# NOTE(review): superseded @tturbo variant of get_gains!, kept for
# reference — the Threads.@threads + @simd version below is the one in use.
#function get_gains!(X, current_values, idxs, gains)
# @tturbo for i in eachindex(idxs)
# s = 0.0
# for j in eachindex(current_values)
# s += sqrt(current_values[j] + X[j, idxs[i]])
# end
# gains[i] = s
# end
#end;
# Refresh `gains` in place: gains[i] becomes the concave objective total
# Σ_j sqrt(current_values[j] + X[j, idxs[i]]) for candidate column idxs[i].
# Candidates are independent, so the outer loop is threaded; the inner
# reduction is bounds-check-free and SIMD-vectorized.
function get_gains!(X, current_values, idxs, gains)
    Threads.@threads for i in eachindex(idxs)
        col = idxs[i]
        total = 0.0
        @inbounds @simd for j in eachindex(current_values)
            total += sqrt(current_values[j] + X[j, col])
        end
        @inbounds gains[i] = total
    end
end;
# Recompute all candidate scores, then shift by the current objective total
# so the entries become marginal gains. Mutates and returns `gains`.
function calculate_gains!(X, gains, current_values, idxs, current_concave_value_sum)
    get_gains!(X, current_values, idxs, gains)
    @. gains = gains - current_concave_value_sum
    return gains
end;
# Naive greedy selection maximizing Σ_j sqrt(Σ_selected X[j, i]): every
# round re-scores all remaining candidates and picks the best, until the
# accumulated (unit) cost reaches the budget `k`. This variant tracks the
# remaining pool with a 0/1 mask and rebuilds `idxs` via findall.
# Returns a Result(ranking, gains) with picks in selection order.
function fit(optimizer::NaiveGreedy, k)
    d, n = size(optimizer.X)
    cost = 0.0
    ranking = Int32[]
    total_gains = Float64[]
    mask = zeros(Int8, n)
    current_values = zeros(Float64, d)
    current_concave_values = sqrt.(current_values)
    current_concave_values_sum = sum(current_concave_values)
    # collect keeps `idxs` a Vector{Int} throughout (it is rebound to a
    # findall result below), avoiding a range/vector type switch.
    idxs = collect(1:n)
    gains = zeros(Float64, n)
    while cost < k
        # `idxs` shrinks while `gains` keeps length n; only the first
        # length(idxs) entries are refreshed each round, so score and
        # argmax over that live prefix — stale tail values can never win.
        live = view(gains, 1:length(idxs))
        calculate_gains!(optimizer.X, live, current_values, idxs, current_concave_values_sum)
        idx = argmax(live)
        best_idx = idxs[idx]
        curr_cost = 1.
        if cost + curr_cost > k
            break
        end
        cost += curr_cost
        # Marginal gain actually realized by this pick.
        gain = live[idx] * curr_cost
        # Fold the chosen column into the running sums in place (the
        # original `+=` allocated a fresh vector every iteration).
        current_values .+= view(optimizer.X, :, best_idx)
        current_concave_values .= sqrt.(current_values)
        current_concave_values_sum = sum(current_concave_values)
        push!(ranking, best_idx)
        push!(total_gains, gain)
        mask[best_idx] = 1
        idxs = findall(==(0), mask)
    end
    return Result(ranking, total_gains)
end
# Driver: same protocol as the other benchmarks — load covtype, transpose
# to feature-major, run once for compilation/results, time the second run.
k = 1000;
@sk_import datasets: (fetch_covtype)
digits_data = fetch_covtype();
X_digits = abs.(digits_data["data"]);
# Each sample becomes a column, matching X[j, sample] access in the kernels.
X_digits = transpose(X_digits);
opt2 = NaiveGreedy(X_digits);
r = fit(opt2,k)
println("---------------------------------------------")
println(" Fitting time: ")
# Second call is the timed one — compilation already happened above.
@time fit(opt2,k)
println(sum(r.ranking)," ",sum(r.gains))
println("---------------------------------------------")
#@show tmr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment