Last active
August 17, 2021 01:46
-
-
Save lmiq/828133335a83088e63f21152416c7f7c to your computer and use it in GitHub Desktop.
apricot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import seaborn; seaborn.set_style('whitegrid') | |
import time | |
import scipy | |
import numpy as np | |
from numba import njit | |
from numba import prange | |
from sklearn.datasets import fetch_covtype | |
digits_data = fetch_covtype() | |
X_digits = np.abs(digits_data.data) | |
def calculate_gains(func, parallel, fastmath, cache): | |
@njit(parallel=parallel, fastmath=fastmath, cache=cache) | |
def calculate_gains_(X, gains, current_values, idxs, current_concave_values_sum): | |
for i in prange(idxs.shape[0]): | |
idx = idxs[i] | |
gains[i] = func(current_values + X[idx, :]).sum() | |
gains -= current_concave_values_sum | |
return gains | |
return calculate_gains_ | |
def fit(X, k): | |
calculate_gains_ = calculate_gains(np.sqrt, True, True, False) | |
n, d = X.shape | |
cost = 0.0 | |
ranking = [] | |
total_gains = [] | |
mask = np.zeros(n) | |
current_values = np.zeros(d) | |
current_concave_values = np.sqrt(current_values) | |
current_concave_values_sum = current_concave_values.sum() | |
idxs = np.arange(n) | |
gains = np.zeros(idxs.shape[0], dtype='float64') | |
while cost < k: | |
gains = calculate_gains_(X, gains, current_values, idxs, current_concave_values_sum) | |
idx = np.argmax(gains) | |
best_idx = idxs[idx] | |
curr_cost = 1. | |
if cost + curr_cost > k: | |
break | |
cost += curr_cost | |
# Calculate gains | |
gain = gains[idx] * curr_cost | |
# Select next | |
current_values += X[best_idx, :] | |
current_concave_values = np.sqrt(current_values) | |
current_concave_values_sum = current_concave_values.sum() | |
ranking.append(best_idx) | |
total_gains.append(gain) | |
mask[best_idx] = 1 | |
idxs = np.where(mask == 0)[0] | |
return ranking, total_gains | |
k = 1000 | |
tic = time.time() | |
ranking0, gains0 = fit(X=X_digits, k=k) | |
toc0 = time.time() - tic | |
print("-------------------------------------------------") | |
print("Fitting time: ",toc0) | |
print(sum(ranking0), sum(gains0)) | |
print("-------------------------------------------------") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Pkg | |
Pkg.activate("apricot") | |
using BenchmarkTools | |
using ScikitLearn | |
using LoopVectorization | |
using DataStructures | |
using TimerOutputs | |
const tmr = TimerOutput() | |
struct LazyGreedy | |
X::Matrix{Float64} | |
end | |
struct Result | |
ranking::Vector{Int32} | |
gains::Vector{Float64} | |
end | |
function fill!(pq::PriorityQueue, k, v) | |
for i in eachindex(k) | |
enqueue!(pq, k[i], v[i]) | |
end | |
end; | |
function get_gains!(X, current_values, idxs, gains) | |
Threads.@threads for i in eachindex(idxs) | |
s = 0.0 | |
for j in eachindex(current_values) | |
@inbounds s += @fastmath sqrt(current_values[j] + X[j, idxs[i]]) | |
end | |
gains[i] = s | |
end | |
end; | |
function calculate_gains!(X, gains, current_values, idxs, current_concave_value_sum) | |
get_gains!(X, current_values, idxs, gains) | |
gains .-= current_concave_value_sum | |
return gains | |
end; | |
function calculate_gains(X, current_values, idxs::Int, current_concave_value_sum) | |
return get_gains(X, current_values, idxs) - current_concave_value_sum | |
end; | |
function get_gains(X, current_values, idxs::Int) | |
s = 0.0 | |
@tturbo for j in 1:length(current_values) | |
s += sqrt(current_values[j] + X[j, idxs]) | |
end | |
return s | |
end; | |
function fit(optimizer::LazyGreedy, k) | |
d, n = size(optimizer.X) | |
cost = 0.0 | |
# curr_cost = 1. | |
sample_cost = ones(Float64, n) | |
ranking = Int32[] | |
total_gains = Float64[] | |
mask = zeros(Int8, n) | |
current_values = zeros(Float64, d) | |
current_concave_values = sqrt.(current_values) | |
current_concave_values_sum = sum(current_concave_values) | |
idxs = 1:n | |
gains = zeros(Float64, size(idxs)[1]) | |
calculate_gains!(optimizer.X, gains, current_values, idxs, current_concave_values_sum) | |
gains ./= sample_cost[idxs] | |
pq = PriorityQueue{Int64, Float64}(Base.Order.Reverse) | |
fill!(pq, idxs, gains) | |
while cost < k | |
if length(pq) == 0 | |
return | |
end | |
best_gain = -Inf | |
best_idx = -1 | |
while true | |
idx = dequeue!(pq) | |
if cost + sample_cost[idx] > k | |
continue | |
end | |
if best_idx == idx | |
break | |
end | |
# @timeit tmr "hot loop" gain = calculate_gains(optimizer.X, current_values, idx, current_concave_values_sum) | |
gain = calculate_gains(optimizer.X, current_values, idx, current_concave_values_sum) | |
gain /= sample_cost[idx] | |
enqueue!(pq, idx, gain) | |
if gain > best_gain | |
best_gain = gain | |
best_idx = idx | |
elseif gain == best_gain && best_gain == 0.0 | |
best_gain = gain | |
best_idx = idx | |
break | |
end | |
end | |
cost += sample_cost[best_idx] | |
best_gain *= sample_cost[best_idx] | |
# Select next | |
current_values += view(optimizer.X, :, best_idx) | |
current_concave_values .= sqrt.(current_values) | |
current_concave_values_sum = sum(current_concave_values) | |
push!(ranking, best_idx) | |
push!(total_gains, best_gain) | |
mask[best_idx] = 1 | |
#idxs = findall(==(0), mask) | |
end | |
return Result(ranking, total_gains) | |
end; | |
k = 1000; | |
@sk_import datasets: (fetch_covtype) | |
digits_data = fetch_covtype(); | |
X_digits = abs.(digits_data["data"]); | |
X_digits = transpose(X_digits); | |
opt2 = LazyGreedy(X_digits); | |
r = fit(opt2,k) | |
println("---------------------------------------------") | |
println(" Fitting time: ") | |
@time fit(opt2,k) | |
println(sum(r.ranking)," ",sum(r.gains)) | |
println("---------------------------------------------") | |
# @show tmr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Pkg | |
Pkg.activate("apricot") | |
using BenchmarkTools | |
using ScikitLearn | |
using LoopVectorization | |
using DataStructures | |
using TimerOutputs | |
const tmr = TimerOutput() | |
struct NaiveGreedy | |
X::Matrix{Float64} | |
end | |
struct Result | |
ranking::Vector{Int32} | |
gains::Vector{Float64} | |
end | |
function fill!(pq::PriorityQueue, k, v) | |
for i in eachindex(k) | |
enqueue!(pq, k[i], v[i]) | |
end | |
end; | |
function get_gains!(X, current_values, idxs, gains) | |
@tturbo for i in eachindex(idxs) | |
s = 0.0 | |
for j in eachindex(current_values) | |
s += sqrt(current_values[j] + X[j, idxs[i]]) | |
end | |
gains[i] = s | |
end | |
end; | |
function calculate_gains!(X, gains, current_values, idxs, current_concave_value_sum) | |
get_gains!(X, current_values, idxs, gains) | |
gains .-= current_concave_value_sum | |
return gains | |
end; | |
function fit(optimizer::NaiveGreedy, k) | |
d, n = size(optimizer.X) | |
cost = 0.0 | |
ranking = Int32[] | |
total_gains = Float64[] | |
current_values = zeros(Float64, d) | |
current_concave_values = sqrt.(current_values) | |
current_concave_values_sum = sum(current_concave_values) | |
idxs = collect(1:n) | |
gains = zeros(Float64, size(idxs)[1]) | |
while cost < k | |
gains = calculate_gains!(optimizer.X, gains, current_values, idxs, current_concave_values_sum) | |
idx = argmax(gains) | |
best_idx = idxs[idx] | |
curr_cost = 1. | |
if cost + curr_cost > k | |
break | |
end | |
cost += curr_cost | |
# Calculate gains | |
gain = gains[idx] * curr_cost | |
# Select next | |
current_values .+= view(optimizer.X, :, best_idx) | |
current_concave_values_sum = sum(sqrt, current_values) | |
push!(ranking, best_idx) | |
push!(total_gains, gain) | |
popat!(idxs, findfirst(==(best_idx), idxs)) | |
end | |
return Result(ranking, total_gains) | |
end; | |
k = 1000; | |
@sk_import datasets: (fetch_covtype) | |
digits_data = fetch_covtype(); | |
X_digits = abs.(digits_data["data"]); | |
X_digits = transpose(X_digits); | |
opt2 = NaiveGreedy(X_digits); | |
r = fit(opt2,k) | |
println("---------------------------------------------") | |
println(" Fitting time: ") | |
@time fit(opt2,k) | |
println(sum(r.ranking)," ",sum(r.gains)) | |
println("---------------------------------------------") | |
#@show tmr | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Pkg | |
Pkg.activate("apricot") | |
using BenchmarkTools | |
using ScikitLearn | |
using LoopVectorization | |
using DataStructures | |
using TimerOutputs | |
const tmr = TimerOutput() | |
struct NaiveGreedy | |
X::Matrix{Float64} | |
end | |
struct Result | |
ranking::Vector{Int32} | |
gains::Vector{Float64} | |
end | |
function fill!(pq::PriorityQueue, k, v) | |
for i in eachindex(k) | |
enqueue!(pq, k[i], v[i]) | |
end | |
end; | |
#function get_gains!(X, current_values, idxs, gains) | |
# @tturbo for i in eachindex(idxs) | |
# s = 0.0 | |
# for j in eachindex(current_values) | |
# s += sqrt(current_values[j] + X[j, idxs[i]]) | |
# end | |
# gains[i] = s | |
# end | |
#end; | |
function get_gains!(X, current_values, idxs, gains) | |
Threads.@threads for i in eachindex(idxs) | |
s = 0.0 | |
@inbounds @simd for j in eachindex(current_values) | |
s += sqrt(current_values[j] + X[j, idxs[i]]) | |
end | |
@inbounds gains[i] = s | |
end | |
end; | |
function calculate_gains!(X, gains, current_values, idxs, current_concave_value_sum) | |
get_gains!(X, current_values, idxs, gains) | |
gains .-= current_concave_value_sum | |
return gains | |
end; | |
function fit(optimizer::NaiveGreedy, k) | |
d, n = size(optimizer.X) | |
cost = 0.0 | |
ranking = Int32[] | |
total_gains = Float64[] | |
mask = zeros(Int8, n) | |
current_values = zeros(Float64, d) | |
current_concave_values = sqrt.(current_values) | |
current_concave_values_sum = sum(current_concave_values) | |
idxs = 1:n | |
gains = zeros(Float64, size(idxs)[1]) | |
while cost < k | |
gains = calculate_gains!(optimizer.X, gains, current_values, idxs, current_concave_values_sum) | |
idx = argmax(gains) | |
best_idx = idxs[idx] | |
curr_cost = 1. | |
if cost + curr_cost > k | |
break | |
end | |
cost += curr_cost | |
# Calculate gains | |
gain = gains[idx] * curr_cost | |
# Select next | |
current_values += view(optimizer.X, :, best_idx) | |
current_concave_values .= sqrt.(current_values) | |
current_concave_values_sum = sum(current_concave_values) | |
push!(ranking, best_idx) | |
push!(total_gains, gain) | |
mask[best_idx] = 1 | |
idxs = findall(==(0), mask) | |
end | |
return Result(ranking, total_gains) | |
end | |
k = 1000; | |
@sk_import datasets: (fetch_covtype) | |
digits_data = fetch_covtype(); | |
X_digits = abs.(digits_data["data"]); | |
X_digits = transpose(X_digits); | |
opt2 = NaiveGreedy(X_digits); | |
r = fit(opt2,k) | |
println("---------------------------------------------") | |
println(" Fitting time: ") | |
@time fit(opt2,k) | |
println(sum(r.ranking)," ",sum(r.gains)) | |
println("---------------------------------------------") | |
#@show tmr | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment