Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save alexanderguzhva/cb2b9a08ec312e585b5ba11e3691ce36 to your computer and use it in GitHub Desktop.
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <optional>
#include <random>
#include <vector>
#include <raft/core/device_resources.hpp>
#include <raft/distance/distance_types.hpp>
#include <raft/neighbors/ivf_pq.cuh>
#include <raft/neighbors/specializations.cuh>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/managed_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>
// Minimal end-to-end example: build a RAFT IVF-PQ index over random
// training vectors, search it with random query vectors, and print the
// top-k (index, distance) pairs for the first and last query rows.
//
// Requires a CUDA-capable device and the RAFT / RMM headers included above.
int main(int argc, char** argv) {
    // Abort with a readable message on any CUDA runtime error. The original
    // version ignored every cudaError_t, so a failed allocation or copy would
    // silently corrupt all downstream results.
    auto checkCuda = [](cudaError_t err, const char* what) {
        if (err != cudaSuccess) {
            std::cerr << "CUDA error in " << what << ": "
                      << cudaGetErrorString(err) << std::endl;
            std::exit(EXIT_FAILURE);
        }
    };

    // Problem sizes.
    const size_t nColumns = 256;     // vector dimensionality
    const size_t nTrainRows = 65536; // rows used to both train and populate the index
    const size_t nQueryRows = 16;    // number of search queries
    const size_t nLists = 256;       // IVF coarse clusters
    const size_t nSubq = 8;          // PQ sub-quantizers (pq_dim)
    const size_t nProbe = 16;        // IVF lists probed per query
    const bool isTrainShared = true; // PER_SUBSPACE vs PER_CLUSTER codebooks
    const size_t topk = 32;          // neighbors returned per query

    // Deterministic uniform random data in [0, 1); fixed seed for
    // reproducible runs.
    std::default_random_engine rng(123);
    std::uniform_real_distribution<float> u(0, 1);
    std::vector<float> hDataTrain(nColumns * nTrainRows, 0);
    for (auto& v : hDataTrain) {
        v = u(rng);
    }
    std::vector<float> hDataQuery(nColumns * nQueryRows, 0);
    for (auto& v : hDataQuery) {
        v = u(rng);
    }

    // Device copies of the train / query sets (row-major).
    float* dDataTrain = nullptr;
    checkCuda(
        cudaMalloc((void**)&dDataTrain, sizeof(float) * hDataTrain.size()),
        "cudaMalloc(dDataTrain)");
    checkCuda(
        cudaMemcpy(
            dDataTrain,
            hDataTrain.data(),
            sizeof(float) * hDataTrain.size(),
            cudaMemcpyHostToDevice),
        "cudaMemcpy(dDataTrain)");
    float* dDataQuery = nullptr;
    checkCuda(
        cudaMalloc((void**)&dDataQuery, sizeof(float) * hDataQuery.size()),
        "cudaMalloc(dDataQuery)");
    checkCuda(
        cudaMemcpy(
            dDataQuery,
            hDataQuery.data(),
            sizeof(float) * hDataQuery.size(),
            cudaMemcpyHostToDevice),
        "cudaMemcpy(dDataQuery)");

    // raft code
    using data_t = float;
    using idx_t = int64_t;
    using raft_index_t = raft::neighbors::ivf_pq::index<idx_t>;

    // Host/device result buffers, one (nQueryRows x topk) matrix each,
    // pre-filled with -1 so untouched entries are recognizable.
    std::vector<idx_t> hIndices(nQueryRows * topk, -1);
    std::vector<data_t> hDistances(nQueryRows * topk, -1);
    idx_t* dIndices = nullptr;
    checkCuda(
        cudaMalloc((void**)&dIndices, sizeof(idx_t) * hIndices.size()),
        "cudaMalloc(dIndices)");
    float* dDistances = nullptr;
    checkCuda(
        cudaMalloc((void**)&dDistances, sizeof(data_t) * hDistances.size()),
        "cudaMalloc(dDistances)");

    // Owns the CUDA stream and workspace resources RAFT operates on.
    raft::device_resources handle;

    auto data_view = raft::make_device_matrix_view<const data_t, idx_t>(
        dDataTrain, nTrainRows, nColumns);

    // let's keep other parameters as is
    raft::neighbors::ivf_pq::index_params index_params;
    index_params.n_lists = nLists;
    index_params.pq_dim = nSubq;
    index_params.add_data_on_build = true;
    // subspace is faster, cluster is more precise
    index_params.codebook_kind = (isTrainShared)
        ? raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE
        : raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER;

    // Train + populate the index from the training set in one call
    // (add_data_on_build = true above).
    auto raft_index = raft::neighbors::ivf_pq::build<data_t, idx_t>(
        handle, index_params, data_view);
    // wait for build to finish
    handle.sync_stream();
    // unneeded, but just to be super safe
    checkCuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize(build)");

    // Views over the query matrix and the output buffers.
    auto query_view = raft::make_device_matrix_view<const data_t, idx_t>(
        dDataQuery, nQueryRows, nColumns);
    auto inds_view =
        raft::make_device_matrix_view<idx_t, idx_t>(dIndices, nQueryRows, topk);
    auto dists_view = raft::make_device_matrix_view<data_t, idx_t>(
        dDistances, nQueryRows, topk);

    raft::neighbors::ivf_pq::search_params search_params;
    search_params.n_probes = nProbe;

    // search
    raft::neighbors::ivf_pq::search<data_t, idx_t>(
        handle, search_params, raft_index, query_view, inds_view, dists_view);
    handle.sync_stream();
    // unneeded, but just to be super safe
    checkCuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize(search)");

    // Copy results back to the host.
    checkCuda(
        cudaMemcpy(
            hIndices.data(),
            dIndices,
            sizeof(idx_t) * hIndices.size(),
            cudaMemcpyDeviceToHost),
        "cudaMemcpy(hIndices)");
    checkCuda(
        cudaMemcpy(
            hDistances.data(),
            dDistances,
            sizeof(data_t) * hDistances.size(),
            cudaMemcpyDeviceToHost),
        "cudaMemcpy(hDistances)");

    // print results
    std::cout << "topk for the first query" << std::endl;
    for (size_t i = 0; i < topk; i++) {
        std::cout << hIndices[i] << "\t" << hDistances[i] << std::endl;
    }
    std::cout << std::endl;
    std::cout << "topk for the last query" << std::endl;
    for (size_t i = 0; i < topk; i++) {
        std::cout << hIndices[i + topk * (nQueryRows - 1)] << "\t"
                  << hDistances[i + topk * (nQueryRows - 1)] << std::endl;
    }
    std::cout << std::endl;

    // Release device allocations (the original leaked all four on purpose).
    checkCuda(cudaFree(dDistances), "cudaFree(dDistances)");
    checkCuda(cudaFree(dIndices), "cudaFree(dIndices)");
    checkCuda(cudaFree(dDataQuery), "cudaFree(dDataQuery)");
    checkCuda(cudaFree(dDataTrain), "cudaFree(dDataTrain)");

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment