Skip to content

Instantly share code, notes, and snippets.

@drin
Last active August 30, 2022 18:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drin/8dfa8ee631ef17b63dca5c2348f20d3c to your computer and use it in GitHub Desktop.
Save drin/8dfa8ee631ef17b63dca5c2348f20d3c to your computer and use it in GitHub Desktop.
Some Arrow Benchmarking
// A version that is directly comparable to
// https://gist.github.com/js8544/8569c0e0bb810f1254904e4584def167#file-benchmark-cc-L12
// Baseline benchmark: evaluates `value >= 100` one element at a time and
// appends each boolean result to an arrow::BooleanBuilder.
//
// The input array is requested once, outside the timed loop, via
// benchmark_rng.Int64(test_size, 0, max_val). Every timed iteration creates a
// fresh BooleanBuilder, so builder construction, Reserve(), the per-element
// Append() calls and Finish() are all part of the measurement.
//
// Note: with a threshold of 100 and an upper bound of int64 max, nearly every
// generated value is presumably >= 100 (assuming the generator spreads values
// across the requested range), so the comparison outcome is highly predictable.
static void GreaterEqual(benchmark::State& state) {  // NOLINT non-const reference
  constexpr int64_t test_size = 10000;
  constexpr int64_t max_val = std::numeric_limits<int64_t>::max();

  auto test_vals = benchmark_rng.Int64(test_size, 0, max_val);
  auto test_ints = std::static_pointer_cast<arrow::Int64Array>(test_vals);

  // Hoisted loop bound; the index below uses the same 64-bit width so the
  // comparison never mixes an `int` counter with a wider length.
  const int64_t num_values = test_ints->length();

  while (state.KeepRunning()) {
    arrow::BooleanBuilder builder;

    // Builder calls return a Status; check it rather than storing it in a
    // variable that is never inspected.
    ASSERT_OK(builder.Reserve(num_values));
    for (int64_t i = 0; i < num_values; ++i) {
      ASSERT_OK(builder.Append(test_ints->Value(i) >= 100));
    }

    auto result = builder.Finish();
    ASSERT_OK(result.status());
    benchmark::DoNotOptimize(result);
  }

  // Throughput counters: one int64 input value per item.
  state.SetBytesProcessed(state.iterations() * (test_vals->length() * sizeof(int64_t)));
  state.SetItemsProcessed(state.iterations() * test_vals->length());
}
** Run on an M1 **
Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
2022-08-30T10:40:36-07:00
Running ./release/arrow-misc-benchmark
Run on (8 X 24.121 MHz CPU s)
CPU Caches:
L1 Data 64 KiB (x8)
L1 Instruction 128 KiB (x8)
L2 Unified 4096 KiB (x2)
Load Average: 1.60, 1.47, 1.36
-----------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
-----------------------------------------------------------------------------
GreaterEqual 34893 ns 34893 ns 19840 bytes_per_second=2.13527G/s items_per_second=286.592M/s
ArrayGreaterThan 35654 ns 35654 ns 19631 bytes_per_second=2.08968G/s items_per_second=280.472M/s
ComputeGreaterThan 3182 ns 3182 ns 223978 bytes_per_second=23.4183G/s items_per_second=3.14315G/s
FastArrayGreaterThan 6256 ns 6256 ns 112027 bytes_per_second=11.9086G/s items_per_second=1.59834G/s
// version using the compute layer
// Benchmarks the compute layer: each timed iteration invokes the
// "greater_equal" function through compute::CallFunction on an int64 array and
// an int64 scalar holding 100.
//
// Despite the function's name, the operation measured is `>=`, not `>`.
static void ComputeGreaterThan(benchmark::State& state) {  // NOLINT non-const reference
  constexpr int64_t kNumValues = 10000;
  constexpr int64_t kUpperBound = std::numeric_limits<int64_t>::max();

  // Inputs are created once, before timing starts.
  auto input_array = benchmark_rng.Int64(kNumValues, 0, kUpperBound);
  auto threshold = std::make_shared<arrow::Int64Scalar>(100);

  while (state.KeepRunning()) {
    ASSERT_OK_AND_ASSIGN(Datum comparison,
                         compute::CallFunction("greater_equal", {input_array, threshold}));

    // Keep the result and both inputs observable to the optimizer.
    benchmark::DoNotOptimize(comparison);
    benchmark::DoNotOptimize(input_array);
    benchmark::DoNotOptimize(threshold);
  }

  // Throughput counters: one int64 input value per item.
  const auto bytes_per_iteration = input_array->length() * sizeof(int64_t);
  state.SetItemsProcessed(state.iterations() * input_array->length());
  state.SetBytesProcessed(state.iterations() * bytes_per_iteration);
}
// ** A version that approximates the implementation of "greater_equal" **
// Hand-written batched comparison: reads the raw int64 values, writes one byte
// (1 or 0) per `value >= 100` result into a small scratch buffer, and appends
// the buffer to an arrow::BooleanBuilder kBatchSize values at a time.
//
// Despite the function's name, the comparison performed is `>=`, not `>`.
//
// The array length (10000) is not a multiple of kBatchSize (32), so after the
// full batches a shorter tail batch is processed as well. Without it the last
// `length % kBatchSize` values would never be compared or appended, while the
// throughput counters below would still count them as processed.
static void ArrayGreaterThan(benchmark::State& state) {  // NOLINT non-const reference
  constexpr int64_t test_size = 10000;
  constexpr int64_t max_val = std::numeric_limits<int64_t>::max();
  static constexpr int kBatchSize = 32;

  // Test data, created once before timing starts.
  auto test_vals = benchmark_rng.Int64(test_size, 0, max_val);
  auto test_intarr = std::static_pointer_cast<arrow::Int64Array>(test_vals);

  const int64_t num_values = test_vals->length();
  const int64_t num_batches = num_values / kBatchSize;
  const int64_t tail_length = num_values % kBatchSize;

  while (state.KeepRunning()) {
    const int64_t* int_vals = test_intarr->raw_values();

    arrow::BooleanBuilder out_builder;
    ASSERT_OK(out_builder.Reserve(num_values));

    uint8_t temp_output[kBatchSize];

    // Full batches: the inner loop keeps a compile-time constant trip count.
    for (int64_t batch_ndx = 0; batch_ndx < num_batches; ++batch_ndx) {
      for (int val_ndx = 0; val_ndx < kBatchSize; ++val_ndx) {
        temp_output[val_ndx] = *(int_vals++) >= 100 ? 1 : 0;
      }
      ASSERT_OK(out_builder.AppendValues(temp_output, kBatchSize));
    }

    // Tail batch: whatever is left after the last full batch.
    if (tail_length > 0) {
      for (int64_t val_ndx = 0; val_ndx < tail_length; ++val_ndx) {
        temp_output[val_ndx] = *(int_vals++) >= 100 ? 1 : 0;
      }
      ASSERT_OK(out_builder.AppendValues(temp_output, tail_length));
    }

    auto out_array = out_builder.Finish();
    ASSERT_OK(out_array.status());

    benchmark::DoNotOptimize(out_array);
    benchmark::DoNotOptimize(test_intarr);
    benchmark::DoNotOptimize(out_builder);
  }

  // Throughput counters: one int64 input value per item.
  state.SetBytesProcessed(state.iterations() * (test_vals->length() * sizeof(int64_t)));
  state.SetItemsProcessed(state.iterations() * test_vals->length());
}
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <algorithm>
#include <cstdint>
#include <limits>
#include <random>
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/compute/exec.h"
namespace arrow {
namespace internal {
namespace {

// Seed handed to the shared random array generator below. On platforms with a
// 32-bit int this hex literal does not fit in a signed int, so `auto` deduces
// an unsigned type; keep the parenthesized initialization of benchmark_rng so
// the value converts exactly as before.
constexpr auto kSeed = 0x94378165;

// Presumably the null probability for generated arrays; it is not referenced
// by any code visible in this file.
constexpr double null_prob = 0.0;

// Generator used by the benchmark functions to create their input arrays.
// Names in an unnamed namespace already have internal linkage, so no `static`
// qualifier is required.
random::RandomArrayGenerator benchmark_rng(kSeed);

}  // anonymous namespace
// ------------------------------
// Benchmark functions
// Baseline benchmark: evaluates `value >= 100` one element at a time and
// appends each boolean result to an arrow::BooleanBuilder.
//
// The input array is requested once, outside the timed loop, via
// benchmark_rng.Int64(test_size, 0, max_val). Every timed iteration creates a
// fresh BooleanBuilder, so builder construction, Reserve(), the per-element
// Append() calls and Finish() are all part of the measurement.
//
// Note: with a threshold of 100 and an upper bound of int64 max, nearly every
// generated value is presumably >= 100 (assuming the generator spreads values
// across the requested range), so the comparison outcome is highly predictable.
static void GreaterEqual(benchmark::State& state) {  // NOLINT non-const reference
  constexpr int64_t test_size = 10000;
  constexpr int64_t max_val = std::numeric_limits<int64_t>::max();

  auto test_vals = benchmark_rng.Int64(test_size, 0, max_val);
  auto test_ints = std::static_pointer_cast<arrow::Int64Array>(test_vals);

  // Hoisted loop bound; the index below uses the same 64-bit width so the
  // comparison never mixes an `int` counter with a wider length.
  const int64_t num_values = test_ints->length();

  while (state.KeepRunning()) {
    arrow::BooleanBuilder builder;

    // Builder calls return a Status; check it rather than storing it in a
    // variable that is never inspected.
    ASSERT_OK(builder.Reserve(num_values));
    for (int64_t i = 0; i < num_values; ++i) {
      ASSERT_OK(builder.Append(test_ints->Value(i) >= 100));
    }

    auto result = builder.Finish();
    ASSERT_OK(result.status());
    benchmark::DoNotOptimize(result);
  }

  // Throughput counters: one int64 input value per item.
  state.SetBytesProcessed(state.iterations() * (test_vals->length() * sizeof(int64_t)));
  state.SetItemsProcessed(state.iterations() * test_vals->length());
}
// >> Functions that use compute layer
// Benchmarks the compute layer: each timed iteration invokes the
// "greater_equal" function through compute::CallFunction on an int64 array and
// an int64 scalar holding 100.
//
// Despite the function's name, the operation measured is `>=`, not `>`.
static void ComputeGreaterThan(benchmark::State& state) {  // NOLINT non-const reference
  constexpr int64_t kNumValues = 10000;
  constexpr int64_t kUpperBound = std::numeric_limits<int64_t>::max();

  // Inputs are created once, before timing starts.
  auto input_array = benchmark_rng.Int64(kNumValues, 0, kUpperBound);
  auto threshold = std::make_shared<arrow::Int64Scalar>(100);

  while (state.KeepRunning()) {
    ASSERT_OK_AND_ASSIGN(Datum comparison,
                         compute::CallFunction("greater_equal", {input_array, threshold}));

    // Keep the result and both inputs observable to the optimizer.
    benchmark::DoNotOptimize(comparison);
    benchmark::DoNotOptimize(input_array);
    benchmark::DoNotOptimize(threshold);
  }

  // Throughput counters: one int64 input value per item.
  const auto bytes_per_iteration = input_array->length() * sizeof(int64_t);
  state.SetItemsProcessed(state.iterations() * input_array->length());
  state.SetBytesProcessed(state.iterations() * bytes_per_iteration);
}
// ----------------------------------------------------------------------
// Benchmark declarations
// Register each benchmark function with Google Benchmark.
// NOTE(review): ArrayGreaterThan is registered here, but no definition of it
// appears inside this namespace in the visible source — confirm it is defined
// in this translation unit before these registrations.
BENCHMARK(GreaterEqual);
BENCHMARK(ArrayGreaterThan);
BENCHMARK(ComputeGreaterThan);
} // namespace internal
} // namespace arrow
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment