Skip to content

Instantly share code, notes, and snippets.

@adzcai
Last active August 19, 2021 16:27
Show Gist options
  • Save adzcai/b3d3501b4a14b8d5c44f76f7338c96a3 to your computer and use it in GitHub Desktop.
Save adzcai/b3d3501b4a14b8d5c44f76f7338c96a3 to your computer and use it in GitHub Desktop.
Getting started with Boost.Python and CMake

Getting started with Boost.Python

Boost.Python is a library in the Boost family of C++ libraries that helps interface between C++ and Python.

CMake is a build-system generator that lets us describe our build once, in a cross-platform way, and makes it easier to locate and manage our dependencies.

Apache Arrow is a columnar memory format for efficient analytic operations. We build both Arrow C++ and pyarrow from source; pyarrow depends on the Arrow C++ libraries, so Arrow C++ is built first. See the minimal build example for more information.

Getting started

  1. Clone this gist:
git clone https://gist.github.com/pi-guy-in-the-sky/b3d3501b4a14b8d5c44f76f7338c96a3 boost-python-demo
cd boost-python-demo
  2. Create a Docker image using the Dockerfile in the directory:
docker build -t boost-python-demo .
  3. Run a Docker container from this image:
docker run --rm boost-python-demo
  4. It should print "Hello world!", followed by the contents of the sample Arrow table.
cmake_minimum_required(VERSION 3.16)
project(hello_cmake_boost_python)

# Locate the three dependencies: the Python 3 development files (with NumPy),
# the Boost python3 component, and Arrow. All of them are mandatory.
find_package(Python3 REQUIRED COMPONENTS Development NumPy)
find_package(Boost REQUIRED COMPONENTS python3)
find_package(Arrow REQUIRED)

# Build hello.cpp into a shared library; this is the file Python will import.
add_library(hello SHARED hello.cpp)

# Drop the default "lib" prefix so the output file is named after the module
# ("hello") and can be found by "import hello".
set_property(TARGET hello PROPERTY PREFIX "")

# The Python headers are only needed to compile hello itself.
target_include_directories(hello PRIVATE ${Python3_INCLUDE_DIRS})

# Link against Boost.Python, the Arrow C++ and Arrow Python libraries, and NumPy.
target_link_libraries(hello
    ${Boost_LIBRARIES}
    arrow_shared
    arrow_python
    Python3::NumPy
)

# Provide an install target (e.g. "make install" or "cmake --install .")
# that places the module under <prefix>/lib.
install(TARGETS hello DESTINATION lib)
# Image that builds Boost.Python, Arrow C++ and pyarrow from source, then
# builds this project's "hello" extension module and runs main.py.
FROM ubuntu:20.04
# avoid interactive prompts from apt while the image is being built
ENV DEBIAN_FRONTEND=noninteractive
# /usr/local/lib is added to both the dynamic loader path and the Python
# module search path.
# NOTE(review): presumably this is where Boost, Arrow and hello.so end up
# (the project installs with DESTINATION lib) -- confirm the install prefix.
ENV LD_LIBRARY_PATH=/usr/local/lib
ENV PYTHONPATH=/usr/local/lib
# NOTE(review): presumably read by the pyarrow build to locate Arrow C++ -- confirm.
ENV ARROW_HOME=/usr/local
# install the build toolchain (compiler, cmake, ninja, git, wget) together
# with the Python headers and pip; the apt caches are removed in the same
# layer so they do not remain in the image
RUN apt-get update -yq && \
apt-get install -yq --no-install-recommends \
apt-transport-https \
build-essential \
cmake \
git \
ninja-build \
python3-dev \
python3-pip \
software-properties-common \
wget && \
apt-get clean && rm -rf /var/lib/apt/lists*
# upgrade pip and setuptools, then install wheel
RUN python3 -m pip install --upgrade pip && \
python3 -m pip install --upgrade setuptools && \
python3 -m pip install wheel
WORKDIR /root
# download and unpack the Boost 1.76.0 sources, and clone the Arrow repository
# NOTE(review): the clone is not pinned to a tag or commit, so every build
# picks up whatever the repository's default branch contains at that time.
RUN wget https://boostorg.jfrog.io/artifactory/main/release/1.76.0/source/boost_1_76_0.tar.bz2 && \
tar --bzip2 -xf boost_1_76_0.tar.bz2 && \
git clone https://github.com/apache/arrow.git
# install the boost libraries, passing the path to the python interpreter;
# only the "python" library is requested via --with-libraries
WORKDIR /root/boost_1_76_0
RUN ./bootstrap.sh --with-python=$(which python3) --with-libraries=python && \
./b2 install
# install arrow C++
# since pyarrow (ARROW_PYTHON) requires numpy to be installed,
# we first use pip to install the requirements
# the build is configured with the Ninja generator from a separate build
# directory (arrow/cpp/build), with the library install directory set to "lib"
WORKDIR /root/arrow/cpp/build
RUN python3 -m pip install -r /root/arrow/python/requirements-build.txt && \
cmake -GNinja \
-DCMAKE_INSTALL_LIBDIR=lib \
-DARROW_PYTHON=ON \
.. && \
cmake --build . && \
cmake --install .
# install pyarrow from the same checkout
# NOTE(review): PYARROW_CMAKE_GENERATOR presumably makes pyarrow's setup.py
# use Ninja as well -- confirm against the pyarrow build documentation.
WORKDIR /root/arrow/python
ENV PYARROW_CMAKE_GENERATOR=Ninja
RUN python3 setup.py install
# copy our source code (the whole build context) into /root in the container
WORKDIR /root
COPY . .
# build our project out-of-source in /root/build and install the result
WORKDIR /root/build
RUN cmake -GNinja .. && \
cmake --build . && \
cmake --install .
# run the sample python code when a container is started from this image
WORKDIR /root
CMD [ "python3", "main.py" ]
#include <boost/python.hpp>
#include <arrow/api.h>
#include <arrow/python/api.h>
#include <arrow/python/pyarrow.h>
#include <iostream>
// Returns the greeting text as a NUL-terminated C string with static storage
// duration; the caller must not free or modify it.
char const *greet()
{
    static constexpr char kGreeting[] = "Hello world!";
    return kGreeting;
}
// sample code for creating an arrow::Table
//
// Builds a one-column table (an int32 column named "test" holding 42, 48, 64)
// and hands it to Python as a wrapped pyarrow object.
//
// Returns the PyObject* produced by arrow::py::wrap_table on success.
// On any failure (pyarrow import, appending a value, finishing the array) a
// message is written to std::cerr and nullptr is returned.
// NOTE(review): how a nullptr result surfaces on the Python side depends on
// the binding layer's handling of null PyObject* returns -- confirm that
// callers get a usable error rather than a silent None.
PyObject *create_table()
{
    // initialize pyarrow; a negative return value signals failure
    if (arrow::py::import_pyarrow() < 0)
    {
        std::cerr << "error importing pyarrow\n";
        return nullptr;
    }

    // create arrow::Array with test data
    constexpr int kTestValues[] = {42, 48, 64};
    arrow::Int32Builder builder;
    for (const int value : kTestValues)
    {
        // Append reports failure through its Status; do not ignore it,
        // otherwise a failed append would go unnoticed and the column would
        // silently be missing values.
        const arrow::Status append_status = builder.Append(value);
        if (!append_status.ok())
        {
            std::cerr << "error appending to array\n";
            return nullptr;
        }
    }

    std::shared_ptr<arrow::Array> arr;
    const arrow::Status st = builder.Finish(&arr);
    if (!st.ok())
    {
        std::cerr << "error finishing array\n";
        return nullptr;
    }

    // create an arrow::Table with a single column with the test data
    auto field1 = arrow::field("test", arrow::int32());
    auto schema = arrow::schema({field1});
    auto table = arrow::Table::Make(schema, {arr});

    // wrap the table into a PyObject
    return arrow::py::wrap_table(table);
}
// The identifier passed to BOOST_PYTHON_MODULE has to match the name used in
// Python's import statement: since main.py runs "import hello", the module
// must be declared as "hello" (and not, say, "hello_ext").
BOOST_PYTHON_MODULE(hello)
{
    namespace bp = boost::python;

    // expose the two free functions under the same names they have in C++
    bp::def("greet", &greet);
    bp::def("create_table", &create_table);
}
import hello

# Call the plain C++ function exposed by the extension module and show its result.
greeting = hello.greet()
print(greeting)

# Ask the extension module for the sample table, then print its string form
# followed by its first column.
table = hello.create_table()
table_text = table.to_string()
print(table_text)
first_column = table.column(0)
print(first_column)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment