Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,43 @@ jobs:
timeout-minutes: 10 # Consider increasing timeout
working-directory: build/benchmark
run: ./benchmark_xtensor

# CI job: build xtensor benchmarks with GCC 14 + xsimd and compare against
# NumPy rebuilt from source with matching optimization flags.
numpy:
  runs-on: ubuntu-24.04
  name: gcc 14 - numpy-xsimd
  steps:
    - name: Install GCC
      uses: egor-tensin/setup-gcc@v1
      with:
        version: '14'
        platform: x64

    - name: Checkout code
      # v3 runs on the deprecated Node 16 runtime; v4 is the supported release.
      uses: actions/checkout@v4

    - name: Set conda environment
      # Pin to a release tag instead of the mutable @main ref so CI runs are
      # reproducible and not broken by upstream changes.
      uses: mamba-org/setup-micromamba@v2
      with:
        environment-name: myenv
        environment-file: environment-dev.yml
        init-shell: bash
        cache-downloads: true

    - name: Configure using CMake
      run: |
        export CC=gcc-14
        export CXX=g++-14
        cmake -G Ninja -Bbuild -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DBUILD_BENCHMARK=ON -DBUILD_NUMPY_BENCHMARKS=ON -DXTENSOR_USE_XSIMD=ON

    - name: Build NumPy from source
      # Rebuilds NumPy with the same compiler/flags as the benchmark target
      # (see the xbenchmark_numpy_env custom target).
      working-directory: build
      run: cmake --build . --target xbenchmark_numpy_env

    - name: Build benchmark target
      working-directory: build
      run: cmake --build . --target benchmark_xtensor --parallel 8

    - name: Run NumPy comparison benchmark
      timeout-minutes: 20
      working-directory: build/benchmark
      run: ./benchmark_xtensor --benchmark_filter='(add_|multiply_|sin_|exp_|sum_axis[01]_)'
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ OPTION(XTENSOR_CHECK_DIMENSION "xtensor dimension check" OFF)
OPTION(XTENSOR_FORCE_TEMPORARY_MEMORY_IN_ASSIGNMENTS "xtensor force the use of temporary memory when assigning instead of an automatic overlap check" ON)
OPTION(BUILD_TESTS "xtensor test suite" OFF)
OPTION(BUILD_BENCHMARK "xtensor benchmark" OFF)
OPTION(BUILD_NUMPY_BENCHMARKS "xtensor benchmark comparisons against NumPy" OFF)
OPTION(DOWNLOAD_GBENCHMARK "download google benchmark and build from source" ON)
OPTION(DEFAULT_COLUMN_MAJOR "set default layout to column major" OFF)
OPTION(CPP23 "enables C++23 (experimental)" OFF)
Expand Down Expand Up @@ -247,6 +248,10 @@ if(BUILD_TESTS)
add_subdirectory(test)
endif()

# BUILD_NUMPY_BENCHMARKS only takes effect inside benchmark/ (added below when
# BUILD_BENCHMARK is ON), so requesting it alone would silently do nothing.
# Fail fast with a clear message instead.
if(BUILD_NUMPY_BENCHMARKS AND NOT BUILD_BENCHMARK)
    message(FATAL_ERROR "BUILD_NUMPY_BENCHMARKS requires BUILD_BENCHMARK=ON")
endif()

if(BUILD_BENCHMARK)
add_subdirectory(benchmark)
endif()
Expand Down
52 changes: 52 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,58 @@ cmake -DCMAKE_INSTALL_PREFIX=your_install_prefix
make install
```

### Benchmarking xtensor against NumPy

The benchmark target can optionally compare xtensor and NumPy on matching
elementwise operations and reductions. The current comparison set includes
elementwise math functions together with reducers such as `sum`, `mean`,
`amin`, `amax`, and `prod`. The NumPy comparison path embeds Python and calls
the public NumPy API from the same benchmark executable.

To keep the comparison fair, build the benchmarks in release mode and rebuild
NumPy from source with the same optimization policy used by the xtensor
benchmark target:

```bash
cmake -G Ninja -Bbuild -DBUILD_BENCHMARK=ON -DBUILD_NUMPY_BENCHMARKS=ON -DXTENSOR_USE_XSIMD=ON
cmake --build build --target xbenchmark_numpy_env
cmake --build build --target benchmark_xtensor
./build/benchmark/benchmark_xtensor --benchmark_filter='(add_|multiply_|sin_|exp_|sum_|mean_|amin_|amax_|prod_)'
```

`xbenchmark_numpy_env` reinstalls NumPy from source with
`XTENSOR_NUMPY_BENCHMARK_CFLAGS`, which defaults to `-O3` and adds
`-march=native` when supported by the current compiler. It also exports
`CC` and `CXX` from the active CMake configuration so NumPy is built with the
same compiler toolchain as the xtensor benchmark target. The install uses
`--no-cache-dir --no-binary=numpy` so the target recompiles NumPy instead of
reusing a cached wheel. The requested BLAS and LAPACK backends are controlled
with `XTENSOR_NUMPY_BENCHMARK_BLAS` and `XTENSOR_NUMPY_BENCHMARK_LAPACK`,
which default to `openblas`, and the benchmark startup banner reports the
backend and compiler arguments NumPy actually used.

To generate a Markdown comparison report from the benchmark output, use:

```bash
python tools/report_numpy_benchmarks.py \
--benchmark-exe build/benchmark/benchmark_xtensor \
--benchmark-filter='(add_|multiply_|sin_|exp_|sum_|mean_|amin_|amax_|prod_)' \
--output build/xtensor_numpy_report.md
```

The script can also analyze an existing Google Benchmark JSON file via
`--input-json`.

For a one-command workflow from CMake, use:

```bash
cmake --build build --target xbenchmark_numpy_report
```

The report target rebuilds NumPy from source through `xbenchmark_numpy_env`,
runs the benchmark executable, and writes the Markdown report to
`XTENSOR_NUMPY_BENCHMARK_REPORT`.

### Installing xtensor using vcpkg

You can download and install xtensor using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
Expand Down
46 changes: 46 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,18 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)

include(CheckCXXCompilerFlag)

set(_xtensor_numpy_benchmark_cflags "-O3")

string(TOUPPER "${CMAKE_BUILD_TYPE}" U_CMAKE_BUILD_TYPE)

if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Intel")
CHECK_CXX_COMPILER_FLAG(-march=native arch_native_supported)
if(arch_native_supported AND NOT CMAKE_CXX_FLAGS MATCHES "-march")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
endif()
if(arch_native_supported)
string(APPEND _xtensor_numpy_benchmark_cflags " -march=native")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g -Wunused-parameter -Wextra -Wreorder")

if(NOT "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC")
Expand Down Expand Up @@ -63,6 +68,13 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU"
endif()
endif()

# User-tunable knobs for the NumPy comparison benchmarks. All are cache
# variables, so each can be overridden on the configure command line
# (e.g. -DXTENSOR_NUMPY_BENCHMARK_BLAS=mkl) without editing this file.
# _xtensor_numpy_benchmark_cflags is assembled above: "-O3", plus
# "-march=native" when the compiler supports it.
set(XTENSOR_NUMPY_BENCHMARK_CFLAGS "${_xtensor_numpy_benchmark_cflags}" CACHE STRING "CFLAGS used to build NumPy from source for comparison benchmarks")
set(XTENSOR_NUMPY_BENCHMARK_BLAS "openblas" CACHE STRING "BLAS backend requested when building NumPy from source for comparison benchmarks")
set(XTENSOR_NUMPY_BENCHMARK_LAPACK "openblas" CACHE STRING "LAPACK backend requested when building NumPy from source for comparison benchmarks")
set(XTENSOR_NUMPY_BENCHMARK_REPORT "${CMAKE_BINARY_DIR}/xtensor_numpy_report.md" CACHE FILEPATH "Output path for the xtensor vs NumPy benchmark report")
set(XTENSOR_NUMPY_BENCHMARK_REPORT_FILTER ".*_(xtensor|numpy)/.*" CACHE STRING "Google Benchmark filter used when generating the xtensor vs NumPy report")
set(XTENSOR_NUMPY_BENCHMARK_REPORT_MIN_TIME "0.05s" CACHE STRING "Minimum benchmark runtime used when generating the xtensor vs NumPy report")

if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /MP /bigobj")
set(CMAKE_EXE_LINKER_FLAGS /MANIFEST:NO)
Expand Down Expand Up @@ -120,11 +132,45 @@ set(XTENSOR_BENCHMARK
main.cpp
)

if(BUILD_NUMPY_BENCHMARKS)
    # The NumPy comparison benchmarks embed CPython in the benchmark binary:
    # Interpreter is needed to locate python for the pip-driven custom targets,
    # Development.Embed provides the Python3::Python library to link against.
    list(APPEND XTENSOR_BENCHMARK benchmark_numpy.cpp)
    find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Embed)
endif()


set(XTENSOR_BENCHMARK_TARGET benchmark_xtensor)
# EXCLUDE_FROM_ALL: benchmarks are only built when the target is requested explicitly.
add_executable(${XTENSOR_BENCHMARK_TARGET} EXCLUDE_FROM_ALL ${XTENSOR_BENCHMARK} ${XTENSOR_HEADERS})
target_link_libraries(${XTENSOR_BENCHMARK_TARGET} PUBLIC xtensor ${GBENCHMARK_LIBRARIES})

if(BUILD_NUMPY_BENCHMARKS)
    target_compile_definitions(${XTENSOR_BENCHMARK_TARGET} PUBLIC XTENSOR_ENABLE_NUMPY_BENCHMARKS=1)
    target_link_libraries(${XTENSOR_BENCHMARK_TARGET} PUBLIC Python3::Python)

    # Reinstall NumPy from source so both sides of the comparison are compiled
    # with the same toolchain and optimization flags as the benchmark target.
    # --no-binary=numpy forces a source build; --no-cache-dir keeps pip from
    # reusing a previously built wheel with stale flags.
    # VERBATIM added for platform-independent argument escaping, consistent
    # with the xbenchmark_numpy_report target below.
    add_custom_target(xbenchmark_numpy_env
        COMMAND ${CMAKE_COMMAND} -E env
                "CC=${CMAKE_C_COMPILER}"
                "CXX=${CMAKE_CXX_COMPILER}"
                "CFLAGS=${XTENSOR_NUMPY_BENCHMARK_CFLAGS}"
                "CXXFLAGS=${XTENSOR_NUMPY_BENCHMARK_CFLAGS}"
                ${Python3_EXECUTABLE} -m pip install --upgrade --force-reinstall --no-cache-dir --no-binary=numpy
                --config-settings=setup-args=-Dblas=${XTENSOR_NUMPY_BENCHMARK_BLAS}
                --config-settings=setup-args=-Dlapack=${XTENSOR_NUMPY_BENCHMARK_LAPACK}
                numpy
        USES_TERMINAL
        VERBATIM
        COMMENT "Installing NumPy from source with benchmark flags: ${XTENSOR_NUMPY_BENCHMARK_CFLAGS}, BLAS=${XTENSOR_NUMPY_BENCHMARK_BLAS}, LAPACK=${XTENSOR_NUMPY_BENCHMARK_LAPACK}")

    # One-command workflow: rebuild NumPy, run the benchmark executable, and
    # write the Markdown comparison report to XTENSOR_NUMPY_BENCHMARK_REPORT.
    # PROJECT_SOURCE_DIR (not CMAKE_CURRENT_SOURCE_DIR/..) locates tools/
    # robustly relative to the project root.
    add_custom_target(xbenchmark_numpy_report
        COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/tools/report_numpy_benchmarks.py
                --benchmark-exe $<TARGET_FILE:${XTENSOR_BENCHMARK_TARGET}>
                --benchmark-filter=${XTENSOR_NUMPY_BENCHMARK_REPORT_FILTER}
                --benchmark-min-time=${XTENSOR_NUMPY_BENCHMARK_REPORT_MIN_TIME}
                --output ${XTENSOR_NUMPY_BENCHMARK_REPORT}
        DEPENDS xbenchmark_numpy_env ${XTENSOR_BENCHMARK_TARGET}
        USES_TERMINAL
        VERBATIM
        COMMENT "Generating xtensor vs NumPy benchmark report at ${XTENSOR_NUMPY_BENCHMARK_REPORT}")
endif()

if(XTENSOR_USE_TBB)
target_compile_definitions(${XTENSOR_BENCHMARK_TARGET} PUBLIC XTENSOR_USE_TBB)
target_include_directories(${XTENSOR_BENCHMARK_TARGET} PUBLIC ${TBB_INCLUDE_DIRS})
Expand Down
Loading
Loading