diff --git a/.github/workflows/alpine3.yml b/.github/workflows/alpine3.yml index e82c07205..750969b5a 100644 --- a/.github/workflows/alpine3.yml +++ b/.github/workflows/alpine3.yml @@ -8,4 +8,3 @@ jobs: with: container: alpine:3 pre-checkout-script: apk add bash git - run-valgrind: true diff --git a/.github/workflows/arm.yml b/.github/workflows/arm.yml index e1fad3af1..1362b57b2 100644 --- a/.github/workflows/arm.yml +++ b/.github/workflows/arm.yml @@ -34,7 +34,6 @@ jobs: uses: ./.github/workflows/task-unit-test.yml with: env: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - run-valgrind: false # run the job without valgrind stop-runner: name: Stop self-hosted EC2 runner diff --git a/.github/workflows/debian11.yml b/.github/workflows/debian11.yml index c6c2476d1..ddafd26e2 100644 --- a/.github/workflows/debian11.yml +++ b/.github/workflows/debian11.yml @@ -1,30 +1,9 @@ name: debian bullseye flow -on: - workflow_dispatch: - inputs: - gcc11: - description: 'Use GCC 11' - required: false - default: false - type: boolean - workflow_call: - inputs: - gcc11: - description: 'Use GCC 11' - required: false - default: false - type: boolean +on: [workflow_dispatch, workflow_call] jobs: bullseye: - uses: ./.github/workflows/task-unit-test.yml - with: - container: debian:bullseye - pre-checkout-script: apt-get update && apt-get -y install git - - bullseye-gcc11: - if: ${{ inputs.gcc11 == true }} uses: ./.github/workflows/task-unit-test.yml with: container: gcc:11-bullseye diff --git a/.github/workflows/event-merge-to-queue.yml b/.github/workflows/event-merge-to-queue.yml index 99381c6d6..3a8e53b3f 100644 --- a/.github/workflows/event-merge-to-queue.yml +++ b/.github/workflows/event-merge-to-queue.yml @@ -21,8 +21,6 @@ jobs: # container: ubuntu:bionic bullseye: uses: ./.github/workflows/debian11.yml - with: - gcc11: false # amazonlinux2: # needs: [check-if-docs-only] # if: ${{ needs.check-if-docs-only.outputs.only-docs-changed == 'false' }} diff --git a/.github/workflows/event-nightly.yml b/.github/workflows/event-nightly.yml index 2ee33aa02..1281bae40 100644 --- a/.github/workflows/event-nightly.yml +++ b/.github/workflows/event-nightly.yml @@ -24,8 +24,6 @@ jobs: # container: ubuntu:bionic bullseye: uses: ./.github/workflows/debian11.yml - with: - gcc11: true # amazonlinux2: # needs: [check-if-docs-only] # if: ${{ needs.check-if-docs-only.outputs.only-docs-changed == 'false' }} diff --git a/.github/workflows/event-pull_request.yml b/.github/workflows/event-pull_request.yml index e229cbf88..29b6443b3 100644 --- a/.github/workflows/event-pull_request.yml +++ b/.github/workflows/event-pull_request.yml @@ -42,6 +42,8 @@ jobs: run: make check-format - name: unit tests run: make unit_test + - name: address sanitizer tests + run: make asan - name: flow tests run: make flow_test VERBOSE=1 # Using version 4 if node20 is supported, since it is MUCH faster (15m vs 25s) @@ -57,7 +59,6 @@ jobs: coverage: - needs: [basic-tests] if: ${{ !github.event.pull_request.draft}} uses: ./.github/workflows/coverage.yml secrets: inherit diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 64ab0224e..0daeb0797 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -7,4 +7,3 @@ jobs: uses: ./.github/workflows/task-unit-test.yml with: env: macos-latest - run-valgrind: false diff --git a/.github/workflows/mariner2.yml b/.github/workflows/mariner2.yml index 2e6892e97..b46bb1fd2 100644 --- a/.github/workflows/mariner2.yml +++ b/.github/workflows/mariner2.yml @@ -8,4 +8,3 @@ jobs: with: container: mcr.microsoft.com/cbl-mariner/base/core:2.0 pre-checkout-script: tdnf install -y --noplugins --skipsignature tar gzip ca-certificates git - run-valgrind: false # TODO: enable valgrind? (requires to install valgrind) diff --git a/.github/workflows/task-unit-test.yml b/.github/workflows/task-unit-test.yml index e7066b0c7..f0e5af619 100644 --- a/.github/workflows/task-unit-test.yml +++ b/.github/workflows/task-unit-test.yml @@ -13,10 +13,6 @@ on: pre-checkout-script: description: 'Script to run before checkout' type: string - run-valgrind: - description: 'Run valgrind tests' - type: boolean - default: true jobs: test: @@ -67,12 +63,3 @@ jobs: - name: unit tests run: make unit_test - - name: valgrind - if: ${{ inputs.run-valgrind }} - run: make valgrind - - name: Archive valgrind tests reports - if: ${{ inputs.run-valgrind && failure() }} - uses: actions/upload-artifact@v4 - with: - name: valgrind tests reports on ${{ steps.artifact-name.outputs.name }} - path: bin/Linux-x86_64-debug/unit_tests/Testing/Temporary/ diff --git a/.install/install_cmake.sh b/.install/install_cmake.sh index 418a8df22..b4b8787ff 100644 --- a/.install/install_cmake.sh +++ b/.install/install_cmake.sh @@ -6,7 +6,7 @@ MODE=$1 # whether to install using sudo or not if [[ $OS_TYPE = 'Darwin' ]] then - brew install cmake + cmake --version || brew install cmake # Installed via brew if not already present else if [[ $processor = 'x86_64' ]] then diff --git a/Makefile b/Makefile index 7aab4a92b..50bbc5ea7 100644 --- a/Makefile +++ b/Makefile @@ -8,14 +8,6 @@ ifneq ($(filter coverage show-cov upload-cov,$(MAKECMDGOALS)),) COV=1 endif -ifneq ($(VG),) -VALGRIND=$(VG) -endif - -ifeq ($(VALGRIND),1) -override DEBUG ?= 1 -endif - ifeq ($(COV),1) override DEBUG ?= 1 CMAKE_COV += -DUSE_COVERAGE=ON @@ -25,31 +17,22 @@ ifeq ($(NO_TESTS),1) CMAKE_TESTS += -DVECSIM_BUILD_TESTS=off endif -ifneq ($(SAN),) -override DEBUG ?= 1 -export ASAN_OPTIONS=detect_odr_violation=0:allocator_may_return_null=1 -export MSAN_OPTIONS=allocator_may_return_null=1 - -ifeq ($(SAN),mem) -override SAN=memory -else ifeq ($(SAN),addr) -override SAN=address +ifeq ($(ASAN),1) +override SAN ?= address endif -ifeq ($(SAN),memory) -CMAKE_SAN=-DUSE_MSAN=ON -override CTEST_ARGS += --exclude-regex BruteForceTest.sanity_rinsert_1280 +ifneq ($(SAN),) -else ifeq ($(SAN),address) +ifeq ($(SAN),address) +override SAN=address +export ASAN_OPTIONS=detect_odr_violation=0:allocator_may_return_null=1 CMAKE_SAN=-DUSE_ASAN=ON -else ifeq ($(SAN),leak) -else ifeq ($(SAN),thread) else -$(error SAN=mem|addr|leak|thread) +$(error SAN=address is currently the only supported option) endif export SAN -endif # SAN +endif # SAN != '' ROOT=. export ROOT @@ -60,8 +43,8 @@ make build DEBUG=1 # build debug variant COV=1 # build for code coverage VERBOSE=1 # print detailed build info - VG|VALGRIND=1 # build for Valgrind - SAN=type # build with LLVM sanitizer (type=address|memory|leak|thread) + ASAN=1 # build with AddressSanitizer (clang) + SAN=type # build with LLVM sanitizer (type=address) SLOW=1 # don't run build in parallel (for diagnostics) PROFILE=1 # enable profiling compile flags (and debug symbols) for release type. make pybind # build Python bindings @@ -70,9 +53,9 @@ make clean # remove binary files make unit_test # run unit tests CTEST_ARGS=args # extra CTest arguments - VG|VALGRIND=1 # run tests with valgrind + ASAN=1 # run tests with AddressSanitizer FP_64=1 # run tests with 64-bit floating point -make valgrind # build for Valgrind and run tests +make asan # build with AddressSanitizer and run tests make flow_test # run flow tests (with pytest) TEST=file::name # run specific test VERBOSE=1 # print detailed bindings build info @@ -96,6 +79,11 @@ FLAVOR=debug else FLAVOR=release endif + +ifeq ($(ASAN),1) +FLAVOR := ${FLAVOR}-asan +endif + FULL_VARIANT:=$(shell uname)-$(shell uname -m)-$(FLAVOR) BINROOT=$(ROOT)/bin/$(FULL_VARIANT) BINDIR=$(BINROOT) @@ -173,12 +161,7 @@ ifeq ($(VERBOSE),1) _CTEST_ARGS += -V endif -ifeq ($(VALGRIND),1) -_CTEST_ARGS += \ - -T memcheck \ - --overwrite MemoryCheckCommandOptions="--leak-check=full --fair-sched=yes --error-exitcode=255" -CMAKE_FLAGS += -DUSE_VALGRIND=ON -endif +# AddressSanitizer is handled via SAN=address in cmake/san.cmake unit_test: $(SHOW)mkdir -p $(BINDIR) @@ -186,10 +169,10 @@ unit_test: @make --no-print-directory -C $(BINDIR) $(MAKE_J) $(SHOW)cd $(TESTDIR) && GTEST_COLOR=1 ctest $(_CTEST_ARGS) -valgrind: - $(SHOW)$(MAKE) VG=1 unit_test +asan: + $(SHOW)$(MAKE) ASAN=1 unit_test -.PHONY: unit_test valgrind +.PHONY: unit_test asan #---------------------------------------------------------------------------------------------- ifeq ($(VERBOSE),1) diff --git a/cmake/san.cmake b/cmake/san.cmake index 4e4aa049b..31887ebcb 100644 --- a/cmake/san.cmake +++ b/cmake/san.cmake @@ -7,7 +7,7 @@ if (USE_ASAN OR USE_MSAN) set(CMAKE_LINKER "${CMAKE_C_COMPILER}") if (USE_ASAN) - set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=address") + set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=address -fsized-deallocation") elseif (USE_MSAN) set(CLANG_SAN_FLAGS "-fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins=2") diff --git a/cmake/svs.cmake b/cmake/svs.cmake index fe10970db..33ec76190 100644 --- a/cmake/svs.cmake +++ b/cmake/svs.cmake @@ -28,13 +28,6 @@ if(USE_SVS) if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "(x86_64)|(AMD64|amd64)") set(SVS_LVQ_SUPPORTED 1) - # Valgrind does not support AVX512 and Valgrind in running in Debug - # so disable it if we are in Debug mode - string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) - if(uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") - message(STATUS "SVS: Disabling AVX512 support in Debug mode due to Valgrind") - set(SVS_NO_AVX512 ON) - endif() else() set(SVS_LVQ_SUPPORTED 0) message(STATUS "SVS LVQ is not supported on this architecture") diff --git a/src/VecSim/vec_sim_index.h b/src/VecSim/vec_sim_index.h index 439f820d6..b39e2dcba 100644 --- a/src/VecSim/vec_sim_index.h +++ b/src/VecSim/vec_sim_index.h @@ -29,7 +29,7 @@ * @param allocator The allocator to use for the index. * @param dim The dimension of the vectors in the index. * @param vecType The type of the vectors in the index. - * @param dataSize The size of stored vectors in bytes. + * @param dataSize The size of stored vectors (possibly after pre-processing) in bytes. * @param metric The metric to use in the index. * @param blockSize The block size to use in the index. * @param multi Determines if the index should multi-index or not. @@ -68,18 +68,20 @@ struct IndexComponents { template struct VecSimIndexAbstract : public VecSimIndexInterface { protected: - size_t dim; // Vector's dimension. - VecSimType vecType; // Datatype to index. - size_t dataSize; // Vector size in bytes - VecSimMetric metric; // Distance metric to use in the index. - size_t blockSize; // Index's vector block size (determines by how many vectors to resize when - // resizing) + size_t dim; // Vector's dimension. + VecSimType vecType; // Datatype to index. + VecSimMetric metric; // Distance metric to use in the index. + size_t inputBlobSize; // The size of the vector input blob in bytes. + size_t blockSize; // Index's vector block size (determines by how many vectors to resize when + // resizing) IndexCalculatorInterface *indexCalculator; // Distance calculator. - PreprocessorsContainerAbstract *preprocessors; // Stroage and query preprocessors. + PreprocessorsContainerAbstract *preprocessors; // Storage and query preprocessors. mutable VecSearchMode lastMode; // The last search mode in RediSearch (used for debug/testing). bool isMulti; // Determines if the index should multi-index or not. void *logCallbackCtx; // Context for the log callback. + size_t dataSize; // Vector element data size in bytes to be stored + // (possibly after pre-processing and different from inputBlobSize). RawDataContainer *vectors; // The raw vectors data container. /** @@ -105,10 +107,11 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { VecSimIndexAbstract(const AbstractIndexInitParams ¶ms, const IndexComponents &components) : VecSimIndexInterface(params.allocator), dim(params.dim), vecType(params.vecType), - dataSize(params.dataSize), metric(params.metric), + metric(params.metric), inputBlobSize(this->dim * sizeof(DataType)), blockSize(params.blockSize ? params.blockSize : DEFAULT_BLOCK_SIZE), indexCalculator(components.indexCalculator), preprocessors(components.preprocessors), - lastMode(EMPTY_MODE), isMulti(params.multi), logCallbackCtx(params.logCtx) { + lastMode(EMPTY_MODE), isMulti(params.multi), logCallbackCtx(params.logCtx), + dataSize(params.dataSize) { assert(VecSimType_sizeof(vecType)); assert(dataSize); this->vectors = new (this->allocator) DataBlocksContainer( @@ -323,23 +326,26 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { template ProcessedBlobs VecSimIndexAbstract::preprocess(const void *blob) const { - return this->preprocessors->preprocess(blob, this->dataSize); + return this->preprocessors->preprocess(blob, inputBlobSize); } template MemoryUtils::unique_blob VecSimIndexAbstract::preprocessQuery(const void *queryBlob, bool force_copy) const { - return this->preprocessors->preprocessQuery(queryBlob, this->dataSize, force_copy); + // force_copy indicates that we copy a processed blob (e.g., for batch iterator) - hence we're + // currently using the dataSize (post-processed size) as the effective input size. + const auto effective_input_size = force_copy ? dataSize : inputBlobSize; + return this->preprocessors->preprocessQuery(queryBlob, effective_input_size, force_copy); } template MemoryUtils::unique_blob VecSimIndexAbstract::preprocessForStorage(const void *original_blob) const { - return this->preprocessors->preprocessForStorage(original_blob, this->dataSize); + return this->preprocessors->preprocessForStorage(original_blob, inputBlobSize); } template void VecSimIndexAbstract::preprocessStorageInPlace(void *blob) const { - this->preprocessors->preprocessStorageInPlace(blob, this->dataSize); + this->preprocessors->preprocessStorageInPlace(blob, inputBlobSize); } diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 7f3deda17..410a18193 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -28,12 +28,6 @@ if(FP64_TESTS) add_definitions(-DFP64_TESTS) endif() -option(USE_VALGRIND "Building for Valgrind" OFF) -if(USE_VALGRIND) - add_definitions(-DRUNNING_ON_VALGRIND) - message(STATUS "Building with RUNNING_ON_VALGRIND") -endif() - if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)|(ARM64)|(armv8)|(armv9)") include(${root}/cmake/aarch64InstructionFlags.cmake) else() diff --git a/tests/unit/test_index_test_utils.cpp b/tests/unit/test_index_test_utils.cpp index ce1cefba9..04c4a0c93 100644 --- a/tests/unit/test_index_test_utils.cpp +++ b/tests/unit/test_index_test_utils.cpp @@ -87,7 +87,7 @@ class Int8IndexTestUtilsTest : public IndexTestUtilsTest { std::vector> vectors; void GenerateRandomAndAddVector(size_t label, size_t id) override { std::vector v(dim); - test_utils::populate_int8_vec(v.data(), dim, id); + test_utils::populate_int8_vec(v.data(), dim, static_cast(id)); VecSimIndex_AddVector(index, v.data(), label); vectors.emplace_back(v); @@ -154,8 +154,8 @@ class Float32IndexTestUtilsTest : public IndexTestUtilsTest { TEST_P(Int8IndexTestUtilsTest, BF) { BFParams params = {.type = VecSimType_INT8, .dim = dim}; + sleep(10); SetUp(params); - EXPECT_NO_FATAL_FAILURE(get_stored_vector_data_single_test()); VecSimMetric metric = std::get<1>(GetParam()); if (metric == VecSimMetric_Cosine) { @@ -189,7 +189,6 @@ INSTANTIATE_TEST_SUITE_P(Int8IndexTestUtilsTest, Int8IndexTestUtilsTest, TEST_P(Float32IndexTestUtilsTest, BF) { BFParams params = {.type = VecSimType_FLOAT32, .dim = dim}; SetUp(params); - EXPECT_NO_FATAL_FAILURE(get_stored_vector_data_single_test()); VecSimMetric metric = std::get<1>(GetParam()); } @@ -215,11 +214,11 @@ INSTANTIATE_TEST_SUITE_P( }); void IndexTestUtilsTest::get_stored_vector_data_single_test() { - size_t n = this->labels_count * this->vec_per_label; + size_t n = IndexTestUtilsTest::labels_count * this->vec_per_label; // Add vectors to the index int id = 0; - for (size_t i = 0; i < this->labels_count; i++) { + for (size_t i = 0; i < IndexTestUtilsTest::labels_count; i++) { for (size_t j = 0; j < vec_per_label; j++) { this->GenerateRandomAndAddVector(i, id++); } diff --git a/tests/unit/test_int8.cpp b/tests/unit/test_int8.cpp index 7fa66db38..c79077cf7 100644 --- a/tests/unit/test_int8.cpp +++ b/tests/unit/test_int8.cpp @@ -297,10 +297,7 @@ TEST_F(INT8BruteForceTest, elementSizeEstimation) { TEST_F(INT8TieredTest, elementSizeEstimation) { size_t M = 64; HNSWParams hnsw_params = {.dim = 4, .M = M}; - VecSimParams vecsim_hnsw_params = CreateParams(hnsw_params); - TieredIndexParams tiered_params = - test_utils::CreateTieredParams(vecsim_hnsw_params, this->mock_thread_pool); - EXPECT_NO_FATAL_FAILURE(element_size_test(tiered_params)); + EXPECT_NO_FATAL_FAILURE(element_size_test(hnsw_params)); } /* ---------------------------- Functionality tests ---------------------------- */ diff --git a/tests/unit/test_uint8.cpp b/tests/unit/test_uint8.cpp index 345c49207..7efb21c3b 100644 --- a/tests/unit/test_uint8.cpp +++ b/tests/unit/test_uint8.cpp @@ -296,10 +296,7 @@ TEST_F(UINT8BruteForceTest, elementSizeEstimation) { TEST_F(UINT8TieredTest, elementSizeEstimation) { size_t M = 64; HNSWParams hnsw_params = {.dim = 4, .M = M}; - VecSimParams vecsim_hnsw_params = CreateParams(hnsw_params); - TieredIndexParams tiered_params = - test_utils::CreateTieredParams(vecsim_hnsw_params, this->mock_thread_pool); - EXPECT_NO_FATAL_FAILURE(element_size_test(tiered_params)); + EXPECT_NO_FATAL_FAILURE(element_size_test(hnsw_params)); } /* ---------------------------- Functionality tests ---------------------------- */