diff --git a/Jenkinsfile b/Jenkinsfile index 2f88ab18e..3f79d729f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -59,7 +59,7 @@ pipeline { bat """ call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Auxiliary\\Build\\vcvars64.bat" if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL% - call "winbuild-clang.bat" -DCMAKE_BUILD_TYPE=Release -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=False -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENABLE_TESTER4=False -DSLEEF_DISABLE_SSL=False + call "winbuild-clang.bat" -DCMAKE_BUILD_TYPE=Release -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=True -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENABLE_TESTER4=False -DSLEEF_DISABLE_SSL=True if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL% ctest -j 4 --output-on-failure exit /b %ERRORLEVEL% @@ -76,7 +76,7 @@ pipeline { bat """ call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Auxiliary\\Build\\vcvars64.bat" if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL% - call "winbuild-msvc.bat" -DCMAKE_BUILD_TYPE=Release -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=True -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENABLE_TESTER4=True -DSLEEF_DISABLE_SSL=True + call "winbuild-msvc.bat" -DCMAKE_BUILD_TYPE=Release -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=True -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENABLE_TESTER4=True if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL% ctest -j 4 --output-on-failure exit /b %ERRORLEVEL% @@ -189,6 +189,7 @@ pipeline { cmake -E time ninja export OMP_WAIT_POLICY=passive export 
CTEST_OUTPUT_ON_FAILURE=TRUE + export LD_LIBRARY_PATH=/usr/powerpc64le-linux-gnu/lib ctest -j `nproc` ninja install ''' diff --git a/src/common/qtesterutil.h b/src/common/qtesterutil.h index d74e5c1a0..495a22d85 100644 --- a/src/common/qtesterutil.h +++ b/src/common/qtesterutil.h @@ -6,6 +6,10 @@ #include "quaddef.h" #include "testerutil.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef struct { #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) uint64_t h, l; @@ -51,3 +55,21 @@ Sleef_quad cast_q_str(const char *s); Sleef_quad cast_q_str_hex(const char *s); Sleef_quad add_q_d(Sleef_quad q, double d); #endif + +#ifdef __cplusplus +} + +static tlfloat_quad rndf128_(Sleef_quad min, Sleef_quad max, int setSignRandomly) { + return std::bit_cast(rndf128(min, max, setSignRandomly)); +} + +#if !defined(TLFLOAT_COMPILER_SUPPORTS_FLOAT128) && !defined(TLFLOAT_LONGDOUBLE_IS_FLOAT128) +static Sleef_quad rndf128(tlfloat_quad min, tlfloat_quad max, int setSignRandomly) { + return rndf128(std::bit_cast(min), std::bit_cast(max), setSignRandomly); +} + +static tlfloat_quad rndf128_(tlfloat_quad min, tlfloat_quad max, int setSignRandomly) { + return std::bit_cast(rndf128(std::bit_cast(min), std::bit_cast(max), setSignRandomly)); +} +#endif +#endif diff --git a/src/common/quaddef.h b/src/common/quaddef.h index 0161c3a66..0fc21124a 100644 --- a/src/common/quaddef.h +++ b/src/common/quaddef.h @@ -9,7 +9,7 @@ #define SLEEF_FLOAT128_IS_IEEEQP #endif -#if !defined(SLEEF_FLOAT128_IS_IEEEQP) && defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && (defined(__aarch64__) || defined(__zarch__)) +#if !defined(SLEEF_FLOAT128_IS_IEEEQP) && defined(__SIZEOF_LONG_DOUBLE__) && __SIZEOF_LONG_DOUBLE__ == 16 && (defined(__aarch64__) || defined(__zarch__) || defined(__riscv)) #define SLEEF_LONGDOUBLE_IS_IEEEQP #endif @@ -81,7 +81,7 @@ SLEEFSHARPif !defined(SLEEFXXX__NVCC__) && ((defined(SLEEFXXX__SIZEOF_FLOAT128__ SLEEFSHARPdefine SLEEFXXXSLEEF_FLOAT128_IS_IEEEQP 
SLEEFSHARPendif -SLEEFSHARPif !defined(SLEEFXXXSLEEF_FLOAT128_IS_IEEEQP) && !defined(SLEEFXXX__NVCC__) && defined(SLEEFXXX__SIZEOF_LONG_DOUBLE__) && SLEEFXXX__SIZEOF_LONG_DOUBLE__ == 16 && (defined(SLEEFXXX__aarch64__) || defined(SLEEFXXX__zarch__)) +SLEEFSHARPif !defined(SLEEFXXXSLEEF_FLOAT128_IS_IEEEQP) && !defined(SLEEFXXX__NVCC__) && defined(SLEEFXXX__SIZEOF_LONG_DOUBLE__) && SLEEFXXX__SIZEOF_LONG_DOUBLE__ == 16 && (defined(SLEEFXXX__aarch64__) || defined(SLEEFXXX__zarch__) || defined(SLEEFXXX__riscv)) SLEEFSHARPdefine SLEEFXXXSLEEF_LONGDOUBLE_IS_IEEEQP SLEEFSHARPendif diff --git a/src/common/testerutil.h b/src/common/testerutil.h index ff2aadbe7..152cf30a7 100644 --- a/src/common/testerutil.h +++ b/src/common/testerutil.h @@ -3,6 +3,8 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) +#include + #ifdef __cplusplus #include using namespace tlfloat; @@ -11,6 +13,7 @@ using namespace tlfloat; #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic ignored "-Wuninitialized" #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#pragma GCC diagnostic ignored "-Wattributes" #endif #if defined(__clang__) @@ -61,39 +64,27 @@ void memrand(void *p, int size); // The following functions are meant to be inlined static double u2d(uint64_t u) { - union { - double f; - uint64_t i; - } tmp; - tmp.i = u; - return tmp.f; + double d = 0; + memcpy(&d, &u, sizeof(d)); + return d; } static uint64_t d2u(double d) { - union { - double f; - uint64_t i; - } tmp; - tmp.f = d; - return tmp.i; + uint64_t u = 0; + memcpy(&u, &d, sizeof(u)); + return u; } static float u2f(uint32_t u) { - union { - float f; - uint32_t i; - } tmp; - tmp.i = u; - return tmp.f; + float f = 0; + memcpy(&f, &u, sizeof(f)); + return f; } static uint32_t f2u(float d) { - union { - float f; - uint32_t i; - } tmp; - tmp.f = d; - return tmp.i; + uint32_t u = 0; + memcpy(&u, &d, sizeof(u)); + return u; } static int startsWith(char *str, char *prefix) { diff --git 
a/src/libm-tester/CMakeLists.txt b/src/libm-tester/CMakeLists.txt index c6c0a741b..2deeb6451 100644 --- a/src/libm-tester/CMakeLists.txt +++ b/src/libm-tester/CMakeLists.txt @@ -246,6 +246,7 @@ macro(test_extension SIMD) list(APPEND IUT_LIST ${IUTINAME}) endif(SLEEF_BUILD_INLINE_HEADERS AND SED_COMMAND) endif(SLEEF_ENABLE_TESTER) + # if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) @@ -257,7 +258,7 @@ macro(test_extension SIMD) target_compile_options(${TARGET_TESTER4_${SIMD}} PRIVATE ${FLAGS_ENABLE_${SIMD}}) target_compile_definitions(${TARGET_TESTER4_${SIMD}} - PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} TLFLOAT_ENABLE_INLINING=1) + PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS}) target_link_libraries(${TARGET_TESTER4_${SIMD}} ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ}) if (FORCE_AAVPCS) target_compile_definitions(${TARGET_TESTER4_${SIMD}} PRIVATE ENABLE_AAVPCS=1) @@ -281,7 +282,7 @@ macro(test_extension SIMD) target_compile_options(${TARGET_TESTER4Y_${SIMD}} PRIVATE ${FLAGS_ENABLE_${SIMD}}) target_compile_definitions(${TARGET_TESTER4Y_${SIMD}} - PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} DETERMINISTIC=1 TLFLOAT_ENABLE_INLINING=1) + PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} DETERMINISTIC=1) target_link_libraries(${TARGET_TESTER4Y_${SIMD}} ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ}) add_dependencies(${TARGET_TESTER4Y_${SIMD}} ${TARGET_HEADERS}) add_dependencies(${TARGET_TESTER4Y_${SIMD}} ${TARGET_LIBSLEEF}) @@ -310,7 +311,6 @@ macro(test_extension SIMD) USE_INLINE_HEADER="sleefinline_${LCSIMD}.h" MACRO_ONLY_HEADER="macroonly${SIMD}.h" SIMD_SUFFIX=_${LCSIMD}_sleef - TLFLOAT_ENABLE_INLINING=1 ) target_include_directories(${TARGET_TESTER4I_${SIMD}} PRIVATE ${PROJECT_BINARY_DIR}/include) add_dependencies(${TARGET_TESTER4I_${SIMD}} ${TARGET_INLINE_HEADERS}) @@ -511,7 +511,7 @@ if (SLEEF_ARCH_X86) # tester4dsp128 add_executable(tester4dsp128 ${TESTER4_SRC}) 
target_compile_definitions(tester4dsp128 PRIVATE - ENABLE_DSP128=1 ${COMMON_TARGET_DEFINITIONS} TLFLOAT_ENABLE_INLINING=1) + ENABLE_DSP128=1 ${COMMON_TARGET_DEFINITIONS}) target_compile_options(tester4dsp128 PRIVATE ${FLAGS_ENABLE_SSE2}) target_link_libraries(tester4dsp128 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ}) add_dependencies(tester4dsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ext_tlfloat) @@ -520,7 +520,7 @@ if (SLEEF_ARCH_X86) # tester4dsp256 add_executable(tester4dsp256 ${TESTER4_SRC}) target_compile_definitions(tester4dsp256 PRIVATE - ENABLE_DSP256=1 ${COMMON_TARGET_DEFINITIONS} TLFLOAT_ENABLE_INLINING=1) + ENABLE_DSP256=1 ${COMMON_TARGET_DEFINITIONS}) target_compile_options(tester4dsp256 PRIVATE ${FLAGS_ENABLE_AVX}) target_link_libraries(tester4dsp256 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ}) add_dependencies(tester4dsp256 ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ext_tlfloat) @@ -538,6 +538,15 @@ if (SLEEF_ARCH_PPC64) add_test_iut(iutdsp128 1.0) list(APPEND IUT_LIST iutdsp128) endif(SLEEF_ENABLE_TESTER) + + if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) + add_executable(tester4dsp128 ${TESTER4_SRC}) + target_compile_definitions(tester4dsp128 PRIVATE ENABLE_DSPPOWER_128=1 ${COMMON_TARGET_DEFINITIONS}) + target_compile_options(tester4dsp128 PRIVATE ${FLAGS_ENABLE_VSX}) + target_link_libraries(tester4dsp128 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ}) + add_dependencies(tester4dsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ext_tlfloat) + add_test_with_emu(1.0 tester4dsp128) + endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) endif(SLEEF_ARCH_PPC64) if (SLEEF_ARCH_S390X) @@ -550,6 +559,15 @@ if (SLEEF_ARCH_S390X) add_test_iut(iutdsp128 1.0) list(APPEND IUT_LIST iutdsp128) endif(SLEEF_ENABLE_TESTER) + + if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) + add_executable(tester4dsp128 ${TESTER4_SRC}) + target_compile_definitions(tester4dsp128 PRIVATE ENABLE_DSPS390X_128=1 ${COMMON_TARGET_DEFINITIONS}) + 
target_compile_options(tester4dsp128 PRIVATE ${FLAGS_ENABLE_VXE}) + target_link_libraries(tester4dsp128 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ}) + add_dependencies(tester4dsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ext_tlfloat) + add_test_with_emu(1.0 tester4dsp128) + endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) endif(SLEEF_ARCH_S390X) if(SLEEF_BUILD_SCALAR_LIB) diff --git a/src/quad-tester/CMakeLists.txt b/src/quad-tester/CMakeLists.txt index f4f289bd6..84c675f96 100644 --- a/src/quad-tester/CMakeLists.txt +++ b/src/quad-tester/CMakeLists.txt @@ -20,6 +20,7 @@ else() endif() set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${SLEEF_C_FLAGS} ${FLAGS_NOSTRICTALIASING}") +set(CMAKE_CXX_FLAGS "${ORG_CMAKE_CXX_FLAGS} ${SLEEF_C_FLAGS} ${FLAGS_NOSTRICTALIASING}") if(COMPILER_SUPPORTS_FLOAT128) list(APPEND COMMON_TARGET_DEFINITIONS ENABLEFLOAT128=1) @@ -94,25 +95,52 @@ set(IUT_SRC qiutsimd.c ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c) macro(test_extension SIMD) if(COMPILER_SUPPORTS_${SIMD}) string(TOLOWER ${SIMD} LCSIMD) - string(CONCAT TARGET_IUT${SIMD} "qiut" ${LCSIMD}) - - add_executable(${TARGET_IUT${SIMD}} ${IUT_SRC}) - target_compile_options(${TARGET_IUT${SIMD}} - PRIVATE ${FLAGS_ENABLE_${SIMD}}) - target_compile_definitions(${TARGET_IUT${SIMD}} - PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS}) - target_link_libraries(${TARGET_IUT${SIMD}} sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) - - add_dependencies(${TARGET_IUT${SIMD}} sleefquad_headers ${TARGET_HEADERS}) - add_dependencies(${TARGET_IUT${SIMD}} sleefquad ${TARGET_LIBSLEEF}) - set_target_properties(${TARGET_IUT${SIMD}} PROPERTIES C_STANDARD 99) - if (DEFINED COSTOVERRIDE_${SIMD}) - math(EXPR C "${COSTOVERRIDE_${SIMD}} + 1") - add_test_iut(${TARGET_IUT${SIMD}} ${C}) - else() - add_test_iut(${TARGET_IUT${SIMD}} 0.5) - endif() - list(APPEND IUT_LIST ${TARGET_IUT${SIMD}}) + + if (SLEEF_ENABLE_TESTER) + 
string(CONCAT TARGET_IUT${SIMD} "qiut" ${LCSIMD}) + + add_executable(${TARGET_IUT${SIMD}} ${IUT_SRC}) + target_compile_options(${TARGET_IUT${SIMD}} + PRIVATE ${FLAGS_ENABLE_${SIMD}}) + target_compile_definitions(${TARGET_IUT${SIMD}} + PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(${TARGET_IUT${SIMD}} sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) + + add_dependencies(${TARGET_IUT${SIMD}} sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(${TARGET_IUT${SIMD}} sleefquad ${TARGET_LIBSLEEF}) + set_target_properties(${TARGET_IUT${SIMD}} PROPERTIES C_STANDARD 99) + if (DEFINED COSTOVERRIDE_${SIMD}) + math(EXPR C "${COSTOVERRIDE_${SIMD}} + 1") + add_test_iut(${TARGET_IUT${SIMD}} ${C}) + else() + add_test_iut(${TARGET_IUT${SIMD}} 0.5) + endif() + list(APPEND IUT_LIST ${TARGET_IUT${SIMD}}) + endif(SLEEF_ENABLE_TESTER) + + # + + if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) + set(TESTER4_SRC qtester4simd.cpp ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c) + string(CONCAT TARGET_TESTER4_${SIMD} "qtester4" ${LCSIMD}) + + add_executable(${TARGET_TESTER4_${SIMD}} ${TESTER4_SRC}) + target_compile_options(${TARGET_TESTER4_${SIMD}} + PRIVATE ${FLAGS_ENABLE_${SIMD}}) + target_compile_definitions(${TARGET_TESTER4_${SIMD}} + PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${TLFLOAT_LIBRARIES} ${LIBM}) + + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF}) + add_dependencies(${TARGET_TESTER4_${SIMD}} ext_tlfloat) + set_target_properties(${TARGET_TESTER4_${SIMD}} PROPERTIES C_STANDARD 99) + if (DEFINED COSTOVERRIDE_${SIMD}) + add_test_with_emu(${COSTOVERRIDE_${SIMD}} ${TARGET_TESTER4_${SIMD}}) + else() + add_test_with_emu(1.0 
${TARGET_TESTER4_${SIMD}}) + endif() + endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) # The iut programs whose names begin with "qiuti" are the iut for the # inline version of quad functions. @@ -120,31 +148,65 @@ macro(test_extension SIMD) if (SLEEF_BUILD_INLINE_HEADERS AND SED_COMMAND) if (MSVC AND NOT SLEEF_CLANG_ON_WINDOWS) message(STATUS "Quad inline headers are not tested with MSVC") - else() - string(CONCAT IUTINAME "qiuti" ${LCSIMD}) - add_executable(${IUTINAME} ${IUT_SRC}) - target_compile_options(${IUTINAME} - PRIVATE ${FLAGS_ENABLE_${SIMD}}) - target_compile_definitions(${IUTINAME} - PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} - USE_INLINE_HEADER="sleefquadinline_${LCSIMD}.h" - MACRO_ONLY_HEADER="qmacroonly${SIMD}.h" - SIMD_SUFFIX=_${LCSIMD}_sleefq + else(MSVC AND NOT SLEEF_CLANG_ON_WINDOWS) + if (SLEEF_ENABLE_TESTER) + string(CONCAT IUTINAME "qiuti" ${LCSIMD}) + add_executable(${IUTINAME} ${IUT_SRC}) + target_compile_options(${IUTINAME} + PRIVATE ${FLAGS_ENABLE_${SIMD}}) + target_compile_definitions(${IUTINAME} + PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} + USE_INLINE_HEADER="sleefquadinline_${LCSIMD}.h" + MACRO_ONLY_HEADER="qmacroonly${SIMD}.h" + SIMD_SUFFIX=_${LCSIMD}_sleefq ) - target_include_directories(${IUTINAME} PRIVATE ${PROJECT_BINARY_DIR}/include) - target_link_libraries(${IUTINAME} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) - add_dependencies(${IUTINAME} ${TARGET_QINLINE_HEADERS}) - set_target_properties(${IUTINAME} PROPERTIES C_STANDARD 99) - if (DEFINED COSTOVERRIDE_${SIMD}) - math(EXPR C "${COSTOVERRIDE_${SIMD}} + 1") - add_test_iut(${IUTINAME} ${C}) - else() - add_test_iut(${IUTINAME} 0.5) - endif() - list(APPEND IUT_LIST ${IUTINAME}) - endif() + target_include_directories(${IUTINAME} PRIVATE ${PROJECT_BINARY_DIR}/include) + target_link_libraries(${IUTINAME} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) + add_dependencies(${IUTINAME} 
${TARGET_QINLINE_HEADERS}) + set_target_properties(${IUTINAME} PROPERTIES C_STANDARD 99) + if (DEFINED COSTOVERRIDE_${SIMD}) + math(EXPR C "${COSTOVERRIDE_${SIMD}} + 1") + add_test_iut(${IUTINAME} ${C}) + else() + add_test_iut(${IUTINAME} 0.5) + endif() + list(APPEND IUT_LIST ${IUTINAME}) + endif(SLEEF_ENABLE_TESTER) + + # + + if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) + string(CONCAT TARGET_TESTER4I_${SIMD} "qtester4i" ${LCSIMD}) + add_executable(${TARGET_TESTER4I_${SIMD}} ${TESTER4_SRC}) + target_compile_options(${TARGET_TESTER4I_${SIMD}} + PRIVATE ${FLAGS_ENABLE_${SIMD}}) + target_compile_definitions(${TARGET_TESTER4I_${SIMD}} + PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(${TARGET_TESTER4I_${SIMD}} sleefquad ${TARGET_LIBSLEEF} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${TLFLOAT_LIBRARIES}) + if(CMAKE_C_COMPILER_ID MATCHES "GNU") + target_compile_options(${TARGET_TESTER4I_${SIMD}} PRIVATE "-Wno-unknown-pragmas") + endif() + target_compile_definitions(${TARGET_TESTER4I_${SIMD}} + PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} + USE_INLINE_HEADER="sleefquadinline_${LCSIMD}.h" + MACRO_ONLY_HEADER="qmacroonly${SIMD}.h" + SIMD_SUFFIX=_${LCSIMD}_sleefq + ) + target_include_directories(${TARGET_TESTER4I_${SIMD}} PRIVATE ${PROJECT_BINARY_DIR}/include) + add_dependencies(${TARGET_TESTER4I_${SIMD}} sleefquad sleefquad_headers ${TARGET_QINLINE_HEADERS} ext_tlfloat) + #set_target_properties(${TARGET_TESTER4I_${SIMD}} PROPERTIES ${COMMON_TARGET_PROPERTIES}) + if (DEFINED COSTOVERRIDE_${SIMD}) + add_test_with_emu(${COSTOVERRIDE_${SIMD}} ${TARGET_TESTER4I_${SIMD}}) + else() + add_test_with_emu(1.0 ${TARGET_TESTER4I_${SIMD}}) + endif() + endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) + + endif(MSVC AND NOT SLEEF_CLANG_ON_WINDOWS) endif(SLEEF_BUILD_INLINE_HEADERS AND SED_COMMAND) + # + if(LIB_MPFR AND NOT MINGW) # Build qtester2 SIMD string(TOLOWER ${SIMD} SIMDLC) @@ -168,63 +230,180 @@ endforeach() # Compile executable 
'qiutdspscalar' -add_executable(qiutdspscalar ${IUT_SRC}) -target_compile_definitions(qiutdspscalar PRIVATE ENABLE_DSPSCALAR=1 ${COMMON_TARGET_DEFINITIONS}) -target_link_libraries(qiutdspscalar sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) -set_target_properties(qiutdspscalar PROPERTIES C_STANDARD 99) -add_dependencies(qiutdspscalar sleefquad_headers ${TARGET_HEADERS}) -add_dependencies(qiutdspscalar sleefquad ${TARGET_LIBSLEEF}) -add_test_iut(qiutdspscalar 0.5) -list(APPEND IUT_LIST qiutdspscalar) +if (SLEEF_ENABLE_TESTER) + add_executable(qiutdspscalar ${IUT_SRC}) + target_compile_definitions(qiutdspscalar PRIVATE ENABLE_DSPSCALAR=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(qiutdspscalar sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) + set_target_properties(qiutdspscalar PROPERTIES C_STANDARD 99) + add_dependencies(qiutdspscalar sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(qiutdspscalar sleefquad ${TARGET_LIBSLEEF}) + add_test_iut(qiutdspscalar 0.5) + list(APPEND IUT_LIST qiutdspscalar) +endif(SLEEF_ENABLE_TESTER) + +if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) + # Compile executable 'qtester4dspscalar' + set(TESTER4_SRC qtester4simd.cpp ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c) + set(SIMD "DSPSCALAR") + set(LCSIMD "dspscalar") + string(CONCAT TARGET_TESTER4_${SIMD} "qtester4" ${LCSIMD}) + + add_executable(${TARGET_TESTER4_${SIMD}} ${TESTER4_SRC}) + target_compile_definitions(${TARGET_TESTER4_${SIMD}} PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${TLFLOAT_LIBRARIES} ${LIBM}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF}) + 
add_dependencies(${TARGET_TESTER4_${SIMD}} ext_tlfloat) + set_target_properties(${TARGET_TESTER4_${SIMD}} PROPERTIES C_STANDARD 99) + if (DEFINED COSTOVERRIDE_${SIMD}) + add_test_with_emu(${COSTOVERRIDE_${SIMD}} ${TARGET_TESTER4_${SIMD}}) + else() + add_test_with_emu(1.0 ${TARGET_TESTER4_${SIMD}}) + endif() +endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) if (SLEEF_ARCH_X86) - # Compile executable 'qiutdspx2' - add_executable(qiutdspx2 ${IUT_SRC}) - target_compile_definitions(qiutdspx2 PRIVATE ENABLE_DSPX2_X86=1 ${COMMON_TARGET_DEFINITIONS}) - target_link_libraries(qiutdspx2 sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) - set_target_properties(qiutdspx2 PROPERTIES C_STANDARD 99) - add_dependencies(qiutdspx2 sleefquad_headers ${TARGET_HEADERS}) - add_dependencies(qiutdspx2 sleefquad ${TARGET_LIBSLEEF}) - add_test_iut(qiutdspx2 0.5) - list(APPEND IUT_LIST qiutdspx2) + if (SLEEF_ENABLE_TESTER) + # Compile executable 'qiutdspx2' + add_executable(qiutdspx2 ${IUT_SRC}) + target_compile_definitions(qiutdspx2 PRIVATE ENABLE_DSPX2_X86=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(qiutdspx2 sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) + set_target_properties(qiutdspx2 PROPERTIES C_STANDARD 99) + add_dependencies(qiutdspx2 sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(qiutdspx2 sleefquad ${TARGET_LIBSLEEF}) + add_test_iut(qiutdspx2 0.5) + list(APPEND IUT_LIST qiutdspx2) + endif(SLEEF_ENABLE_TESTER) + + if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) + # Compile executable 'qtester4dspx2' + set(TESTER4_SRC qtester4simd.cpp ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c) + set(SIMD "DSPX2") + set(LCSIMD "dspx2") + string(CONCAT TARGET_TESTER4_${SIMD} "qtester4" ${LCSIMD}) + + add_executable(${TARGET_TESTER4_${SIMD}} ${TESTER4_SRC}) + target_compile_definitions(${TARGET_TESTER4_${SIMD}} PRIVATE ENABLE_DSPX2_X86=1 
${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${TLFLOAT_LIBRARIES} ${LIBM}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF}) + add_dependencies(${TARGET_TESTER4_${SIMD}} ext_tlfloat) + set_target_properties(${TARGET_TESTER4_${SIMD}} PROPERTIES C_STANDARD 99) + if (DEFINED COSTOVERRIDE_${SIMD}) + add_test_with_emu(${COSTOVERRIDE_${SIMD}} ${TARGET_TESTER4_${SIMD}}) + else() + add_test_with_emu(1.0 ${TARGET_TESTER4_${SIMD}}) + endif() + endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) endif() if (SLEEF_ARCH_AARCH64) - # Compile executable 'qiutdspx2' - add_executable(qiutdspx2 ${IUT_SRC}) - target_compile_definitions(qiutdspx2 PRIVATE ENABLE_DSPX2_AARCH64=1 ${COMMON_TARGET_DEFINITIONS}) - set_target_properties(qiutdspx2 PROPERTIES C_STANDARD 99) - target_link_libraries(qiutdspx2 sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) - add_dependencies(qiutdspx2 sleefquad_headers ${TARGET_HEADERS}) - add_dependencies(qiutdspx2 sleefquad ${TARGET_LIBSLEEF}) - add_test_iut(qiutdspx2 0.5) - list(APPEND IUT_LIST qiutdspx2) + if (SLEEF_ENABLE_TESTER) + # Compile executable 'qiutdspx2' + add_executable(qiutdspx2 ${IUT_SRC}) + target_compile_definitions(qiutdspx2 PRIVATE ENABLE_DSPX2_AARCH64=1 ${COMMON_TARGET_DEFINITIONS}) + set_target_properties(qiutdspx2 PROPERTIES C_STANDARD 99) + target_link_libraries(qiutdspx2 sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) + add_dependencies(qiutdspx2 sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(qiutdspx2 sleefquad ${TARGET_LIBSLEEF}) + add_test_iut(qiutdspx2 0.5) + list(APPEND IUT_LIST qiutdspx2) + endif(SLEEF_ENABLE_TESTER) + + if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) + # 
Compile executable 'qtester4dspx2' + set(TESTER4_SRC qtester4simd.cpp ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c) + set(SIMD "DSPX2") + set(LCSIMD "dspx2") + string(CONCAT TARGET_TESTER4_${SIMD} "qtester4" ${LCSIMD}) + + add_executable(${TARGET_TESTER4_${SIMD}} ${TESTER4_SRC}) + target_compile_definitions(${TARGET_TESTER4_${SIMD}} PRIVATE ENABLE_DSPX2_AARCH64=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${TLFLOAT_LIBRARIES} ${LIBM}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF}) + add_dependencies(${TARGET_TESTER4_${SIMD}} ext_tlfloat) + set_target_properties(${TARGET_TESTER4_${SIMD}} PROPERTIES C_STANDARD 99) + if (DEFINED COSTOVERRIDE_${SIMD}) + add_test_with_emu(${COSTOVERRIDE_${SIMD}} ${TARGET_TESTER4_${SIMD}}) + else() + add_test_with_emu(1.0 ${TARGET_TESTER4_${SIMD}}) + endif() + endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) endif() if (SLEEF_ARCH_PPC64) - # Compile executable 'qiutdspx2' - add_executable(qiutdspx2 ${IUT_SRC}) - target_compile_options(qiutdspx2 PRIVATE ${FLAGS_ENABLE_VSX}) - set_target_properties(qiutdspx2 PROPERTIES C_STANDARD 99) - target_compile_definitions(qiutdspx2 PRIVATE ENABLE_DSPX2_PPC64=1 ${COMMON_TARGET_DEFINITIONS}) - target_link_libraries(qiutdspx2 sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) - add_dependencies(qiutdspx2 sleefquad_headers ${TARGET_HEADERS}) - add_dependencies(qiutdspx2 sleefquad ${TARGET_LIBSLEEF}) - add_test_iut(qiutdspx2 0.5) - list(APPEND IUT_LIST qiutdspx2) + if (SLEEF_ENABLE_TESTER) + # Compile executable 'qiutdspx2' + add_executable(qiutdspx2 ${IUT_SRC}) + target_compile_options(qiutdspx2 PRIVATE ${FLAGS_ENABLE_VSX}) + set_target_properties(qiutdspx2 PROPERTIES C_STANDARD 99) + 
target_compile_definitions(qiutdspx2 PRIVATE ENABLE_DSPX2_PPC64=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(qiutdspx2 sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) + add_dependencies(qiutdspx2 sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(qiutdspx2 sleefquad ${TARGET_LIBSLEEF}) + add_test_iut(qiutdspx2 0.5) + list(APPEND IUT_LIST qiutdspx2) + endif(SLEEF_ENABLE_TESTER) + + if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) + # Compile executable 'qtester4dspx2' + set(TESTER4_SRC qtester4simd.cpp ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c) + set(SIMD "DSPX2") + set(LCSIMD "dspx2") + string(CONCAT TARGET_TESTER4_${SIMD} "qtester4" ${LCSIMD}) + + add_executable(${TARGET_TESTER4_${SIMD}} ${TESTER4_SRC}) + target_compile_options(${TARGET_TESTER4_${SIMD}} PRIVATE ${FLAGS_ENABLE_VSX}) + target_compile_definitions(${TARGET_TESTER4_${SIMD}} PRIVATE ENABLE_DSPX2_PPC64=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${TLFLOAT_LIBRARIES} ${LIBM}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF}) + add_dependencies(${TARGET_TESTER4_${SIMD}} ext_tlfloat) + set_target_properties(${TARGET_TESTER4_${SIMD}} PROPERTIES C_STANDARD 99) + if (DEFINED COSTOVERRIDE_${SIMD}) + add_test_with_emu(${COSTOVERRIDE_${SIMD}} ${TARGET_TESTER4_${SIMD}}) + else() + add_test_with_emu(1.0 ${TARGET_TESTER4_${SIMD}}) + endif() + endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) endif() if (SLEEF_ARCH_S390X) - # Compile executable 'qiutdspx2' - add_executable(qiutdspx2 ${IUT_SRC}) - target_compile_options(qiutdspx2 PRIVATE ${FLAGS_ENABLE_VXE}) - set_target_properties(qiutdspx2 PROPERTIES C_STANDARD 99) - target_compile_definitions(qiutdspx2 PRIVATE ENABLE_DSPX2_S390X=1 
${COMMON_TARGET_DEFINITIONS}) - target_link_libraries(qiutdspx2 sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) - add_dependencies(qiutdspx2 sleefquad_headers ${TARGET_HEADERS}) - add_dependencies(qiutdspx2 sleefquad ${TARGET_LIBSLEEF}) - add_test_iut(qiutdspx2 0.5) - list(APPEND IUT_LIST qiutdspx2) + if (SLEEF_ENABLE_TESTER) + # Compile executable 'qiutdspx2' + add_executable(qiutdspx2 ${IUT_SRC}) + target_compile_options(qiutdspx2 PRIVATE ${FLAGS_ENABLE_VXE}) + set_target_properties(qiutdspx2 PROPERTIES C_STANDARD 99) + target_compile_definitions(qiutdspx2 PRIVATE ENABLE_DSPX2_S390X=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(qiutdspx2 sleefquad ${TARGET_LIBSLEEF} ${LIBRT} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${LIBQUADMATH} ${LIBM}) + add_dependencies(qiutdspx2 sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(qiutdspx2 sleefquad ${TARGET_LIBSLEEF}) + add_test_iut(qiutdspx2 0.5) + list(APPEND IUT_LIST qiutdspx2) + endif(SLEEF_ENABLE_TESTER) + + if (SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) + # Compile executable 'qtester4dspx2' + set(TESTER4_SRC qtester4simd.cpp ${sleef_SOURCE_DIR}/src/common/main_checkfeature.c) + set(SIMD "DSPX2") + set(LCSIMD "dspx2") + string(CONCAT TARGET_TESTER4_${SIMD} "qtester4" ${LCSIMD}) + + add_executable(${TARGET_TESTER4_${SIMD}} ${TESTER4_SRC}) + target_compile_options(${TARGET_TESTER4_${SIMD}} PRIVATE ${FLAGS_ENABLE_VXE}) + target_compile_definitions(${TARGET_TESTER4_${SIMD}} PRIVATE ENABLE_DSPX2_S390X=1 ${COMMON_TARGET_DEFINITIONS}) + target_link_libraries(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF} ${TARGET_QTESTERUTIL_OBJ} ${TARGET_TESTERUTIL_OBJ} ${TLFLOAT_LIBRARIES} ${LIBM}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad_headers ${TARGET_HEADERS}) + add_dependencies(${TARGET_TESTER4_${SIMD}} sleefquad ${TARGET_LIBSLEEF}) + add_dependencies(${TARGET_TESTER4_${SIMD}} ext_tlfloat) + 
set_target_properties(${TARGET_TESTER4_${SIMD}} PROPERTIES C_STANDARD 99) + if (DEFINED COSTOVERRIDE_${SIMD}) + add_test_with_emu(${COSTOVERRIDE_${SIMD}} ${TARGET_TESTER4_${SIMD}}) + else() + add_test_with_emu(1.0 ${TARGET_TESTER4_${SIMD}}) + endif() + endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES) endif() # Compile executable 'qiutcuda' diff --git a/src/quad-tester/qtester.c b/src/quad-tester/qtester.c index ce32db91a..2bad98ba9 100644 --- a/src/quad-tester/qtester.c +++ b/src/quad-tester/qtester.c @@ -903,9 +903,11 @@ void do_test(int options) { testComparisonOuterLoop(mpfr_equal_p, child_icmpeqq, stdCheckVals); checkResult(success, -1); +#if 0 fprintf(stderr, "icmpne : "); testComparisonOuterLoop(mpfr_lessgreater_p, child_icmpneq, stdCheckVals); checkResult(success, -1); +#endif fprintf(stderr, "icmpq : "); testComparisonOuterLoop(mpfr_cmp, child_icmpq, stdCheckVals); diff --git a/src/quad-tester/qtester4simd.cpp b/src/quad-tester/qtester4simd.cpp new file mode 100644 index 000000000..6c6f99f31 --- /dev/null +++ b/src/quad-tester/qtester4simd.cpp @@ -0,0 +1,1323 @@ +// Copyright Naoki Shibata and contributors 2010 - 2024. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "quaddef.h" +#include "misc.h" +#include "qtesterutil.h" + +using namespace std; + +// + +#if !defined(USE_INLINE_HEADER) +#include "sleef.h" +#include "sleefquad.h" +#else // #if !defined(USE_INLINE_HEADER) +#include +#include +#include +#include + +#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) +#ifndef FP_FAST_FMA +#define FP_FAST_FMA +#endif +#endif + +#if defined(_MSC_VER) && !defined(__STDC__) +#define __STDC__ 1 +#endif + +#if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) +#include +#endif + +#if (defined(_MSC_VER)) +#include +#endif + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#include +#endif + +#if defined(__ARM_FEATURE_SVE) +#include +#endif + +#if defined(__riscv) && defined(__riscv_v) +#include +#endif + +#if defined(__VSX__) +#include +#endif + +#if defined(__VX__) +#include +#endif + +#define SLEEF_ALWAYS_INLINE inline +#define SLEEF_INLINE +#define SLEEF_CONST +#include USE_INLINE_HEADER +#include MACRO_ONLY_HEADER + +#ifndef ENABLE_PUREC_SCALAR +#include "sleefquadinline_purec_scalar.h" +#endif + +#endif // #if !defined(USE_INLINE_HEADER) + +// + +#ifdef ENABLE_PUREC_SCALAR +#include "qrenamepurec_scalar.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 1 +#include "helperpurec_scalar.h" +#define VARGQUAD Sleef_quad +#endif +#endif + +#ifdef ENABLE_PURECFMA_SCALAR +#include "qrenamepurecfma_scalar.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 2 +#include "helperpurec_scalar.h" +#define VARGQUAD Sleef_quad +#endif +#endif + +#ifdef ENABLE_DSPSCALAR +#include "qrenamedspscalar.h" +#define CONFIG 1 +#include "helperpurec_scalar.h" +#define VARGQUAD Sleef_quad +#endif + +#ifdef ENABLE_SSE2 +#include "qrenamesse2.h" +#if 
!defined(USE_INLINE_HEADER) +#define CONFIG 2 +#include "helpersse2.h" +#define VARGQUAD Sleef_quadx2 +#endif +#endif + +#ifdef ENABLE_AVX2128 +#include "qrenameavx2128.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 1 +#include "helperavx2_128.h" +#define VARGQUAD Sleef_quadx2 +#endif +#endif + +#ifdef ENABLE_DSPX2_X86 +#include "qrenamedspx2.h" +#define CONFIG 2 +#include "helpersse2.h" +#define VARGQUAD Sleef_quadx2 +#endif + +#ifdef ENABLE_AVX2 +#include "qrenameavx2.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 1 +#include "helperavx2.h" +#define VARGQUAD Sleef_quadx4 +#endif +#endif + +#ifdef ENABLE_AVX512F +#include "qrenameavx512f.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 1 +#include "helperavx512f.h" +#define VARGQUAD Sleef_quadx8 +#endif +#endif + +#ifdef ENABLE_ADVSIMD +#include "qrenameadvsimd.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 1 +#include "helperadvsimd.h" +#define VARGQUAD Sleef_quadx2 +#endif +#endif + +#ifdef ENABLE_DSPX2_AARCH64 +#include "qrenamedspx2.h" +#define CONFIG 2 +#include "helperadvsimd.h" +#define VARGQUAD Sleef_quadx2 +#endif + +#ifdef ENABLE_SVE +#include "qrenamesve.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 1 +#include "helpersve.h" +#define VARGQUAD Sleef_svquad +#endif +#define SIZEOF_VARGQUAD (svcntd()*8) +#endif + +#ifdef ENABLE_VSX +#include "qrenamevsx.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 1 +#include "helperpower_128.h" +#define VARGQUAD Sleef_quadx2 +#endif +#endif + +#ifdef ENABLE_VSX3 +#include "qrenamevsx3.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 3 +#include "helperpower_128.h" +#define VARGQUAD Sleef_quadx2 +#endif +#endif + +#ifdef ENABLE_DSPX2_PPC64 +#include "qrenamedspx2.h" +#define CONFIG 1 +#include "helperpower_128.h" +#define VARGQUAD Sleef_quadx2 +#endif + +#ifdef ENABLE_VXE +#include "qrenamevxe.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 140 +#include "helpers390x_128.h" +#define VARGQUAD Sleef_quadx2 +#endif +#endif + +#ifdef 
ENABLE_VXE2 +#include "qrenamevxe2.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 150 +#include "helpers390x_128.h" +#define VARGQUAD Sleef_quadx2 +#endif +#endif + +#ifdef ENABLE_DSPX2_S390X +#include "qrenamedspx2.h" +#define CONFIG 140 +#include "helpers390x_128.h" +#define VARGQUAD Sleef_quadx2 +#endif + +#ifdef ENABLE_RVVM1 +#include "qrenamervvm1.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 1 +#define ENABLE_RVV_DP +#include "helperrvv.h" +#define VARGQUAD Sleef_rvvm1quad +#endif +#define SIZEOF_VARGQUAD (__riscv_vsetvlmax_e64m1()*8) +#endif + +#ifdef ENABLE_RVVM2 +#include "qrenamervvm2.h" +#if !defined(USE_INLINE_HEADER) +#define CONFIG 1 +#define ENABLE_RVV_DP +#include "helperrvv.h" +#define VARGQUAD Sleef_rvvm2quad +#endif +#define SIZEOF_VARGQUAD (__riscv_vsetvlmax_e64m2()*8) +#endif + + +#ifndef VARGQUAD +#define VARGQUAD vargquad +#endif + +#ifndef SIZEOF_VARGQUAD +#define SIZEOF_VARGQUAD sizeof(VARGQUAD) +#endif + +#ifdef USE_INLINE_HEADER +#ifdef vopmask +#undef vopmask +#endif + +#define CONCAT_SIMD_SUFFIX_(keyword, suffix) keyword ## suffix +#define CONCAT_SIMD_SUFFIX(keyword, suffix) CONCAT_SIMD_SUFFIX_(keyword, suffix) +#define vmask CONCAT_SIMD_SUFFIX(vmask, SIMD_SUFFIX) +#define vopmask CONCAT_SIMD_SUFFIX(vopmask, SIMD_SUFFIX) +#define vdouble CONCAT_SIMD_SUFFIX(vdouble, SIMD_SUFFIX) +#define vargquad CONCAT_SIMD_SUFFIX(vargquad, SIMD_SUFFIX) +#define vint CONCAT_SIMD_SUFFIX(vint, SIMD_SUFFIX) +#define vint2 CONCAT_SIMD_SUFFIX(vint2, SIMD_SUFFIX) +#define vdouble2 CONCAT_SIMD_SUFFIX(vdouble2, SIMD_SUFFIX) +#define vd2getx_vd_vd2 CONCAT_SIMD_SUFFIX(vd2getx_vd_vd2, SIMD_SUFFIX) +#define vd2gety_vd_vd2 CONCAT_SIMD_SUFFIX(vd2gety_vd_vd2, SIMD_SUFFIX) +#define vloadu_vd_p CONCAT_SIMD_SUFFIX(vloadu_vd_p, SIMD_SUFFIX) +#define vstoreu_v_p_vd CONCAT_SIMD_SUFFIX(vstoreu_v_p_vd, SIMD_SUFFIX) +#define vloadu_vi_p CONCAT_SIMD_SUFFIX(vloadu_vi_p, SIMD_SUFFIX) +#define vstoreu_v_p_vi CONCAT_SIMD_SUFFIX(vstoreu_v_p_vi, SIMD_SUFFIX) +#define 
vreinterpret_vm_vu64 CONCAT_SIMD_SUFFIX(vreinterpret_vm_vu64, SIMD_SUFFIX) +#define vreinterpret_vu64_vm CONCAT_SIMD_SUFFIX(vreinterpret_vu64_vm, SIMD_SUFFIX) +#define vreinterpret_vm_vi64 CONCAT_SIMD_SUFFIX(vreinterpret_vm_vi64, SIMD_SUFFIX) +#define vreinterpret_vi64_vm CONCAT_SIMD_SUFFIX(vreinterpret_vi64_vm, SIMD_SUFFIX) +#define vreinterpret_vm_vd CONCAT_SIMD_SUFFIX(vreinterpret_vm_vd, SIMD_SUFFIX) +#define vreinterpret_vd_vm CONCAT_SIMD_SUFFIX(vreinterpret_vd_vm, SIMD_SUFFIX) +#endif + +// + +extern "C" { + int check_feature(double d, float f) { + double s[VECTLENDP]; + for(int i=0;i(xgetq(aq, idx)); } + +#if !defined(TLFLOAT_COMPILER_SUPPORTS_FLOAT128) && !defined(TLFLOAT_LONGDOUBLE_IS_FLOAT128) +static VARGQUAD xsetq(VARGQUAD aq, int idx, tlfloat_quad q) { return xsetq(aq, idx, bit_cast(q)); } +#endif + +static bool check_q_q(const char *msg, VARGQUAD (*vfunc)(VARGQUAD), tlfloat_octuple (*tlfunc)(const tlfloat_octuple), + const tlfloat_quad *a0, size_t z, double tol, bool checkSignedZero) { + VARGQUAD v0; + for(size_t i=0;i(t, c, SLEEF_QUAD_MANT_DIG, + (tlfloat_quad)TLFLOAT_FLT128_DENORM_MIN, (tlfloat_quad)TLFLOAT_FLT128_MAX, checkSignedZero); + // tlfloat_printf("t = %.35Og, c = %.35Og, ulp = %g\n", t, c, u); + if (u > maxULP) maxULP = u; + if (u > tol) { + tlfloat_printf("%s : arg = %Qa (%.35Qg), ulp = %g, t = %.35Og, c = %.35Og\n", msg, a0[i], a0[i], u, t, c); + return false; + } + } + return true; +} + +static bool check_q_q_(const char *msg, VARGQUAD (*vfunc)(VARGQUAD), tlfloat_octuple_ (*tlfunc)(const tlfloat_octuple_), + const tlfloat_quad *a0, size_t z, double tol, bool checkSignedZero) { + VARGQUAD v0; + for(size_t i=0;i(t, c, SLEEF_QUAD_MANT_DIG, + (tlfloat_quad)TLFLOAT_FLT128_DENORM_MIN, (tlfloat_quad)TLFLOAT_FLT128_MAX, checkSignedZero); + // tlfloat_printf("t = %.35Og, c = %.35Og, ulp = %g\n", t, c, u); + if (u > maxULP) maxULP = u; + if (u > tol) { + tlfloat_printf("%s : arg = %Qa (%.35Qg), ulp = %g, t = %.35Og, c = %.35Og\n", msg, a0[i], 
a0[i], u, t, c); + return false; + } + } + return true; +} + +static bool check_q_q(const char *msg, VARGQUAD (*vfunc)(VARGQUAD), tlfloat_octuple (*tlfunc)(const tlfloat_octuple), + const char *minStr, const char *maxStr, bool sign, int nLoop, uint64_t seed, double tol, bool checkSignedZero) { + xsrand(seed); + tlfloat_quad min = tlfloat_strtoq(minStr, nullptr), max = tlfloat_strtoq(maxStr, nullptr); + VARGQUAD v0; + for(int i=0;i(t, c, SLEEF_QUAD_MANT_DIG, + (tlfloat_quad)TLFLOAT_FLT128_DENORM_MIN, (tlfloat_quad)TLFLOAT_FLT128_MAX, checkSignedZero); + // tlfloat_printf("t = %.35Og, c = %.35Og, ulp = %g\n", t, c, u); + if (u > maxULP) maxULP = u; + if (u > tol) { + tlfloat_printf("%s : arg = %Qa (%.35Qg), ulp = %g, t = %.35Og, c = %.35Og\n", msg, x, x, u, t, c); + return false; + } + } + return true; +} + +static bool check_q_q_(const char *msg, VARGQUAD (*vfunc)(VARGQUAD), tlfloat_octuple_ (*tlfunc)(const tlfloat_octuple_), + const char *minStr, const char *maxStr, bool sign, int nLoop, uint64_t seed, double tol, bool checkSignedZero) { + xsrand(seed); + tlfloat_quad min = tlfloat_strtoq(minStr, nullptr), max = tlfloat_strtoq(maxStr, nullptr); + VARGQUAD v0; + for(int i=0;i(t, c, SLEEF_QUAD_MANT_DIG, + (tlfloat_quad)TLFLOAT_FLT128_DENORM_MIN, (tlfloat_quad)TLFLOAT_FLT128_MAX, checkSignedZero); + // tlfloat_printf("t = %.35Og, c = %.35Og, ulp = %g\n", t, c, u); + if (u > maxULP) maxULP = u; + if (u > tol) { + tlfloat_printf("%s : arg = %Qa (%.35Qg), ulp = %g, t = %.35Og, c = %.35Og\n", msg, x, x, u, t, c); + return false; + } + } + return true; +} + +static bool check_q_q_q(const char *msg, VARGQUAD (*vfunc)(VARGQUAD, VARGQUAD), + tlfloat_octuple (*tlfunc)(const tlfloat_octuple, const tlfloat_octuple), + const tlfloat_quad *a, size_t z, double tol, bool checkSignedZero) { + VARGQUAD v0, v1; + for(size_t i=0;i(t, c, SLEEF_QUAD_MANT_DIG, + (tlfloat_quad)TLFLOAT_FLT128_DENORM_MIN, (tlfloat_quad)TLFLOAT_FLT128_MAX, checkSignedZero); + //tlfloat_printf("t = %.35Og, c = 
%.35Og, ulp = %g\n", t, c, u); + if (u > maxULP) maxULP = u; + if (u > tol) { + tlfloat_printf("%s : arg0 = %Qa (%.35Qg), arg1 = %Qa (%.35Qg), ulp = %g, t = %Oa (%.35Og), c = %Oa (%.35Og)\n", msg, a[i], a[i], a[j], a[j], u, t, t, c, c); + tlfloat_printf("c = %Qa (%.35Qg)\n", (tlfloat_quad)c, (tlfloat_quad)c); + return false; + } + } + } + return true; +} + +static bool check_q_q_q_(const char *msg, VARGQUAD (*vfunc)(VARGQUAD, VARGQUAD), + tlfloat_octuple_ (*tlfunc)(const tlfloat_octuple_, const tlfloat_octuple_), + const tlfloat_quad *a, size_t z, double tol, bool checkSignedZero) { + VARGQUAD v0, v1; + for(size_t i=0;i(t, c, SLEEF_QUAD_MANT_DIG, + (tlfloat_quad)TLFLOAT_FLT128_DENORM_MIN, (tlfloat_quad)TLFLOAT_FLT128_MAX, checkSignedZero); + //tlfloat_printf("t = %.35Og, c = %.35Og, ulp = %g\n", t, c, u); + if (u > maxULP) maxULP = u; + if (u > tol) { + tlfloat_printf("%s : arg0 = %Qa (%.35Qg), arg1 = %Qa (%.35Qg), ulp = %g, t = %Oa (%.35Og), c = %Oa (%.35Og)\n", msg, a[i], a[i], a[j], a[j], u, t, t, c, c); + return false; + } + } + } + return true; +} + +static bool check_q_q_q(const char *msg, VARGQUAD (*vfunc)(VARGQUAD, VARGQUAD), + tlfloat_octuple (*tlfunc)(const tlfloat_octuple, const tlfloat_octuple), + const char *minStr, const char *maxStr, bool sign, int nLoop, uint64_t seed, double tol, bool checkSignedZero) { + xsrand(seed); + tlfloat_quad min = tlfloat_strtoq(minStr, nullptr), max = tlfloat_strtoq(maxStr, nullptr); + VARGQUAD v0, v1; + for(int i=0;i(t, c, SLEEF_QUAD_MANT_DIG, + (tlfloat_quad)TLFLOAT_FLT128_DENORM_MIN, (tlfloat_quad)TLFLOAT_FLT128_MAX, checkSignedZero); + //tlfloat_printf("t = %.35Og, c = %.35Og, ulp = %g\n", t, c, u); + if (u > maxULP) maxULP = u; + if (u > tol) { + tlfloat_printf("%s : arg0 = %Qa (%.35Qg), arg1 = %Qa (%.35Qg), ulp = %g, t = %Oa (%.35Og), c = %Oa (%.35Og)\n", msg, x, x, y, y, u, t, t, c, c); + return false; + } + } + return true; +} + +static bool check_q_q_q_(const char *msg, VARGQUAD (*vfunc)(VARGQUAD, VARGQUAD), + 
tlfloat_octuple_ (*tlfunc)(const tlfloat_octuple_, const tlfloat_octuple_), + const char *minStr, const char *maxStr, bool sign, int nLoop, uint64_t seed, double tol, bool checkSignedZero) { + xsrand(seed); + tlfloat_quad min = tlfloat_strtoq(minStr, nullptr), max = tlfloat_strtoq(maxStr, nullptr); + VARGQUAD v0, v1; + for(int i=0;i(t, c, SLEEF_QUAD_MANT_DIG, + (tlfloat_quad)TLFLOAT_FLT128_DENORM_MIN, (tlfloat_quad)TLFLOAT_FLT128_MAX, checkSignedZero); + //tlfloat_printf("t = %.35Og, c = %.35Og, ulp = %g\n", t, c, u); + if (u > maxULP) maxULP = u; + if (u > tol) { + tlfloat_printf("%s : arg0 = %Qa (%.35Qg), arg1 = %Qa (%.35Qg), ulp = %g, t = %Oa (%.35Og), c = %Oa (%.35Og)\n", msg, x, x, y, y, u, t, t, c, c); + return false; + } + } + return true; +} + +static bool check_q_q_q_q_(const char *msg, VARGQUAD (*vfunc)(VARGQUAD, VARGQUAD, VARGQUAD), + tlfloat_octuple_ (*tlfunc)(const tlfloat_octuple_, const tlfloat_octuple_, const tlfloat_octuple_), + const tlfloat_quad *a, size_t z, double tol, bool checkSignedZero) { + VARGQUAD v0, v1, v2; + for(size_t i=0;i(t, c, SLEEF_QUAD_MANT_DIG, + (tlfloat_quad)TLFLOAT_FLT128_DENORM_MIN, (tlfloat_quad)TLFLOAT_FLT128_MAX, checkSignedZero); + //tlfloat_printf("t = %.35Og, c = %.35Og, ulp = %g\n", t, c, u); + if (u > maxULP) maxULP = u; + if (u > tol) { + tlfloat_printf("%s : arg0 = %Qa (%.35Qg), arg1 = %Qa (%.35Qg), arg2 = %Qa (%.35Qg), ulp = %g, t = %Oa (%.35Og), c = %Oa (%.35Og)\n", msg, a[i], a[i], a[j], a[j], a[k], a[k], u, t, t, c, c); + return false; + } + } + } + } + return true; +} + +static bool check_q_q_q_q_(const char *msg, VARGQUAD (*vfunc)(VARGQUAD, VARGQUAD, VARGQUAD), + tlfloat_octuple_ (*tlfunc)(const tlfloat_octuple_, const tlfloat_octuple_, const tlfloat_octuple_), + const char *minStr, const char *maxStr, bool sign, int nLoop, uint64_t seed, double tol, bool checkSignedZero) { + xsrand(seed); + tlfloat_quad min = tlfloat_strtoq(minStr, nullptr), max = tlfloat_strtoq(maxStr, nullptr); + VARGQUAD v0, v1, v2; + 
for(int i=0;i(t, c, SLEEF_QUAD_MANT_DIG, + (tlfloat_quad)TLFLOAT_FLT128_DENORM_MIN, (tlfloat_quad)TLFLOAT_FLT128_MAX, checkSignedZero); + //tlfloat_printf("t = %.35Og, c = %.35Og, ulp = %g\n", t, c, u); + if (u > maxULP) maxULP = u; + if (u > tol) { + /* Operands are reported as arg0, arg1, arg2 in the order x, y, z. */ tlfloat_printf("%s : arg0 = %Qa (%.35Qg), arg1 = %Qa (%.35Qg), arg2 = %Qa (%.35Qg), ulp = %g, t = %Oa (%.35Og), c = %Oa (%.35Og)\n", msg, x, x, y, y, z, z, u, t, t, c, c); + return false; + } + } + return true; +} + +static bool check_i_q_q_(const char *msg, vint (*vfunc)(VARGQUAD, VARGQUAD), int (*tlfunc)(const tlfloat_octuple_, const tlfloat_octuple_), + const tlfloat_quad *a, size_t z) { + VARGQUAD v0, v1; + for(size_t i=0;i(t, c, nbmant, flmin, flmax, true); + if (tulp != culp) { + cout << "NG" << endl; + tlfloat_printf("t = %Oa %.35Og\n", t, t); + tlfloat_printf("c = %Oa %.35Og\n", c, c); + printf("tulp = %g\n", tulp); + printf("culp = %g\n", culp); + exit(-1); + } +} + +void showULP(bool success) { + printf("%s (%g ulp)\n", success ? 
"OK" : "NG", maxULP); + maxULP = 0; +} + +// + +extern "C" { + int main2(int argc, char **argv); +} + +int main2(int argc, char **argv) { + bool success = true; + const int NTEST = 1000; + + // Tests if counting ulp numbers is correct + + check(+0.0, +0.0, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 0); + check(-0.0, +0.0, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 10002); + check(+0.0, -0.0, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 10002); + check(-0.0, -0.0, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 0); + + check(+1.0, +1.0, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 0); + check(tlfloat_nextafterq(+1.0, +INFINITY), +1.0, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 1.0); + check(tlfloat_nextafterq(+1.0, -INFINITY), +1.0, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 0.5); + + check(-1.0, -1.0, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 0); + check(tlfloat_nextafterq(-1.0, +INFINITY), -1.0, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 0.5); + check(tlfloat_nextafterq(-1.0, -INFINITY), -1.0, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 1.0); + + check(INFINITY, INFINITY, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 0); + check(tlfloat_nextafterq(INFINITY, 0), INFINITY, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, INFINITY); + check(INFINITY, tlfloat_nextafterq(INFINITY, 0), SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 1.0); + + check(-INFINITY, -INFINITY, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 0); + check(tlfloat_nextafterq(-INFINITY, 0), -INFINITY, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, INFINITY); + check(-INFINITY, tlfloat_nextafterq(-INFINITY, 0), SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, 
TLFLOAT_FLT128_MAX, 1.0); + + check(TLFLOAT_FLT128_MIN, TLFLOAT_FLT128_MIN, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 0); + check(tlfloat_nextafterq(TLFLOAT_FLT128_MIN, 0.0), TLFLOAT_FLT128_MIN, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 1.0); + check(tlfloat_nextafterq(TLFLOAT_FLT128_MIN, 1.0), TLFLOAT_FLT128_MIN, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 1.0); + + check(-(tlfloat_quad)TLFLOAT_FLT128_MIN, -(tlfloat_quad)TLFLOAT_FLT128_MIN, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 0); + check(tlfloat_nextafterq(-(tlfloat_quad)TLFLOAT_FLT128_MIN, 0.0), -(tlfloat_quad)TLFLOAT_FLT128_MIN, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 1.0); + check(tlfloat_nextafterq(-(tlfloat_quad)TLFLOAT_FLT128_MIN, 1.0), -(tlfloat_quad)TLFLOAT_FLT128_MIN, SLEEF_QUAD_MANT_DIG, TLFLOAT_FLT128_DENORM_MIN, TLFLOAT_FLT128_MAX, 1.0); + + // + +#if !defined(ENABLE_PUREC_SCALAR) && !defined(ENABLE_PURECFMA_SCALAR) && !defined(ENABLE_DSPSCALAR) + // Do simple testing on splat, select and sleef_q + { + VARGQUAD v0 = xsplatq(sleef_q(+0x1921fb54442d1LL, 0x8469898cc51701b8ULL, 1)); + VARGQUAD v1 = xsplatq(sleef_q(+0x0000000000000LL, 0x0000000000000000ULL, 0)); + v1 = xsetq(v1, 1, sleef_q(+0x15bf0a8b14576LL, 0x95355fb8ac404e7aULL, 1)); + v1 = xmulq_u05(v0, v1); + + vint vi = xicmpeqq(v1, xsplatq(sleef_q(+0x1114580b45d47LL, 0x49e6108579a2d0caULL, 3))); + int t[VECTLENDP*2]; + memset(t, 0, sizeof(t)); + vstoreu_v_p_vi(t, vi); + + if (!(t[0] == 0 && t[1] == 1)) { + fprintf(stderr, "Testing on splat and select failed\n"); + exit(-1); + } + } +#endif + +#if defined(SLEEF_QUAD_C) + { + VARGQUAD v0 = xsplatq(SLEEF_QUAD_C(3.141592653589793238462643383279502884)); + VARGQUAD v1 = xsplatq(sleef_q(+0x1921fb54442d1LL, 0x8469898cc51701b8ULL, 1)); + if (Sleef_icmpneq1_purec(xgetq(v0, 0), xgetq(v1, 0))) { + fprintf(stderr, "Testing on SLEEF_QUAD_C failed\n"); + exit(-1); + } + } +#elif 
defined(ENABLE_PUREC_SCALAR) +#pragma message ("SLEEF_QUAD_C not defined") +#endif + + { + VARGQUAD v0 = xsplatq(SLEEF_M_PIq); + VARGQUAD v1 = xsplatq(bit_cast(tlfloat_strtoq("2.718281828459045235360287471352662498", nullptr))); + Sleef_quad q = xgetq(xmulq_u05(v0, v1), 0); + if (Sleef_icmpneq1_purec(q, bit_cast(tlfloat_strtoq("8.539734222673567065463550869546573820", nullptr)))) { + tlfloat_printf("Testing with xgetq failed : %.35Qg\n", q); + exit(-1); + } + } + + // + +#define STR_QUAD_MIN "3.36210314311209350626267781732175260e-4932" +#define STR_QUAD_MAX "1.18973149535723176508575932662800702e+4932" +#define STR_QUAD_DENORM_MIN "6.475175119438025110924438958227646552e-4966" + + static const char *stdCheckValsStr[] = { + "-0.0", "0.0", "+0.25", "-0.25", "+0.5", "-0.5", "+0.75", "-0.75", "+1.0", "-1.0", + "+1.25", "-1.25", "+1.5", "-1.5", "+2.0", "-2.0", "+2.5", "-2.5", "+3.0", "-3.0", + "+4.0", "-4.0", "+5.0", "-5.0", "+6.0", "-6.0", "+7.0", "-7.0", + "1.234", "-1.234", "+1.234e+100", "-1.234e+100", "+1.234e-100", "-1.234e-100", + "+1.234e+3000", "-1.234e+3000", "+1.234e-3000", "-1.234e-3000", + "3.1415926535897932384626433832795028841971693993751058209749445923078164", + "+" STR_QUAD_MIN, "-" STR_QUAD_MIN, + "+" STR_QUAD_DENORM_MIN, "-" STR_QUAD_DENORM_MIN, + "Inf", "-Inf", "NaN" + }; + +#if 0 + static const char *noNegZeroCheckValsStr[] = { + "0.0", "+0.25", "-0.25", "+0.5", "-0.5", "+0.75", "-0.75", "+1.0", "-1.0", + "+1.25", "-1.25", "+1.5", "-1.5", "+2.0", "-2.0", "+2.5", "-2.5", "+3.0", "-3.0", + "+4.0", "-4.0", "+5.0", "-5.0", "+6.0", "-6.0", "+7.0", "-7.0", + "1.234", "-1.234", "+1.234e+100", "-1.234e+100", "+1.234e-100", "-1.234e-100", + "+1.234e+3000", "-1.234e+3000", "+1.234e-3000", "-1.234e-3000", + "3.1415926535897932384626433832795028841971693993751058209749445923078164", + "+" STR_QUAD_MIN, "-" STR_QUAD_MIN, + "+" STR_QUAD_DENORM_MIN, "-" STR_QUAD_DENORM_MIN, + "Inf", "-Inf", "NaN" + }; + + static const char *noNanCheckValsStr[] = { + "-0.0", "0.0", 
"+0.25", "-0.25", "+0.5", "-0.5", "+0.75", "-0.75", "+1.0", "-1.0", + "+1.25", "-1.25", "+1.5", "-1.5", "+2.0", "-2.0", "+2.5", "-2.5", "+3.0", "-3.0", + "+4.0", "-4.0", "+5.0", "-5.0", "+6.0", "-6.0", "+7.0", "-7.0", + "1.234", "-1.234", "+1.234e+100", "-1.234e+100", "+1.234e-100", "-1.234e-100", + "+1.234e+3000", "-1.234e+3000", "+1.234e-3000", "-1.234e-3000", + "3.1415926535897932384626433832795028841971693993751058209749445923078164", + "+" STR_QUAD_MIN, "-" STR_QUAD_MIN, + "+" STR_QUAD_DENORM_MIN, "-" STR_QUAD_DENORM_MIN, + "Inf", "-Inf" + }; +#endif + + static const char *noInfCheckValsStr[] = { + "-0.0", "0.0", "+0.25", "-0.25", "+0.5", "-0.5", "+0.75", "-0.75", "+1.0", "-1.0", + "+1.25", "-1.25", "+1.5", "-1.5", "+2.0", "-2.0", "+2.5", "-2.5", "+3.0", "-3.0", + "+4.0", "-4.0", "+5.0", "-5.0", "+6.0", "-6.0", "+7.0", "-7.0", + "1.234", "-1.234", "+1.234e+100", "-1.234e+100", "+1.234e-100", "-1.234e-100", + "+1.234e+3000", "-1.234e+3000", "+1.234e-3000", "-1.234e-3000", + "3.1415926535897932384626433832795028841971693993751058209749445923078164", + "+" STR_QUAD_MIN, "-" STR_QUAD_MIN, + "+" STR_QUAD_DENORM_MIN, "-" STR_QUAD_DENORM_MIN, + "NaN" + }; + +#if 0 + static const char *finiteCheckValsStr[] = { + "-0.0", "0.0", "+0.25", "-0.25", "+0.5", "-0.5", "+0.75", "-0.75", "+1.0", "-1.0", + "+1.25", "-1.25", "+1.5", "-1.5", "+2.0", "-2.0", "+2.5", "-2.5", "+3.0", "-3.0", + "+4.0", "-4.0", "+5.0", "-5.0", "+6.0", "-6.0", "+7.0", "-7.0", + "1.234", "-1.234", "+1.234e+100", "-1.234e+100", "+1.234e-100", "-1.234e-100", + "+1.234e+3000", "-1.234e+3000", "+1.234e-3000", "-1.234e-3000", + "3.1415926535897932384626433832795028841971693993751058209749445923078164", + "+" STR_QUAD_MIN, "-" STR_QUAD_MIN, + "+" STR_QUAD_DENORM_MIN, "-" STR_QUAD_DENORM_MIN, + }; +#endif + + static const char *trigCheckValsStr[] = { + "3.141592653589793238462643383279502884197169399375105820974944592307", + "6.283185307179586476925286766559005768394338798750211641949889184615", + 
"25.13274122871834590770114706623602307357735519500084656779955673846", + "402.1238596594935345232183530597763691772376831200135450847929078154", + "102943.7080728303448379438983833027505093728468787234675417069844007", + "6746518852.261009479299491324448129057382258893044021168813308929687", + "28976077832308491369.53730422794043954984410931622923280838485698255", + "534514292032483373929840186580935391650.3203828374578833308216124114", + "1.8188578844588316214011747138886493132669668866419621497938607555896e+77", /* comma is required: adjacent string literals would otherwise merge into a single entry */ + "3.141592653589793238462643383279502884197169399375105820974944592307e+1000", + "3.141592653589793238462643383279502884197169399375105820974944592307e+2000", + }; + + static const char *bigIntCheckValsStr[] = { + "+5192296858534827628530496329220094.0", + "+5192296858534827628530496329220094.25", + "+5192296858534827628530496329220094.5", + "+5192296858534827628530496329220094.75", + "+5192296858534827628530496329220095.0", + "+5192296858534827628530496329220095.25", + "+5192296858534827628530496329220095.5", + "+5192296858534827628530496329220095.75", + "+5192296858534827628530496329220096.0", + "+5192296858534827628530496329220097.0", + "+5192296858534827628530496329220098.0", + "-5192296858534827628530496329220094.0", + "-5192296858534827628530496329220094.25", + "-5192296858534827628530496329220094.5", + "-5192296858534827628530496329220094.75", + "-5192296858534827628530496329220095.0", + "-5192296858534827628530496329220095.25", + "-5192296858534827628530496329220095.5", + "-5192296858534827628530496329220095.75", + "-5192296858534827628530496329220096.0", + "-5192296858534827628530496329220097.0", + "-5192296858534827628530496329220098.0", + }; + + static const char *log1pCheckValsStr[] = { + "-.9", "-.99999999", "-.9999999999999999", "-.9999999999999999999999999999999999" + }; + +#define DEFCHECKVALS(ASTR, AVAL) \ + static tlfloat_quad 
AVAL[sizeof(ASTR)/sizeof(ASTR[0])]; \ + for(unsigned i=0;i(tlfloat_strtoq("0", nullptr)), max = 
bit_cast(tlfloat_strtoq("1e+20", nullptr)); + for(int i=0;i<10 * NTEST;i++) { + Sleef_quad a; + if (i < int(sizeof(stdCheckVals)/sizeof(stdCheckVals[0]))) { + a = bit_cast(stdCheckVals[i]); + } else { + a = rndf128(min, max, true); + } + double t = 0, c = (double)bit_cast(a); + { + int idx = xrand() % VECTLENDP; + VARGQUAD v0; + memrand(&v0, SIZEOF_VARGQUAD); + v0 = xsetq(v0, idx, a); + vdouble vd = xcast_to_doubleq(v0); + double s[VECTLENDP]; + vstoreu_v_p_vd(s, vd); + t = s[idx]; + } + if (!((tlfloat_isnan(t) && tlfloat_isnan(c)) || t == c)) { + tlfloat_printf("arg0 = %Qa (%.35Qg), t = %a (%.16g), c = %a (%.16g)\n", + a, a, t, t, c, c); + success = false; + break; + } + } + + printf("%s\n", success ? "OK" : "NG"); + } + + // + + if (success) { + cout << "OK" << endl; + } else { + cout << "NG" << endl; + } + + return success ? 0 : -1; +} diff --git a/src/quad/qmkrename.c b/src/quad/qmkrename.c index 1270480a3..d45b33b7b 100644 --- a/src/quad/qmkrename.c +++ b/src/quad/qmkrename.c @@ -231,7 +231,7 @@ int main(int argc, char **argv) { break; case 15: assert(funcList[i].ulp == -1); - printf("SLEEF_IMPORT SLEEF_CONST void Sleef_%sq%s%s%s(Sleef_quad *, %s);\n", + printf("SLEEF_IMPORT void Sleef_%sq%s%s%s(Sleef_quad *, %s);\n", funcList[i].name, wqp, isaub, isaname, vargquadname); diff --git a/src/quad/sleefsimdqp.c b/src/quad/sleefsimdqp.c index 99195ab5d..2a7384dba 100644 --- a/src/quad/sleefsimdqp.c +++ b/src/quad/sleefsimdqp.c @@ -973,6 +973,8 @@ static INLINE CONST VECTOR_CC vmask ilogb_vm_tdx(tdx t) { static INLINE CONST VECTOR_CC tdx add_tdx_tdx_tdx(tdx dd0, tdx dd1) { // finite numbers only vmask ed = vsub64_vm_vm_vm(tdxgete_vm_tdx(dd1), tdxgete_vm_tdx(dd0)); + ed = vsel_vm_vo64_vm_vm(vandnot_vo_vo_vo(iszero_vo_tdx(dd1), iszero_vo_tdx(dd0)), vcast_vm_i64( 1000000), ed); + ed = vsel_vm_vo64_vm_vm(vandnot_vo_vo_vo(iszero_vo_tdx(dd0), iszero_vo_tdx(dd1)), vcast_vm_i64(-1000000), ed); vdouble t = vldexp3_vd_vd_vm(vcast_vd_d(1), ed); vdouble3 rd3 = 
scaleadd2_vd3_vd3_vd3_vd(tdxgetd3_vd3_tdx(dd0), tdxgetd3_vd3_tdx(dd1), t); @@ -991,6 +993,8 @@ static INLINE CONST VECTOR_CC tdx add_tdx_tdx_tdx(tdx dd0, tdx dd1) { // finite static INLINE CONST VECTOR_CC tdx sub_tdx_tdx_tdx(tdx dd0, tdx dd1) { vmask ed = vsub64_vm_vm_vm(tdxgete_vm_tdx(dd1), tdxgete_vm_tdx(dd0)); + ed = vsel_vm_vo64_vm_vm(vandnot_vo_vo_vo(iszero_vo_tdx(dd1), iszero_vo_tdx(dd0)), vcast_vm_i64( 1000000), ed); + ed = vsel_vm_vo64_vm_vm(vandnot_vo_vo_vo(iszero_vo_tdx(dd0), iszero_vo_tdx(dd1)), vcast_vm_i64(-1000000), ed); vdouble t = vldexp3_vd_vd_vm(vcast_vd_d(1), ed); vdouble3 rd3 = scalesub2_vd3_vd3_vd3_vd(tdxgetd3_vd3_tdx(dd0), tdxgetd3_vd3_tdx(dd1), t); @@ -2784,7 +2788,7 @@ EXPORT CONST VECTOR_CC vint xicmpneq(vargquad ax, vargquad ay) { vquad y = cast_vq_aq(ay), cy = cmpcnv_vq_vq(y); vopmask o = isnan_vo_vq(x); o = vandnot_vo_vo_vo(o, vnot_vo64_vo64(vand_vo_vo_vo(veq64_vo_vm_vm(vqgety_vm_vq(cy), vqgety_vm_vq(cx)), veq64_vo_vm_vm(vqgetx_vm_vq(cx), vqgetx_vm_vq(cy))))); - o = vcast_vo32_vo64(vandnot_vo_vo_vo(isnan_vo_vq(y), o)); + o = vcast_vo32_vo64(vor_vo_vo_vo(vor_vo_vo_vo(isnan_vo_vq(x), isnan_vo_vq(y)), o)); vint vi = vsel_vi_vo_vi_vi(o, vcast_vi_i(1), vcast_vi_i(0)); return vi; } @@ -3552,7 +3556,7 @@ EXPORT vargquad Sleef_strtoq(const char *str, const char **endptr) { #define FLAG_UPPER (1 << 5) static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespec, int width, int precision, int flags) { - if (width > bufsize) width = bufsize; + if (width > (int)bufsize) width = bufsize; vquad c128 = cast_vq_aq(argvalue); @@ -3583,7 +3587,7 @@ static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespe flags &= ~FLAG_ZERO; } else { if (precision < 0) precision = 6; - if (precision > bufsize/2 - 10) precision = bufsize/2 - 10; + if (precision > (int)(bufsize/2 - 10)) precision = bufsize/2 - 10; if (typespec == 'g' && precision > 0) precision--; tdx rounder = mul_tdx_tdx_tdx(cast_tdx_d(0.5), 
exp10i(-precision)); @@ -3710,7 +3714,7 @@ static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespe } static int snprintquadhex(char *buf, size_t bufsize, vargquad argvalue, int width, int precision, int flags) { - if (width > bufsize) width = bufsize; + if (width > (int)bufsize) width = bufsize; char *bufend = buf + bufsize, *ptr = buf; vquad c128 = cast_vq_aq(argvalue);