From 9c022b20fbd04fe2f4dc19ed0ce37a648badaa41 Mon Sep 17 00:00:00 2001 From: "se.gorbunov" <se.gorbunov@gsi.de> Date: Sat, 3 Sep 2022 22:00:12 +0000 Subject: [PATCH] L1: switch to Vc library for SIMD vectors --- reco/L1/CMakeLists.txt | 472 +++++++++++++++++++------------------- reco/L1/vectors/L1vec.h | 10 + reco/L1/vectors/L1vecVc.h | 86 +++++++ 3 files changed, 336 insertions(+), 232 deletions(-) create mode 100644 reco/L1/vectors/L1vec.h create mode 100644 reco/L1/vectors/L1vecVc.h diff --git a/reco/L1/CMakeLists.txt b/reco/L1/CMakeLists.txt index 1d7f583d1f..3f16fdbafa 100644 --- a/reco/L1/CMakeLists.txt +++ b/reco/L1/CMakeLists.txt @@ -1,77 +1,76 @@ -#Create a library called "libL1" which includes the source files given in -#the array. -#The extension is already found.Any number of sources could be listed here. +# Create a library called "libL1" which includes the source files given in +# the array. +# The extension is already found.Any number of sources could be listed here. # extra warnings to examine the code -#ADD_DEFINITIONS(-Wall -Wextra -Wshadow -Weffc++) +# ADD_DEFINITIONS(-Wall -Wextra -Wshadow -Weffc++) # L1 defines ADD_DEFINITIONS(-DDO_TPCCATRACKER_EFF_PERFORMANCE -DNonhomogeneousField -DCBM -DUSE_TIMERS) -IF (SSE_FOUND) +IF(SSE_FOUND) ADD_DEFINITIONS(-DHAVE_SSE) Message(STATUS "L1 will be compiled with SSE support") -ELSE (SSE_FOUND) +ELSE(SSE_FOUND) Message(FATAL_ERROR "L1 can not be compiled without SSE support") -ENDIF (SSE_FOUND) - +ENDIF(SSE_FOUND) Set(INCLUDE_DIRECTORIES -${CBMROOT_SOURCE_DIR}/reco/base -${CBMROOT_SOURCE_DIR}/reco/detectors/sts -${CBMROOT_SOURCE_DIR}/reco/detectors/rich -${CBMROOT_SOURCE_DIR}/reco/detectors/rich/fitter -${CBMBASE_DIR} - -${CBMDATA_DIR} -${CBMDATA_DIR}/raw -${CBMDATA_DIR}/sts -${CBMDATA_DIR}/much -${CBMDATA_DIR}/mvd -${CBMDATA_DIR}/trd -${CBMDATA_DIR}/tof -${CBMDATA_DIR}/rich -${CBMDATA_DIR}/global - -${CBMROOT_SOURCE_DIR}/core/qa - -${CBMROOT_SOURCE_DIR}/rich/reco/fitter - -${CBMROOT_SOURCE_DIR}/reco/L1 
-${CBMROOT_SOURCE_DIR}/reco/L1/L1Algo -${CBMROOT_SOURCE_DIR}/reco/L1/OffLineInterface -${CBMROOT_SOURCE_DIR}/reco/L1/ParticleFinder -${CBMROOT_SOURCE_DIR}/reco/L1/qa - -${CBMROOT_SOURCE_DIR}/reco/KF -${CBMROOT_SOURCE_DIR}/reco/KF/KFQA -${CBMROOT_SOURCE_DIR}/reco/KF/Interface -${CBMROOT_SOURCE_DIR}/run -${CBMROOT_SOURCE_DIR}/sim/transport/steer -${CBMROOT_SOURCE_DIR}/sim/transport/geosetup - -${CBMDATA_DIR} -${CBMDATA_DIR}/base - -${CBMDETECTORBASE_DIR}/sts - -${CBMROOT_SOURCE_DIR}/mvd + ${CBMROOT_SOURCE_DIR}/reco/base + ${CBMROOT_SOURCE_DIR}/reco/detectors/sts + ${CBMROOT_SOURCE_DIR}/reco/detectors/rich + ${CBMROOT_SOURCE_DIR}/reco/detectors/rich/fitter + ${CBMBASE_DIR} + + ${CBMDATA_DIR} + ${CBMDATA_DIR}/raw + ${CBMDATA_DIR}/sts + ${CBMDATA_DIR}/much + ${CBMDATA_DIR}/mvd + ${CBMDATA_DIR}/trd + ${CBMDATA_DIR}/tof + ${CBMDATA_DIR}/rich + ${CBMDATA_DIR}/global + + ${CBMROOT_SOURCE_DIR}/core/qa + + ${CBMROOT_SOURCE_DIR}/rich/reco/fitter + + ${CBMROOT_SOURCE_DIR}/reco/L1 + ${CBMROOT_SOURCE_DIR}/reco/L1/L1Algo + ${CBMROOT_SOURCE_DIR}/reco/L1/OffLineInterface + ${CBMROOT_SOURCE_DIR}/reco/L1/ParticleFinder + ${CBMROOT_SOURCE_DIR}/reco/L1/qa + + ${CBMROOT_SOURCE_DIR}/reco/KF + ${CBMROOT_SOURCE_DIR}/reco/KF/KFQA + ${CBMROOT_SOURCE_DIR}/reco/KF/Interface + ${CBMROOT_SOURCE_DIR}/run + ${CBMROOT_SOURCE_DIR}/sim/transport/steer + ${CBMROOT_SOURCE_DIR}/sim/transport/geosetup + + ${CBMDATA_DIR} + ${CBMDATA_DIR}/base + + ${CBMDETECTORBASE_DIR}/sts + + ${CBMROOT_SOURCE_DIR}/mvd ${CBMDETECTORBASE_DIR}/trd ${CBMDETECTORBASE_DIR}/much ${CBMDETECTORBASE_DIR}/tof ) -Include_Directories( ${INCLUDE_DIRECTORIES}) +Include_Directories(${INCLUDE_DIRECTORIES}) Set(SYSTEM_INCLUDE_DIRECTORIES ${VC_INCLUDE_DIRS} - ${BASE_INCLUDE_DIRECTORIES} + ${BASE_INCLUDE_DIRECTORIES} ${KFParticle_INCLUDE_DIR} ) -if (OPENMP_FOUND) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +if(OPENMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} 
${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") set(SYSTEM_INCLUDE_DIRECTORIES ${SYSTEM_INCLUDE_DIRECTORIES} ${OpenMP_CXX_INCLUDE_DIRS} @@ -81,234 +80,243 @@ endif() Include_Directories(SYSTEM ${SYSTEM_INCLUDE_DIRECTORIES}) set(LINK_DIRECTORIES -${Vc_LIB_DIR} -${KFParticle_LIB_DIR} -${FAIRROOT_LIBRARY_DIR} -${ROOT_LIBRARY_DIR} -${Boost_LIBRARY_DIRS} + ${Vc_LIB_DIR} + ${KFParticle_LIB_DIR} + ${FAIRROOT_LIBRARY_DIR} + ${ROOT_LIBRARY_DIR} + ${Boost_LIBRARY_DIRS} ) -if (OPENMP_FOUND AND APPLE) +if(OPENMP_FOUND AND APPLE) get_filename_component(OpenMP_CXX_LIBRARY_DIR ${OpenMP_CXX_LIBRARIES} DIRECTORY) Set(LINK_DIRECTORIES ${LINK_DIRECTORIES} ${OpenMP_CXX_LIBRARY_DIR} ) endif() - -link_directories( ${LINK_DIRECTORIES}) + +link_directories(${LINK_DIRECTORIES}) # ----- Compilation sources -------------------------------------------- set(SRCS -#L1Algo / L1CATrackFinder.cxx -#CbmL1Performance.cxx -#CbmL1ReadEvent.cxx -#CbmL1CATrdTrackFinderSA.cxx -CbmL1TrdTracklet.cxx -CbmL1TrdTracklet4.cxx -CbmL1.cxx -#CbmL1TrdTrackFinderSts.cxx -CbmL1TrackMerger.cxx -CbmL1TofMerger.cxx -OffLineInterface/CbmL1RichENNRingFinder.cxx -OffLineInterface/CbmL1RichENNRingFinderParallel.cxx -OffLineInterface/CbmL1StsTrackFinder.cxx -OffLineInterface/CbmL1GlobalTrackFinder.cxx -OffLineInterface/CbmL1GlobalFindTracksEvents.cxx -#OffLineInterface / CbmL1MuchFinder.cxx -#OffLineInterface / CbmL1MuchHit.cxx -#OffLineInterface / CbmL1MuchTrack.cxx -#OffLineInterface / CbmL1MuchFinderQa.cxx -OffLineInterface/CbmL1RichRingQa.cxx -#OffLineInterface / CbmL1SttHit.cxx -#OffLineInterface / CbmL1SttTrackFinder.cxx -#OffLineInterface / CbmL1SttTrack.cxx - -L1Algo/L1Algo.cxx -L1Algo/L1CATrackFinder.cxx -L1Algo/L1TrackExtender.cxx -L1Algo/L1TrackFitter.cxx -L1Algo/L1HitsSortHelper.cxx -L1Algo/L1Grid.cxx -CbmL1Performance.cxx -CbmL1ReadEvent.cxx -L1Algo/L1Station.cxx -L1Algo/L1TrackParFit.cxx -L1Algo/L1Event.cxx -L1Algo/L1EventMatch.cxx -L1Algo/L1MCEvent.cxx -L1Algo/L1Fit.cxx 
-L1Algo/L1FitMaterial.cxx -L1Algo/L1Extrapolation.cxx -CbmL1MCTrack.cxx -L1Algo/L1MaterialInfo.cxx -L1Algo/L1UMeasurementInfo.cxx -L1Algo/L1XYMeasurementInfo.cxx -L1Algo/L1Field.cxx -L1Algo/L1CAIteration.cxx -L1Algo/L1BaseStationInfo.cxx -L1Algo/L1InitManager.cxx -L1Algo/L1Parameters.cxx -L1Algo/L1InputData.cxx -L1Algo/L1IODataManager.cxx -L1Algo/L1ClonesMerger.cxx -L1Algo/L1ConfigRW.cxx -L1Algo/utils/L1AlgoDraw.cxx -L1Algo/utils/L1AlgoEfficiencyPerformance.cxx -L1Algo/utils/L1AlgoPulls.cxx - - -ParticleFinder/CbmL1PFFitter.cxx -ParticleFinder/CbmL1PFMCParticle.cxx - -qa/CbmTrackerInputQaTrd.cxx -qa/CbmTrackingInputQaSts.cxx + + # L1Algo / L1CATrackFinder.cxx + # CbmL1Performance.cxx + # CbmL1ReadEvent.cxx + # CbmL1CATrdTrackFinderSA.cxx + CbmL1TrdTracklet.cxx + CbmL1TrdTracklet4.cxx + CbmL1.cxx + + # CbmL1TrdTrackFinderSts.cxx + CbmL1TrackMerger.cxx + CbmL1TofMerger.cxx + OffLineInterface/CbmL1RichENNRingFinder.cxx + OffLineInterface/CbmL1RichENNRingFinderParallel.cxx + OffLineInterface/CbmL1StsTrackFinder.cxx + OffLineInterface/CbmL1GlobalTrackFinder.cxx + OffLineInterface/CbmL1GlobalFindTracksEvents.cxx + + # OffLineInterface / CbmL1MuchFinder.cxx + # OffLineInterface / CbmL1MuchHit.cxx + # OffLineInterface / CbmL1MuchTrack.cxx + # OffLineInterface / CbmL1MuchFinderQa.cxx + OffLineInterface/CbmL1RichRingQa.cxx + + # OffLineInterface / CbmL1SttHit.cxx + # OffLineInterface / CbmL1SttTrackFinder.cxx + # OffLineInterface / CbmL1SttTrack.cxx + L1Algo/L1Algo.cxx + L1Algo/L1CATrackFinder.cxx + L1Algo/L1TrackExtender.cxx + L1Algo/L1TrackFitter.cxx + L1Algo/L1HitsSortHelper.cxx + L1Algo/L1Grid.cxx + CbmL1Performance.cxx + CbmL1ReadEvent.cxx + L1Algo/L1Station.cxx + L1Algo/L1TrackParFit.cxx + L1Algo/L1Event.cxx + L1Algo/L1EventMatch.cxx + L1Algo/L1MCEvent.cxx + L1Algo/L1Fit.cxx + L1Algo/L1FitMaterial.cxx + L1Algo/L1Extrapolation.cxx + CbmL1MCTrack.cxx + L1Algo/L1MaterialInfo.cxx + L1Algo/L1UMeasurementInfo.cxx + L1Algo/L1XYMeasurementInfo.cxx + L1Algo/L1Field.cxx + 
L1Algo/L1CAIteration.cxx + L1Algo/L1BaseStationInfo.cxx + L1Algo/L1InitManager.cxx + L1Algo/L1Parameters.cxx + L1Algo/L1InputData.cxx + L1Algo/L1IODataManager.cxx + L1Algo/L1ClonesMerger.cxx + L1Algo/L1ConfigRW.cxx + L1Algo/utils/L1AlgoDraw.cxx + L1Algo/utils/L1AlgoEfficiencyPerformance.cxx + L1Algo/utils/L1AlgoPulls.cxx + + ParticleFinder/CbmL1PFFitter.cxx + ParticleFinder/CbmL1PFMCParticle.cxx + + qa/CbmTrackerInputQaTrd.cxx + qa/CbmTrackingInputQaSts.cxx ) + # ---------------------------------------------------------------------- # ----- Headers -------------------------------------------------------- set(HEADERS -CbmL1CATrdTrackFinderSA.h -CbmL1.h -CbmL1MCTrack.h -CbmL1MCPoint.h -CbmL1Hit.h -CbmL1TofMerger.h -CbmL1Track.h -CbmL1TrackMerger.h -CbmL1TrackPar.h -CbmL1TrdHit.h -#CbmL1TrdTrackFinderSts.h -CbmL1TrdTracklet4.h -CbmL1TrdTracklet.h -CbmL1Vtx.h -#OffLineInterface / CbmL1MuchFinder.h -#OffLineInterface / CbmL1MuchFinderQa.h -#OffLineInterface / CbmL1MuchHit.h -#OffLineInterface / CbmL1MuchTrack.h -OffLineInterface/CbmL1RichENNRingFinder.h -OffLineInterface/CbmL1RichRingQa.h -OffLineInterface/CbmL1StsTrackFinder.h -OffLineInterface/CbmL1GlobalTrackFinder.h -OffLineInterface/CbmL1GlobalFindTracksEvents.h -#OffLineInterface / CbmL1SttHit.h -#OffLineInterface / CbmL1SttTrackFinder.h -#OffLineInterface / CbmL1SttTrack.h -L1Algo/L1Def.h -L1Algo/L1Vector.h -qa/CbmTrackerInputQaTrd.h -qa/CbmTrackingInputQaSts.h + CbmL1CATrdTrackFinderSA.h + CbmL1.h + CbmL1MCTrack.h + CbmL1MCPoint.h + CbmL1Hit.h + CbmL1TofMerger.h + CbmL1Track.h + CbmL1TrackMerger.h + CbmL1TrackPar.h + CbmL1TrdHit.h + + # CbmL1TrdTrackFinderSts.h + CbmL1TrdTracklet4.h + CbmL1TrdTracklet.h + CbmL1Vtx.h + + # OffLineInterface / CbmL1MuchFinder.h + # OffLineInterface / CbmL1MuchFinderQa.h + # OffLineInterface / CbmL1MuchHit.h + # OffLineInterface / CbmL1MuchTrack.h + OffLineInterface/CbmL1RichENNRingFinder.h + OffLineInterface/CbmL1RichRingQa.h + OffLineInterface/CbmL1StsTrackFinder.h + 
OffLineInterface/CbmL1GlobalTrackFinder.h + OffLineInterface/CbmL1GlobalFindTracksEvents.h + + # OffLineInterface / CbmL1SttHit.h + # OffLineInterface / CbmL1SttTrackFinder.h + # OffLineInterface / CbmL1SttTrack.h + L1Algo/L1Def.h + L1Algo/L1Vector.h + qa/CbmTrackerInputQaTrd.h + qa/CbmTrackingInputQaSts.h ) -# ---------------------------------------------------------------------- - - - +# ---------------------------------------------------------------------- If(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - ADD_DEFINITIONS(-Wall -Wsign-promo -Wctor-dtor-privacy -Wreorder -Wno-deprecated -Wno-parentheses) # -Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wold-style-cast : wait for other parts of cbmroot\root. -#-- - Check for compiler flags + ADD_DEFINITIONS(-Wall -Wsign-promo -Wctor-dtor-privacy -Wreorder -Wno-deprecated -Wno-parentheses) # -Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wold-style-cast : wait for other parts of cbmroot\root. + + # -- - Check for compiler flags CHECK_CXX_COMPILER_FLAG("-Werror -Wno-pmf-conversions" HAS_PMF) + If(HAS_PMF) ADD_DEFINITIONS(-Wno-pmf-conversions) EndIf() + CHECK_CXX_COMPILER_FLAG("-Werror -Wstrict-null-sentinel" HAS_SENTINEL) + If(HAS_SENTINEL) ADD_DEFINITIONS(-Wstrict-null-sentinel) EndIf() + CHECK_CXX_COMPILER_FLAG("-Werror -Wno-non-template-friend" HAS_TEMPLATE_FRIEND) + If(HAS_TEMPLATE_FRIEND) ADD_DEFINITIONS(-Wno-non-template-friend) EndIf() Else() - ADD_DEFINITIONS(-Wall -Wsign-promo -Wno-pmf-conversions -Wctor-dtor-privacy -Wreorder -Wno-deprecated -Wstrict-null-sentinel -Wno-non-template-friend -Wno-parentheses) # -Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wold-style-cast : wait for other parts of cbmroot\root. + ADD_DEFINITIONS(-Wall -Wsign-promo -Wno-pmf-conversions -Wctor-dtor-privacy -Wreorder -Wno-deprecated -Wstrict-null-sentinel -Wno-non-template-friend -Wno-parentheses) # -Weffc++ -Wnon-virtual-dtor -Woverloaded-virtual -Wold-style-cast : wait for other parts of cbmroot\root. 
EndIf() - -IF (SSE_FOUND) +IF(SSE_FOUND) ADD_DEFINITIONS(-DHAVE_SSE) - SET_SOURCE_FILES_PROPERTIES(${SRCS} PROPERTIES COMPILE_FLAGS - "-msse -O3") + SET_SOURCE_FILES_PROPERTIES(${SRCS} PROPERTIES COMPILE_FLAGS + "-msse -O3") Message(STATUS "L1 will be compiled with SSE support") -ELSE (SSE_FOUND) +ELSE(SSE_FOUND) Message(STATUS "L1 will be compiled without SSE support") - SET_SOURCE_FILES_PROPERTIES(${SRCS} PROPERTIES COMPILE_FLAGS - "-O3") -ENDIF (SSE_FOUND) + SET_SOURCE_FILES_PROPERTIES(${SRCS} PROPERTIES COMPILE_FLAGS + "-O3") +ENDIF(SSE_FOUND) Set(LINKDEF L1LinkDef.h) Set(LIBRARY_NAME L1) Set(DEPENDENCIES - KF - KFParticle - Base - CbmBase - CbmData - CbmSimSteer -# CbmGeoSetup - CbmMuchBase - CbmTrdBase - CbmTofBase - CbmStsBase - CbmRecoBase - CbmRecoSts - CbmQaBase - boost_regex - external::yaml-cpp - ) - -if (OPENMP_FOUND AND APPLE) + KF + KFParticle + Base + CbmBase + CbmData + CbmSimSteer + + # CbmGeoSetup + CbmMuchBase + CbmTrdBase + CbmTofBase + CbmStsBase + CbmRecoBase + CbmRecoSts + CbmQaBase + boost_regex + external::yaml-cpp +) + +if(OPENMP_FOUND AND APPLE) Set(DEPENDENCIES ${DEPENDENCIES} ${OpenMP_CXX_LIBRARIES} ) endif() GENERATE_LIBRARY() Install(FILES CbmL1Counters.h - L1Algo/L1Assert.h - L1Algo/L1EventEfficiencies.h - L1Algo/L1Grid.h - L1Algo/L1Branch.h - L1Algo/L1Field.h - L1Algo/L1Grid.h - L1Algo/L1Hit.h - L1Algo/L1HitPoint.h - L1Algo/L1HitsSortHelper.h - L1Algo/L1Portion.h - L1Algo/L1Station.h - L1Algo/L1Track.h - L1Algo/L1TrackPar.h - L1Algo/L1TrackParFit.h - L1Algo/L1Triplet.h - L1Algo/L1Vector.h - L1Algo/L1ObjectInitController.h - L1Algo/L1MaterialInfo.h - L1Algo/L1UMeasurementInfo.h - L1Algo/L1XYMeasurementInfo.h - L1Algo/L1BaseStationInfo.h - L1Algo/L1InitManager.h - L1Algo/L1CAIteration.h - L1Algo/L1Parameters.h - L1Algo/L1InputData.h - L1Algo/L1IODataManager.h - L1Algo/L1ClonesMerger.h - L1Algo/L1ConfigRW.h - L1Algo/L1Constants.h - L1Algo/L1Utils.h - L1Algo/L1NaN.h - vectors/vec_arithmetic.h - vectors/std_alloc.h - DESTINATION include 
- ) + L1Algo/L1Assert.h + L1Algo/L1EventEfficiencies.h + L1Algo/L1Grid.h + L1Algo/L1Branch.h + L1Algo/L1Field.h + L1Algo/L1Grid.h + L1Algo/L1Hit.h + L1Algo/L1HitPoint.h + L1Algo/L1HitsSortHelper.h + L1Algo/L1Portion.h + L1Algo/L1Station.h + L1Algo/L1Track.h + L1Algo/L1TrackPar.h + L1Algo/L1TrackParFit.h + L1Algo/L1Triplet.h + L1Algo/L1Vector.h + L1Algo/L1ObjectInitController.h + L1Algo/L1MaterialInfo.h + L1Algo/L1UMeasurementInfo.h + L1Algo/L1XYMeasurementInfo.h + L1Algo/L1BaseStationInfo.h + L1Algo/L1InitManager.h + L1Algo/L1CAIteration.h + L1Algo/L1Parameters.h + L1Algo/L1InputData.h + L1Algo/L1IODataManager.h + L1Algo/L1ClonesMerger.h + L1Algo/L1ConfigRW.h + L1Algo/L1Constants.h + L1Algo/L1Utils.h + L1Algo/L1NaN.h + vectors/vec_arithmetic.h + vectors/std_alloc.h + DESTINATION include +) Install(FILES L1Algo/L1Algo.h - L1Algo/L1Branch.h - L1Algo/L1Field.h - L1Algo/L1Hit.h - L1Algo/L1Vector.h - DESTINATION include/L1Algo - ) - -Install(FILES vectors/P4_F32vec4.h - DESTINATION include/vectors - ) + L1Algo/L1Branch.h + L1Algo/L1Field.h + L1Algo/L1Hit.h + L1Algo/L1Vector.h + DESTINATION include/L1Algo +) + +Install(FILES vectors/L1vecVc.h + DESTINATION include/vectors +) diff --git a/reco/L1/vectors/L1vec.h b/reco/L1/vectors/L1vec.h new file mode 100644 index 0000000000..5c04fc2a4f --- /dev/null +++ b/reco/L1/vectors/L1vec.h @@ -0,0 +1,10 @@ +/* Copyright (C) 2010-2014 Frankfurt Institute for Advanced Studies, Goethe-Universität Frankfurt, Frankfurt + SPDX-License-Identifier: GPL-3.0-only + Authors: Sergey Gorbunov [committer]*/ + +#ifndef L1vec_H +#define L1vec_H + +#include "vectors/L1vecVc.h" + +#endif diff --git a/reco/L1/vectors/L1vecVc.h b/reco/L1/vectors/L1vecVc.h new file mode 100644 index 0000000000..cf27cb1c36 --- /dev/null +++ b/reco/L1/vectors/L1vecVc.h @@ -0,0 +1,86 @@ +/* Copyright (C) 2010-2014 Frankfurt Institute for Advanced Studies, Goethe-Universität Frankfurt, Frankfurt + SPDX-License-Identifier: GPL-3.0-only + Authors: Igor Kulakov [committer], 
Maksym Zyzak */
+
+#ifndef L1vecVc_H
+#define L1vecVc_H
+
+#include "Vc/Vc"
+
+typedef Vc::float_v fvec;       // SIMD vector of floats
+typedef fvec::EntryType fscal;  // type of a single entry of fvec
+typedef Vc::float_m fmask;      // mask type belonging to fvec
+const int fvecLen = fvec::Size;
+
+#define _fvecalignment __attribute__((aligned(Vc::VectorAlignment)))
+
+/// Mixed scalar/vector arithmetic: the scalar operand is converted to an fvec first
+inline fvec operator*(fscal a, const fvec& b) { return fvec(a) * b; }
+inline fvec operator*(const fvec& a, fscal b) { return a * fvec(b); }
+
+inline fvec operator/(fscal a, const fvec& b) { return fvec(a) / b; }
+inline fvec operator/(const fvec& a, fscal b) { return a / fvec(b); }
+
+inline fvec operator+(fscal a, const fvec& b) { return fvec(a) + b; }
+inline fvec operator+(const fvec& a, fscal b) { return a + fvec(b); }
+
+inline fvec operator-(fscal a, const fvec& b) { return fvec(a) - b; }
+inline fvec operator-(const fvec& a, fscal b) { return a - fvec(b); }
+
+/// Per-entry select: takes the entry of b where mask a is set, the entry of c elsewhere
+inline fvec if3(const fmask& a, const fvec& b, const fvec& c)
+{
+  // per-entry equivalent of (a ? b : c), done as a masked assignment on a copy of c
+  fvec ret = c;
+  ret(a)   = b;
+  return ret;
+}
+
+inline fmask MaskOne() { return fmask::One(); }    // mask with all entries set
+inline fmask MaskZero() { return fmask::Zero(); }  // mask with all entries cleared
+
+/// Absolute value of every entry (forwards to the abs() overload found for fvec)
+inline fvec fabs(const fvec& a) { return abs(a); }
+
+/// Keeps the entries of a where the mask is set and replaces all others by zero
+inline fvec masked(const fvec& a, const fmask& mask) { return if3(mask, a, fvec::Zero()); }
+
+/// Converts a mask to a vector: fvec::One() where the mask is set, fvec::Zero() elsewhere
+inline fvec mask2int(const fmask& mask)
+{
+  return if3(mask, fvec::One(), fvec::Zero());
+}
+
+/// Checks whether all entries of the vector hold the same value.
+/// NOTE: NaN never compares equal (not even to itself), so with more than one entry
+/// the function returns false as soon as any entry is NaN
+inline bool IsHorizontallyEqual(const fvec& v)
+{
+  fscal s  = v[0];
+  bool ret = true;
+  for (int i = 1; i < fvecLen; i++) {
+    ret = ret && (v[i] == s);
+  }
+  return ret;
+}
+
+/// Checks whether at least one entry of the vector is NaN
+inline bool IsNanAny(const fvec& v)
+{
+  bool ret = false;
+  for (int i = 0; i < fvecLen; i++) {
+    ret = ret || std::isnan(v[i]);
+  }
+  return ret;
+}
+
+/// Returns true if no entry of the mask is set
+inline bool EmptyFmask(const fmask& a) { return a.isEmpty(); }
+
+/// Returns true if at least one entry of the mask is set
+inline bool NotEmptyFmask(const fmask& a) { return !EmptyFmask(a); }
+
+#include "std_alloc.h"
+
+
+#endif
-- 
GitLab