From 2eb90d0418eb9af4892a407317d85676428ee014 Mon Sep 17 00:00:00 2001 From: "se.gorbunov" <se.gorbunov@gsi.de> Date: Tue, 23 Aug 2022 23:06:57 +0000 Subject: [PATCH] L1: fix pseudo vectors --- reco/L1/CbmL1Performance.cxx | 8 +- reco/L1/L1Algo/L1ClonesMerger.h | 2 +- reco/L1/L1Algo/L1Constants.h | 1 - reco/L1/L1Algo/L1NaN.h | 9 +- reco/L1/L1Algo/L1Utils.h | 7 +- reco/L1/vectors/PSEUDO_F32vec4.h | 169 ++++++++++++++++++++----------- 6 files changed, 118 insertions(+), 78 deletions(-) diff --git a/reco/L1/CbmL1Performance.cxx b/reco/L1/CbmL1Performance.cxx index 5e5de1d3f5..09d171bcde 100644 --- a/reco/L1/CbmL1Performance.cxx +++ b/reco/L1/CbmL1Performance.cxx @@ -386,7 +386,7 @@ void CbmL1::EfficienciesPerformance() if (mtra.p > CbmL1Constants::MinRefMom) { // reference tracks ntra.Inc(reco, killed, ratio_length, ratio_fakes, nclones, mc_length, mc_length_hits, "fast"); - if (mtra.IsPrimary()) { // reference primary + if (mtra.IsPrimary()) { // reference primary if (mtra.NStations() == fNStations) { // long reference primary ntra.Inc(reco, killed, ratio_length, ratio_fakes, nclones, mc_length, mc_length_hits, "long_fast_prim"); } @@ -1402,7 +1402,7 @@ void CbmL1::TrackFitPerformance() { // extrapolate to vertex L1FieldRegion fld _fvecalignment; L1FieldValue B[3] _fvecalignment; - float z[3]; + float z[3] = {0.f, 0.f, 0.f}; for (unsigned int ih = 0; ih < 3; ih++) { if (ih >= mc.Points.size()) continue; //If nofMCPoints in track < 3 const int iMCP = mc.Points[ih]; @@ -2233,9 +2233,9 @@ void CbmL1::InputPerformance() if (hm->GetNofLinks() == 0) continue; - Float_t bestWeight = 0.f; + Float_t bestWeight = 0.f; //Float_t totalWeight = 0.f; - int iMCPoint = -1; + int iMCPoint = -1; CbmLink link; for (int iLink = 0; iLink < hm->GetNofLinks(); iLink++) { diff --git a/reco/L1/L1Algo/L1ClonesMerger.h b/reco/L1/L1Algo/L1ClonesMerger.h index 9b76aefdfa..072d13c0f6 100644 --- a/reco/L1/L1Algo/L1ClonesMerger.h +++ b/reco/L1/L1Algo/L1ClonesMerger.h @@ -10,9 +10,9 @@ #ifndef L1ClonesMerger_h #define L1ClonesMerger_h 1 +#include "L1Def.h" #include "L1Hit.h" // For L1HitIndex_t #include "L1Vector.h" -#include "vectors/P4_F32vec4.h" class L1Track; class L1Algo; diff --git a/reco/L1/L1Algo/L1Constants.h b/reco/L1/L1Algo/L1Constants.h index fbfff4e2f8..c2ae4b477c 100644 --- a/reco/L1/L1Algo/L1Constants.h +++ b/reco/L1/L1Algo/L1Constants.h @@ -13,7 +13,6 @@ #define L1Constants_h 1 #include "L1NaN.h" -#include "vectors/P4_F32vec4.h" // for fvecLen /// Namespace contains compile-time constants definition for the L1 tracking algorithm namespace L1Constants diff --git a/reco/L1/L1Algo/L1NaN.h b/reco/L1/L1Algo/L1NaN.h index 91e3b8e580..a83807d8f0 100644 --- a/reco/L1/L1Algo/L1NaN.h +++ b/reco/L1/L1Algo/L1NaN.h @@ -12,17 +12,12 @@ #ifndef L1NaN_h #define L1NaN_h 1 -#include <type_traits> - #include <limits> +#include <type_traits> #include <cmath> -#if defined(__CLING__) && defined(__arm64__) -#include "vectors/PSEUDO_F32vec4.h" -#else -#include "vectors/P4_F32vec4.h" -#endif +#include "L1Def.h" /// Namespace L1NaN defines functions to set variables to NaN and check wether they are NaN or not /// diff --git a/reco/L1/L1Algo/L1Utils.h b/reco/L1/L1Algo/L1Utils.h index a813f52a36..7b053e5065 100644 --- a/reco/L1/L1Algo/L1Utils.h +++ b/reco/L1/L1Algo/L1Utils.h @@ -10,7 +10,6 @@ #ifndef L1Utils_h #define L1Utils_h 1 - #include <FairLogger.h> #include <chrono> @@ -24,11 +23,7 @@ #include <cmath> -#if defined(__CLING__) && defined(__arm64__) -#include "vectors/PSEUDO_F32vec4.h" -#else -#include "vectors/P4_F32vec4.h" -#endif +#include "L1Def.h" /// Class contains some utility functions for L1Algo namespace L1Utils diff --git a/reco/L1/vectors/PSEUDO_F32vec4.h b/reco/L1/vectors/PSEUDO_F32vec4.h index 47b66abeb5..fa985d9e60 100644 --- a/reco/L1/vectors/PSEUDO_F32vec4.h +++ b/reco/L1/vectors/PSEUDO_F32vec4.h @@ -5,6 +5,7 @@ #ifndef L1Algo_PSEUDO_F32vec4_H #define L1Algo_PSEUDO_F32vec4_H +#include <iomanip> #include <iostream> #include <cmath> @@ -25,16 +26,31 @@ float rsqrt(float x); float rcp(float x); float sgn(float x); -class F32vec4 { + +union F32scalUnion { + float f; + int i; + F32scalUnion() : f(0.f) {} + F32scalUnion(int j) : i(j) {} + F32scalUnion(float g) : f(g) {} +}; + +//const F32scalUnion __f32scal_abs_mask {static_cast<int>(0x7fffffff)}; +//const F32scalUnion __f32scal_sgn_mask {static_cast<int>(0x80000000)}; +const F32scalUnion __f32scal_true {static_cast<int>(0xFFFFFFFF)}; +const F32scalUnion __f32scal_false {static_cast<int>(0x00000000)}; +class F32vec4 { + public: - float v[4]; + F32scalUnion v[4]; + + float& operator[](int i) { return v[i].f; } + float operator[](int i) const { return v[i].f; } - float& operator[](int i) { return ((float*) &v)[i]; } - float operator[](int i) const { return ((float*) &v)[i]; } + F32vec4() : F32vec4(0.f) {} - F32vec4() {} F32vec4(const F32vec4& a) { v[0] = a.v[0]; @@ -42,45 +58,64 @@ public: v[2] = a.v[2]; v[3] = a.v[3]; } - F32vec4(const float& a) + + F32vec4(float a) { - v[0] = a; - v[1] = a; - v[2] = a; - v[3] = a; + v[0].f = a; + v[1].f = a; + v[2].f = a; + v[3].f = a; } - F32vec4(const float& f0, const float& f1, const float& f2, const float& f3) + F32vec4(float f0, float f1, float f2, float f3) { - v[0] = f0; - v[1] = f1; - v[2] = f2; - v[3] = f3; + v[0].f = f0; + v[1].f = f1; + v[2].f = f2; + v[3].f = f3; } +#define _f1(A, F) \ + F32vec4 z; \ + z[0] = F(A[0]); \ + z[1] = F(A[1]); \ + z[2] = F(A[2]); \ + z[3] = F(A[3]); \ + return z; #define _f2(A, B, F) \ F32vec4 z; \ - z.v[0] = F(A.v[0], B.v[0]); \ - z.v[1] = F(A.v[1], B.v[1]); \ - z.v[2] = F(A.v[2], B.v[2]); \ - z.v[3] = F(A.v[3], B.v[3]); \ + z[0] = F(A[0], B[0]); \ + z[1] = F(A[1], B[1]); \ + z[2] = F(A[2], B[2]); \ + z[3] = F(A[3], B[3]); \ return z; -#define _f1(A, F) \ + +#define _op(A, B, F) \ + F32vec4 z; \ + z[0] = (A[0] F B[0]); \ + z[1] = (A[1] F B[1]); \ + z[2] = (A[2] F B[2]); \ + z[3] = (A[3] F B[3]); \ + return z; + +#define _opBit(A, B, F) \ F32vec4 z; \ - z.v[0] = F(A.v[0]); \ - z.v[1] = F(A.v[1]); \ - z.v[2] = F(A.v[2]); \ - z.v[3] = F(A.v[3]); \ + z.v[0].i = (A.v[0].i F B.v[0].i); \ + z.v[1].i = (A.v[1].i F B.v[1].i); \ + z.v[2].i = (A.v[2].i F B.v[2].i); \ + z.v[3].i = (A.v[3].i F B.v[3].i); \ return z; -#define _op(A, B, O) \ + +#define _opComp(A, B, F) \ F32vec4 z; \ - z.v[0] = A.v[0] O B.v[0]; \ - z.v[1] = A.v[1] O B.v[1]; \ - z.v[2] = A.v[2] O B.v[2]; \ - z.v[3] = A.v[3] O B.v[3]; \ + z[0] = (A[0] F B[0]) ? __f32scal_true.f : __f32scal_false.f; \ + z[1] = (A[1] F B[1]) ? __f32scal_true.f : __f32scal_false.f; \ + z[2] = (A[2] F B[2]) ? __f32scal_true.f : __f32scal_false.f; \ + z[3] = (A[3] F B[3]) ? __f32scal_true.f : __f32scal_false.f; \ return z; + /* Arithmetic Operators */ friend F32vec4 operator+(const F32vec4& a, const F32vec4& b) { _op(a, b, +) } friend F32vec4 operator-(const F32vec4& a, const F32vec4& b) { _op(a, b, -) } @@ -88,42 +123,46 @@ public: friend F32vec4 operator/(const F32vec4& a, const F32vec4& b) { _op(a, b, /) } /* Comparison */ - friend F32vec4 operator<(const F32vec4& a, const F32vec4& b) { _op(a, b, <) } - friend F32vec4 operator<=(const F32vec4& a, const F32vec4& b) { _op(a, b, <=) } - friend F32vec4 operator>(const F32vec4& a, const F32vec4& b) { _op(a, b, >) } - friend F32vec4 operator>=(const F32vec4& a, const F32vec4& b) { _op(a, b, >=) } + friend F32vec4 operator<(const F32vec4& a, const F32vec4& b) { _opComp(a, b, <) } + friend F32vec4 operator<=(const F32vec4& a, const F32vec4& b) { _opComp(a, b, <=) } + friend F32vec4 operator>(const F32vec4& a, const F32vec4& b) { _opComp(a, b, >) } + friend F32vec4 operator>=(const F32vec4& a, const F32vec4& b) { _opComp(a, b, >=) } + friend F32vec4 operator==(const F32vec4& a, const F32vec4& b) { _opComp(a, b, ==) } /* Logic */ - friend F32vec4 operator&(const F32vec4& a, const F32vec4& b) { _op(a, b, &&) } - friend F32vec4 operator|(const F32vec4& a, const F32vec4& b) { _op(a, b, ||) } - friend F32vec4 operator||(const F32vec4& a, const F32vec4& b) { _op(a, b, ||) } + friend F32vec4 operator&(const F32vec4& a, const F32vec4& b) { _opBit(a, b, &) } + friend F32vec4 operator|(const F32vec4& a, const F32vec4& b) { _opBit(a, b, |) } + friend F32vec4 operator^(const F32vec4& a, const F32vec4& b) { _opBit(a, b, ^) } friend F32vec4 operator!(const F32vec4& a) { F32vec4 z; - z[0] = !a[0]; - z[1] = !a[1]; - z[2] = !a[2]; - z[3] = !a[3]; - + z.v[0].i = ~a.v[0].i; + z.v[1].i = ~a.v[1].i; + z.v[2].i = ~a.v[2].i; + z.v[3].i = ~a.v[3].i; return z; } + friend F32vec4 if3(F32vec4 a, F32vec4 b, F32vec4 c) { return ((a) & (b)) | ((!(a)) & (c)); } // analog (a) ? b : c + + /* friend F32vec4 if3(const F32vec4& a, const F32vec4& b, const F32vec4& c) { F32vec4 z; - z[0] = (a[0]) ? b[0] : c[0]; - z[1] = (a[1]) ? b[1] : c[1]; - z[2] = (a[2]) ? b[2] : c[2]; - z[3] = (a[3]) ? b[3] : c[3]; - + z[0] = (a.v[0].i) ? b[0] : c[0]; + z[1] = (a.v[1].i) ? b[1] : c[1]; + z[2] = (a.v[2].i) ? b[2] : c[2]; + z[3] = (a.v[3].i) ? b[3] : c[3]; return z; - } + }*/ + + friend bool NotEmptyFvec(F32vec4 a) { return bool(a.v[0].i) || bool(a.v[1].i) || bool(a.v[2].i) || bool(a.v[3].i); } + friend bool EmptyFvec(F32vec4 a) { return !NotEmptyFvec(a); } -#define NotEmptyFvec(a) bool((a)[0]) | bool((a)[1]) | bool((a)[2]) | bool((a)[3]) -#define EmptyFvec(a) !(bool((a)[0]) | bool((a)[1]) | bool((a)[2]) | bool((a)[3])) // bool NotEmpty(const F32vec4 &a) { return a[0]||a[1]||a[2]||a[3]; } // bool Empty(const F32vec4 &a) { return !(a[0]||a[1]||a[2]||a[3]); } // optimize + friend F32vec4 bool2int(const F32vec4& a) { // mask returned return if3(a, 1, 0); @@ -133,10 +172,10 @@ public: /* Functions */ friend float min(float x, float y) { return x < y ? x : y; } friend float max(float x, float y) { return x < y ? y : x; } - friend float asgnb(float x, float y) { return y >= 0 ? fabs(x) : -fabs(x); } + friend float asgnb(float x, float y) { return y >= 0.f ? fabs(x) : -fabs(x); } friend float rsqrt(float x) { return 1. / sqrt(x); } friend float rcp(float x) { return 1. / x; } - friend float sgn(float x) { return x >= 0 ? 1 : -1; } + friend float sgn(float x) { return x >= 0.f ? 1.f : -1.f; } friend F32vec4 min(const F32vec4& a, const F32vec4& b) { _f2(a, b, min) } friend F32vec4 max(const F32vec4& a, const F32vec4& b) { _f2(a, b, max) } @@ -153,18 +192,25 @@ public: #undef _f1 #undef _f2 #undef _op +#undef _opBit +#undef _opComp /* Define all operators for consistensy */ vec_arithmetic(F32vec4, float); - friend ostream& operator<<(ostream& strm, const F32vec4& a) + friend std::ostream& operator<<(std::ostream& strm, const F32vec4& a) { - strm << a[0] << " " << a[1] << " " << a[2] << " " << a[3]; + //strm << "[" << a[0] << " " << a[1] << " " << a[2] << " " << a[3] << "]"; + strm << '['; + strm << std::setw(12) << std::setfill(' ') << a[0] << ' '; + strm << std::setw(12) << std::setfill(' ') << a[1] << ' '; + strm << std::setw(12) << std::setfill(' ') << a[2] << ' '; + strm << std::setw(12) << std::setfill(' ') << a[3] << ']'; return strm; } - friend istream& operator>>(istream& strm, F32vec4& a) + friend std::istream& operator>>(std::istream& strm, F32vec4& a) { float tmp; strm >> tmp; @@ -175,22 +221,27 @@ public: /// Checks, if all bands are equal /// NOTE: two values defined as signaling_NaN() are not equal, thus if there are all or one /// of the words are kNaN, the function returns false - bool IsHorizontallyEqual() const { return v[0] == v[1] && v[1] == v[2] && v[2] == v[3]; } + bool IsHorizontallyEqual() const { return (v[0].f == v[1].f) && (v[1].f == v[2].f) && (v[2].f == v[3].f); } /// Checks, if any of the bands is NaN - bool IsNanAny() const { return std::isnan(v[0]) || std::isnan(v[1]) || std::isnan(v[2]) || std::isnan(v[3]); } + bool IsNanAny() const { return std::isnan(v[0].f) || std::isnan(v[1].f) || std::isnan(v[2].f) || std::isnan(v[3].f); } } __attribute__((aligned(16))); -; + typedef F32vec4 fvec; typedef float fscal; const int fvecLen = 4; -//#define fvec_true _f32vec4_true -//#define fvec_false _f32vec4_false + +const F32vec4 fvec_zero(0.f); +const F32vec4 fvec_one(1.f); +const F32vec4 fvec_true(__f32scal_true.f); +const F32vec4 fvec_false(__f32scal_false.f); + #define _fvecalignment + namespace nsL1 { template<typename T> -- GitLab