From 2eb90d0418eb9af4892a407317d85676428ee014 Mon Sep 17 00:00:00 2001
From: "se.gorbunov" <se.gorbunov@gsi.de>
Date: Tue, 23 Aug 2022 23:06:57 +0000
Subject: [PATCH] L1: fix pseudo vectors

---
 reco/L1/CbmL1Performance.cxx     |   8 +-
 reco/L1/L1Algo/L1ClonesMerger.h  |   2 +-
 reco/L1/L1Algo/L1Constants.h     |   1 -
 reco/L1/L1Algo/L1NaN.h           |   9 +-
 reco/L1/L1Algo/L1Utils.h         |   7 +-
 reco/L1/vectors/PSEUDO_F32vec4.h | 169 ++++++++++++++++++++-----------
 6 files changed, 118 insertions(+), 78 deletions(-)

diff --git a/reco/L1/CbmL1Performance.cxx b/reco/L1/CbmL1Performance.cxx
index 5e5de1d3f5..09d171bcde 100644
--- a/reco/L1/CbmL1Performance.cxx
+++ b/reco/L1/CbmL1Performance.cxx
@@ -386,7 +386,7 @@ void CbmL1::EfficienciesPerformance()
       if (mtra.p > CbmL1Constants::MinRefMom) {  // reference tracks
         ntra.Inc(reco, killed, ratio_length, ratio_fakes, nclones, mc_length, mc_length_hits, "fast");
 
-        if (mtra.IsPrimary()) {                // reference primary
+        if (mtra.IsPrimary()) {                  // reference primary
           if (mtra.NStations() == fNStations) {  // long reference primary
             ntra.Inc(reco, killed, ratio_length, ratio_fakes, nclones, mc_length, mc_length_hits, "long_fast_prim");
           }
@@ -1402,7 +1402,7 @@ void CbmL1::TrackFitPerformance()
         {  // extrapolate to vertex
           L1FieldRegion fld _fvecalignment;
           L1FieldValue B[3] _fvecalignment;
-          float z[3];
+          float z[3] = {0.f, 0.f, 0.f};
           for (unsigned int ih = 0; ih < 3; ih++) {
             if (ih >= mc.Points.size()) continue;  //If nofMCPoints in track < 3
             const int iMCP      = mc.Points[ih];
@@ -2233,9 +2233,9 @@ void CbmL1::InputPerformance()
 
 
       if (hm->GetNofLinks() == 0) continue;
-      Float_t bestWeight  = 0.f;
+      Float_t bestWeight = 0.f;
       //Float_t totalWeight = 0.f;
-      int iMCPoint        = -1;
+      int iMCPoint = -1;
       CbmLink link;
 
       for (int iLink = 0; iLink < hm->GetNofLinks(); iLink++) {
diff --git a/reco/L1/L1Algo/L1ClonesMerger.h b/reco/L1/L1Algo/L1ClonesMerger.h
index 9b76aefdfa..072d13c0f6 100644
--- a/reco/L1/L1Algo/L1ClonesMerger.h
+++ b/reco/L1/L1Algo/L1ClonesMerger.h
@@ -10,9 +10,9 @@
 #ifndef L1ClonesMerger_h
 #define L1ClonesMerger_h 1
 
+#include "L1Def.h"
 #include "L1Hit.h"  // For L1HitIndex_t
 #include "L1Vector.h"
-#include "vectors/P4_F32vec4.h"
 
 class L1Track;
 class L1Algo;
diff --git a/reco/L1/L1Algo/L1Constants.h b/reco/L1/L1Algo/L1Constants.h
index fbfff4e2f8..c2ae4b477c 100644
--- a/reco/L1/L1Algo/L1Constants.h
+++ b/reco/L1/L1Algo/L1Constants.h
@@ -13,7 +13,6 @@
 #define L1Constants_h 1
 
 #include "L1NaN.h"
-#include "vectors/P4_F32vec4.h"  // for fvecLen
 
 /// Namespace contains compile-time constants definition for the L1 tracking algorithm
 namespace L1Constants
diff --git a/reco/L1/L1Algo/L1NaN.h b/reco/L1/L1Algo/L1NaN.h
index 91e3b8e580..a83807d8f0 100644
--- a/reco/L1/L1Algo/L1NaN.h
+++ b/reco/L1/L1Algo/L1NaN.h
@@ -12,17 +12,12 @@
 #ifndef L1NaN_h
 #define L1NaN_h 1
 
-#include <type_traits>
-
 #include <limits>
+#include <type_traits>
 
 #include <cmath>
 
-#if defined(__CLING__) && defined(__arm64__)
-#include "vectors/PSEUDO_F32vec4.h"
-#else
-#include "vectors/P4_F32vec4.h"
-#endif
+#include "L1Def.h"
 
 /// Namespace L1NaN defines functions to set variables to NaN and check wether they are NaN or not
 ///
diff --git a/reco/L1/L1Algo/L1Utils.h b/reco/L1/L1Algo/L1Utils.h
index a813f52a36..7b053e5065 100644
--- a/reco/L1/L1Algo/L1Utils.h
+++ b/reco/L1/L1Algo/L1Utils.h
@@ -10,7 +10,6 @@
 #ifndef L1Utils_h
 #define L1Utils_h 1
 
-
 #include <FairLogger.h>
 
 #include <chrono>
@@ -24,11 +23,7 @@
 
 #include <cmath>
 
-#if defined(__CLING__) && defined(__arm64__)
-#include "vectors/PSEUDO_F32vec4.h"
-#else
-#include "vectors/P4_F32vec4.h"
-#endif
+#include "L1Def.h"
 
 /// Class contains some utility functions for L1Algo
 namespace L1Utils
diff --git a/reco/L1/vectors/PSEUDO_F32vec4.h b/reco/L1/vectors/PSEUDO_F32vec4.h
index 47b66abeb5..fa985d9e60 100644
--- a/reco/L1/vectors/PSEUDO_F32vec4.h
+++ b/reco/L1/vectors/PSEUDO_F32vec4.h
@@ -5,6 +5,7 @@
 #ifndef L1Algo_PSEUDO_F32vec4_H
 #define L1Algo_PSEUDO_F32vec4_H
 
+#include <iomanip>
 #include <iostream>
 
 #include <cmath>
@@ -25,16 +26,31 @@ float rsqrt(float x);
 float rcp(float x);
 float sgn(float x);
 
-class F32vec4 {
+
+union F32scalUnion {
+  float f;
+  int i;
+  F32scalUnion() : f(0.f) {}
+  F32scalUnion(int j) : i(j) {}
+  F32scalUnion(float g) : f(g) {}
+};
+
+//const F32scalUnion __f32scal_abs_mask {static_cast<int>(0x7fffffff)};
+//const F32scalUnion __f32scal_sgn_mask {static_cast<int>(0x80000000)};
+const F32scalUnion __f32scal_true {static_cast<int>(0xFFFFFFFF)};
+const F32scalUnion __f32scal_false {static_cast<int>(0x00000000)};
 
 
+class F32vec4 {
+
 public:
-  float v[4];
+  F32scalUnion v[4];
+
+  float& operator[](int i) { return v[i].f; }
+  float operator[](int i) const { return v[i].f; }
 
-  float& operator[](int i) { return ((float*) &v)[i]; }
-  float operator[](int i) const { return ((float*) &v)[i]; }
+  F32vec4() : F32vec4(0.f) {}
 
-  F32vec4() {}
   F32vec4(const F32vec4& a)
   {
     v[0] = a.v[0];
@@ -42,45 +58,64 @@ public:
     v[2] = a.v[2];
     v[3] = a.v[3];
   }
-  F32vec4(const float& a)
+
+  F32vec4(float a)
   {
-    v[0] = a;
-    v[1] = a;
-    v[2] = a;
-    v[3] = a;
+    v[0].f = a;
+    v[1].f = a;
+    v[2].f = a;
+    v[3].f = a;
   }
 
-  F32vec4(const float& f0, const float& f1, const float& f2, const float& f3)
+  F32vec4(float f0, float f1, float f2, float f3)
   {
-    v[0] = f0;
-    v[1] = f1;
-    v[2] = f2;
-    v[3] = f3;
+    v[0].f = f0;
+    v[1].f = f1;
+    v[2].f = f2;
+    v[3].f = f3;
   }
 
+#define _f1(A, F)                                                                                                      \
+  F32vec4 z;                                                                                                           \
+  z[0] = F(A[0]);                                                                                                      \
+  z[1] = F(A[1]);                                                                                                      \
+  z[2] = F(A[2]);                                                                                                      \
+  z[3] = F(A[3]);                                                                                                      \
+  return z;
 
 #define _f2(A, B, F)                                                                                                   \
   F32vec4 z;                                                                                                           \
-  z.v[0] = F(A.v[0], B.v[0]);                                                                                          \
-  z.v[1] = F(A.v[1], B.v[1]);                                                                                          \
-  z.v[2] = F(A.v[2], B.v[2]);                                                                                          \
-  z.v[3] = F(A.v[3], B.v[3]);                                                                                          \
+  z[0] = F(A[0], B[0]);                                                                                                \
+  z[1] = F(A[1], B[1]);                                                                                                \
+  z[2] = F(A[2], B[2]);                                                                                                \
+  z[3] = F(A[3], B[3]);                                                                                                \
   return z;
-#define _f1(A, F)                                                                                                      \
+
+#define _op(A, B, F)                                                                                                   \
+  F32vec4 z;                                                                                                           \
+  z[0] = (A[0] F B[0]);                                                                                                \
+  z[1] = (A[1] F B[1]);                                                                                                \
+  z[2] = (A[2] F B[2]);                                                                                                \
+  z[3] = (A[3] F B[3]);                                                                                                \
+  return z;
+
+#define _opBit(A, B, F)                                                                                                \
   F32vec4 z;                                                                                                           \
-  z.v[0] = F(A.v[0]);                                                                                                  \
-  z.v[1] = F(A.v[1]);                                                                                                  \
-  z.v[2] = F(A.v[2]);                                                                                                  \
-  z.v[3] = F(A.v[3]);                                                                                                  \
+  z.v[0].i = (A.v[0].i F B.v[0].i);                                                                                    \
+  z.v[1].i = (A.v[1].i F B.v[1].i);                                                                                    \
+  z.v[2].i = (A.v[2].i F B.v[2].i);                                                                                    \
+  z.v[3].i = (A.v[3].i F B.v[3].i);                                                                                    \
   return z;
-#define _op(A, B, O)                                                                                                   \
+
+#define _opComp(A, B, F)                                                                                               \
   F32vec4 z;                                                                                                           \
-  z.v[0] = A.v[0] O B.v[0];                                                                                            \
-  z.v[1] = A.v[1] O B.v[1];                                                                                            \
-  z.v[2] = A.v[2] O B.v[2];                                                                                            \
-  z.v[3] = A.v[3] O B.v[3];                                                                                            \
+  z[0] = (A[0] F B[0]) ? __f32scal_true.f : __f32scal_false.f;                                                         \
+  z[1] = (A[1] F B[1]) ? __f32scal_true.f : __f32scal_false.f;                                                         \
+  z[2] = (A[2] F B[2]) ? __f32scal_true.f : __f32scal_false.f;                                                         \
+  z[3] = (A[3] F B[3]) ? __f32scal_true.f : __f32scal_false.f;                                                         \
   return z;
 
+
   /* Arithmetic Operators */
   friend F32vec4 operator+(const F32vec4& a, const F32vec4& b) { _op(a, b, +) }
   friend F32vec4 operator-(const F32vec4& a, const F32vec4& b) { _op(a, b, -) }
@@ -88,42 +123,46 @@ public:
   friend F32vec4 operator/(const F32vec4& a, const F32vec4& b) { _op(a, b, /) }
 
   /* Comparison */
-  friend F32vec4 operator<(const F32vec4& a, const F32vec4& b) { _op(a, b, <) }
-  friend F32vec4 operator<=(const F32vec4& a, const F32vec4& b) { _op(a, b, <=) }
-  friend F32vec4 operator>(const F32vec4& a, const F32vec4& b) { _op(a, b, >) }
-  friend F32vec4 operator>=(const F32vec4& a, const F32vec4& b) { _op(a, b, >=) }
+  friend F32vec4 operator<(const F32vec4& a, const F32vec4& b) { _opComp(a, b, <) }
+  friend F32vec4 operator<=(const F32vec4& a, const F32vec4& b) { _opComp(a, b, <=) }
+  friend F32vec4 operator>(const F32vec4& a, const F32vec4& b) { _opComp(a, b, >) }
+  friend F32vec4 operator>=(const F32vec4& a, const F32vec4& b) { _opComp(a, b, >=) }
+  friend F32vec4 operator==(const F32vec4& a, const F32vec4& b) { _opComp(a, b, ==) }
 
   /* Logic */
-  friend F32vec4 operator&(const F32vec4& a, const F32vec4& b) { _op(a, b, &&) }
-  friend F32vec4 operator|(const F32vec4& a, const F32vec4& b) { _op(a, b, ||) }
-  friend F32vec4 operator||(const F32vec4& a, const F32vec4& b) { _op(a, b, ||) }
+  friend F32vec4 operator&(const F32vec4& a, const F32vec4& b) { _opBit(a, b, &) }
+  friend F32vec4 operator|(const F32vec4& a, const F32vec4& b) { _opBit(a, b, |) }
+  friend F32vec4 operator^(const F32vec4& a, const F32vec4& b) { _opBit(a, b, ^) }
 
   friend F32vec4 operator!(const F32vec4& a)
   {
     F32vec4 z;
-    z[0] = !a[0];
-    z[1] = !a[1];
-    z[2] = !a[2];
-    z[3] = !a[3];
-
+    z.v[0].i = ~a.v[0].i;
+    z.v[1].i = ~a.v[1].i;
+    z.v[2].i = ~a.v[2].i;
+    z.v[3].i = ~a.v[3].i;
     return z;
   }
 
+  friend F32vec4 if3(F32vec4 a, F32vec4 b, F32vec4 c) { return ((a) & (b)) | ((!(a)) & (c)); }  // analog (a) ? b : c
+
+  /*
   friend F32vec4 if3(const F32vec4& a, const F32vec4& b, const F32vec4& c)
   {
     F32vec4 z;
-    z[0] = (a[0]) ? b[0] : c[0];
-    z[1] = (a[1]) ? b[1] : c[1];
-    z[2] = (a[2]) ? b[2] : c[2];
-    z[3] = (a[3]) ? b[3] : c[3];
-
+    z[0] = (a.v[0].i) ? b[0] : c[0];
+    z[1] = (a.v[1].i) ? b[1] : c[1];
+    z[2] = (a.v[2].i) ? b[2] : c[2];
+    z[3] = (a.v[3].i) ? b[3] : c[3];
     return z;
-  }
+  }*/
+
+  friend bool NotEmptyFvec(F32vec4 a) { return bool(a.v[0].i) || bool(a.v[1].i) || bool(a.v[2].i) || bool(a.v[3].i); }
+  friend bool EmptyFvec(F32vec4 a) { return !NotEmptyFvec(a); }
 
-#define NotEmptyFvec(a) bool((a)[0]) | bool((a)[1]) | bool((a)[2]) | bool((a)[3])
-#define EmptyFvec(a) !(bool((a)[0]) | bool((a)[1]) | bool((a)[2]) | bool((a)[3]))
   // bool NotEmpty(const F32vec4 &a) { return a[0]||a[1]||a[2]||a[3]; }
   // bool    Empty(const F32vec4 &a) { return !(a[0]||a[1]||a[2]||a[3]); } // optimize
+
   friend F32vec4 bool2int(const F32vec4& a)
   {  // mask returned
     return if3(a, 1, 0);
@@ -133,10 +172,10 @@ public:
   /* Functions */
   friend float min(float x, float y) { return x < y ? x : y; }
   friend float max(float x, float y) { return x < y ? y : x; }
-  friend float asgnb(float x, float y) { return y >= 0 ? fabs(x) : -fabs(x); }
+  friend float asgnb(float x, float y) { return y >= 0.f ? fabs(x) : -fabs(x); }
   friend float rsqrt(float x) { return 1. / sqrt(x); }
   friend float rcp(float x) { return 1. / x; }
-  friend float sgn(float x) { return x >= 0 ? 1 : -1; }
+  friend float sgn(float x) { return x >= 0.f ? 1.f : -1.f; }
 
   friend F32vec4 min(const F32vec4& a, const F32vec4& b) { _f2(a, b, min) }
   friend F32vec4 max(const F32vec4& a, const F32vec4& b) { _f2(a, b, max) }
@@ -153,18 +192,25 @@ public:
 #undef _f1
 #undef _f2
 #undef _op
+#undef _opBit
+#undef _opComp
 
   /* Define all operators for consistensy */
 
   vec_arithmetic(F32vec4, float);
 
-  friend ostream& operator<<(ostream& strm, const F32vec4& a)
+  friend std::ostream& operator<<(std::ostream& strm, const F32vec4& a)
   {
-    strm << a[0] << " " << a[1] << " " << a[2] << " " << a[3];
+    //strm << "[" << a[0] << " " << a[1] << " " << a[2] << " " << a[3] << "]";
+    strm << '[';
+    strm << std::setw(12) << std::setfill(' ') << a[0] << ' ';
+    strm << std::setw(12) << std::setfill(' ') << a[1] << ' ';
+    strm << std::setw(12) << std::setfill(' ') << a[2] << ' ';
+    strm << std::setw(12) << std::setfill(' ') << a[3] << ']';
     return strm;
   }
 
-  friend istream& operator>>(istream& strm, F32vec4& a)
+  friend std::istream& operator>>(std::istream& strm, F32vec4& a)
   {
     float tmp;
     strm >> tmp;
@@ -175,22 +221,27 @@ public:
   /// Checks, if all bands are equal
   /// NOTE: two values defined as signaling_NaN() are not equal, thus if there are all or one
   /// of the words are kNaN, the function returns false
-  bool IsHorizontallyEqual() const { return v[0] == v[1] && v[1] == v[2] && v[2] == v[3]; }
+  bool IsHorizontallyEqual() const { return (v[0].f == v[1].f) && (v[1].f == v[2].f) && (v[2].f == v[3].f); }
 
   /// Checks, if any of the bands is NaN
-  bool IsNanAny() const { return std::isnan(v[0]) || std::isnan(v[1]) || std::isnan(v[2]) || std::isnan(v[3]); }
+  bool IsNanAny() const { return std::isnan(v[0].f) || std::isnan(v[1].f) || std::isnan(v[2].f) || std::isnan(v[3].f); }
 
 
 } __attribute__((aligned(16)));
-;
+
 
 typedef F32vec4 fvec;
 typedef float fscal;
 const int fvecLen = 4;
-//#define fvec_true  _f32vec4_true
-//#define fvec_false _f32vec4_false
+
+const F32vec4 fvec_zero(0.f);
+const F32vec4 fvec_one(1.f);
+const F32vec4 fvec_true(__f32scal_true.f);
+const F32vec4 fvec_false(__f32scal_false.f);
+
 #define _fvecalignment
 
+
 namespace nsL1
 {
   template<typename T>
-- 
GitLab