diff --git a/reco/L1/L1Algo/L1Extrapolation.cxx b/reco/L1/L1Algo/L1Extrapolation.cxx index 58cc92e8ac03f52734aa0829a0bbd3e0ff3658e0..c6008523fb778f9932e3323117be967dad71054b 100644 --- a/reco/L1/L1Algo/L1Extrapolation.cxx +++ b/reco/L1/L1Algo/L1Extrapolation.cxx @@ -214,7 +214,7 @@ void L1ExtrapolateAnalytic(L1TrackPar& T, // input track parameters (x,y,tx,ty, const fvec ctdz2 = c_light * t * dz2; const fvec dqp = qp - qp0; - const fvec t2i = c1 * rcp(t2); // /t2; + const fvec t2i = c1 / t2; const fvec xt2i = x * t2i; const fvec yt2i = y * t2i; const fvec tmp0 = ht1SA1 + c2 * ht2SA2 + c3 * ht3SA3; diff --git a/reco/L1/L1Algo/L1Field.cxx b/reco/L1/L1Algo/L1Field.cxx index aa1766fd6a93e5d5cb21f974e4d5b1f0c72fd52b..15d8df849d968565d11753bcba90274613d1381c 100644 --- a/reco/L1/L1Algo/L1Field.cxx +++ b/reco/L1/L1Algo/L1Field.cxx @@ -218,7 +218,7 @@ void L1FieldRegion::Set(const L1FieldValue& b0, const fvec b0z, const L1FieldVal z0 = b0z; fvec dz1 = b1z - b0z; fvec dz2 = b2z - b0z; - fvec det = rcp(dz1 * dz2 * (b2z - b1z)); + fvec det = fvec::One() / (dz1 * dz2 * (b2z - b1z)); fvec w21 = -dz2 * det; fvec w22 = dz1 * det; @@ -249,7 +249,7 @@ void L1FieldRegion::Set(const L1FieldValue& b0, const fvec b0z, const L1FieldVal void L1FieldRegion::Set(const L1FieldValue& b0, const fvec b0z, const L1FieldValue& b1, const fvec b1z) { z0 = b0z; - fvec dzi = rcp(b1z - b0z); + fvec dzi = fvec::One() / (b1z - b0z); cx0 = b0.x; cy0 = b0.y; cz0 = b0.z; diff --git a/reco/L1/L1Algo/L1TrackFitter.cxx b/reco/L1/L1Algo/L1TrackFitter.cxx index ec45faa181d97558f972a72d648e8a9d1e7b5b76..1bee3607e0d68987252604ff51636729d389d8ee 100644 --- a/reco/L1/L1Algo/L1TrackFitter.cxx +++ b/reco/L1/L1Algo/L1TrackFitter.cxx @@ -1459,7 +1459,7 @@ void L1Algo::GuessVec(L1TrackPar& t, fvec* xV, fvec* yV, fvec* zV, fvec* Sy, fve fvec A2A5 = A2 * A5; fvec A4A4 = A4 * A4; - fvec det = rcp(-A2 * A3A3 + A1 * (A3A4 + A3A4 - A1A5) + A0 * (A2A5 - A4A4)); + fvec det = fvec::One() / (-A2 * A3A3 + A1 * (A3A4 + A3A4 - A1A5) + A0 * (A2A5 - A4A4)); fvec Ai0 = (-A4A4 + A2A5); fvec Ai1 = (A3A4 - A1A5); fvec Ai2 = (-A3A3 + A0 * A5); @@ -1471,16 +1471,16 @@ void L1Algo::GuessVec(L1TrackPar& t, fvec* xV, fvec* yV, fvec* zV, fvec* Sy, fve t.x = (Ai0 * a0 + Ai1 * a1 + Ai3 * a2) * det; t.tx = (Ai1 * a0 + Ai2 * a1 + Ai4 * a2) * det; fvec txtx1 = 1. + t.tx * t.tx; - L = (Ai3 * a0 + Ai4 * a1 + Ai5 * a2) * det * rcp(txtx1); + L = (Ai3 * a0 + Ai4 * a1 + Ai5 * a2) * det / txtx1; L1 = L * t.tx; A1 = A1 + A3 * L1; A2 = A2 + (A4 + A4 + A5 * L1) * L1; b1 += b2 * L1; - det = rcp(-A1 * A1 + A0 * A2); + det = fvec::One() / (-A1 * A1 + A0 * A2); t.y = (A2 * b0 - A1 * b1) * det; t.ty = (-A1 * b0 + A0 * b1) * det; - t.qp = -L * c_light_i * rsqrt(txtx1 + t.ty * t.ty); + t.qp = -L * c_light_i / sqrt(txtx1 + t.ty * t.ty); t.z = z0; } @@ -1534,7 +1534,7 @@ void L1Algo::GuessVec(L1TrackParFit& t, fvec* xV, fvec* yV, fvec* zV, fvec* Sy, fvec A2A5 = A2 * A5; fvec A4A4 = A4 * A4; - fvec det = rcp(-A2 * A3A3 + A1 * (A3A4 + A3A4 - A1A5) + A0 * (A2A5 - A4A4)); + fvec det = fvec::One() / (-A2 * A3A3 + A1 * (A3A4 + A3A4 - A1A5) + A0 * (A2A5 - A4A4)); fvec Ai0 = (-A4A4 + A2A5); fvec Ai1 = (A3A4 - A1A5); fvec Ai2 = (-A3A3 + A0 * A5); @@ -1546,16 +1546,16 @@ void L1Algo::GuessVec(L1TrackParFit& t, fvec* xV, fvec* yV, fvec* zV, fvec* Sy, t.fx = (Ai0 * a0 + Ai1 * a1 + Ai3 * a2) * det; t.ftx = (Ai1 * a0 + Ai2 * a1 + Ai4 * a2) * det; fvec txtx1 = 1. + t.ftx * t.ftx; - L = (Ai3 * a0 + Ai4 * a1 + Ai5 * a2) * det * rcp(txtx1); + L = (Ai3 * a0 + Ai4 * a1 + Ai5 * a2) * det / txtx1; L1 = L * t.ftx; A1 = A1 + A3 * L1; A2 = A2 + (A4 + A4 + A5 * L1) * L1; b1 += b2 * L1; - det = rcp(-A1 * A1 + A0 * A2); + det = fvec::One() / (-A1 * A1 + A0 * A2); t.fy = (A2 * b0 - A1 * b1) * det; t.fty = (-A1 * b0 + A0 * b1) * det; - t.fqp = -L * c_light_i * rsqrt(txtx1 + t.fty * t.fty); + t.fqp = -L * c_light_i / sqrt(txtx1 + t.fty * t.fty); if (timeV) { t.ft = masked(time, nhits > 0); } t.fz = z0; diff --git a/reco/L1/vectors/P4_F32vec4.h b/reco/L1/vectors/P4_F32vec4.h index a18457bd06705b51c93b21ddf6aecda5216d66d7..b1ac7fce338f30127791944673a8d03a1754fb7d 100644 --- a/reco/L1/vectors/P4_F32vec4.h +++ b/reco/L1/vectors/P4_F32vec4.h @@ -75,28 +75,13 @@ public: /* Square Root */ friend F32vec4 sqrt(const F32vec4& a) { return _mm_sqrt_ps(a); } - /* Reciprocal( inverse) Square Root */ - friend F32vec4 rsqrt(const F32vec4& a) { return _mm_rsqrt_ps(a); } - - /* Reciprocal (inversion) */ - // friend F32vec4 rcp ( const F32vec4 &a ){ return _mm_rcp_ps (a); } - /* Reciprocal (inversion) */ - //friend F32vec4 rcp ( const F32vec4 &a ){ return 1. / a; } - /* NewtonRaphson Reciprocal - [2 * rcpps(x) - (x * rcpps(x) * rcpps(x))] */ - friend F32vec4 rcp(const F32vec4& a) - { - F32vec4 Ra0 = _mm_rcp_ps(a); - return _mm_sub_ps(_mm_add_ps(Ra0, Ra0), _mm_mul_ps(_mm_mul_ps(Ra0, a), Ra0)); - } - /* Absolute value */ - friend F32vec4 fabs(const F32vec4& a) { return _mm_and_ps(a, _f32vec4_abs_mask); } + friend F32vec4 abs(const F32vec4& a) { return _mm_and_ps(a, _f32vec4_abs_mask); } /* Sign */ - friend F32vec4 sgn(const F32vec4& a) { return _mm_or_ps(_mm_and_ps(a, _f32vec4_sgn_mask), _f32vec4_one); } - friend F32vec4 asgnb(const F32vec4& a, const F32vec4& b) { return _mm_or_ps(_mm_and_ps(b, _f32vec4_sgn_mask), a); } + //friend F32vec4 sgn(const F32vec4& a) { return _mm_or_ps(_mm_and_ps(a, _f32vec4_sgn_mask), _f32vec4_one); } + //friend F32vec4 asgnb(const F32vec4& a, const F32vec4& b) { return _mm_or_ps(_mm_and_ps(b, _f32vec4_sgn_mask), a); } /* Logical */ @@ -150,11 +135,6 @@ public: static F32vec4 MaskOne() { return _f32vec4_true; } static F32vec4 MaskZero() { return _f32vec4_false; } -#define NotEmptyFvec(a) bool((a)[0]) | bool((a)[1]) | bool((a)[2]) | bool((a)[3]) -#define EmptyFvec(a) !(bool((a)[0]) | bool((a)[1]) | bool((a)[2]) | bool((a)[3])) - // bool NotEmpty(const F32vec4 &a) { return a[0]||a[1]||a[2]||a[3]; } - // bool Empty(const F32vec4 &a) { return !(a[0]||a[1]||a[2]||a[3]); } // optimize - /* Define all operators for consistensy */ vec_arithmetic(F32vec4, float); @@ -171,45 +151,6 @@ public: #undef _f1 - friend F32vec4 atan2(const F32vec4& y, const F32vec4& x) - { - const F32vec4 pi(3.1415926535897932); - const F32vec4 pi_2 = pi / 2; - const F32vec4 zero(0); - - const F32vec4& xZero = F32vec4(x == zero); - const F32vec4& yZero = F32vec4(y == zero); - const F32vec4& xNeg = F32vec4(x < zero); - const F32vec4& yNeg = F32vec4(y < zero); - - const F32vec4& absX = fabs(x); - const F32vec4& absY = fabs(y); - - F32vec4 a = absY / absX; - const F32vec4 pi_4 = pi / 4; - const F32vec4& gt_tan_3pi_8 = F32vec4(a > F32vec4(2.414213562373095)); - const F32vec4& gt_tan_pi_8 = F32vec4(a > F32vec4(0.4142135623730950)) & F32vec4(!gt_tan_3pi_8); - const F32vec4 minusOne(-1); - F32vec4 b(zero); - b = (pi_2 & gt_tan_3pi_8) + (F32vec4(!gt_tan_3pi_8) & b); - b = (pi_4 & gt_tan_pi_8) + (F32vec4(!gt_tan_pi_8) & b); - a = (gt_tan_3pi_8 & (minusOne / a)) + (F32vec4(!gt_tan_3pi_8) & a); - a = (gt_tan_pi_8 & ((absY - absX) / (absY + absX))) + (F32vec4(!gt_tan_pi_8) & a); - const F32vec4& a2 = a * a; - b += (((8.05374449538e-2 * a2 - 1.38776856032E-1) * a2 + 1.99777106478E-1) * a2 - 3.33329491539E-1) * a2 * a + a; - F32vec4 xyNeg = F32vec4(xNeg ^ yNeg); - b = (xyNeg & (-b)) + (F32vec4(!xyNeg) & b); - xyNeg = F32vec4(xNeg & !yNeg); - b = (xyNeg & (b + pi)) + (F32vec4(!xyNeg) & b); - xyNeg = F32vec4(xNeg & yNeg); - b = (xyNeg & (b - pi)) + (F32vec4(!xyNeg) & b); - xyNeg = F32vec4(xZero & yZero); - b = (xyNeg & zero) + (F32vec4(!xyNeg) & b); - xyNeg = F32vec4(xZero & yNeg); - b = (xyNeg & (-pi_2)) + (F32vec4(!xyNeg) & b); - return b; - } - friend std::ostream& operator<<(std::ostream& strm, const F32vec4& a) { //strm << "[" << a[0] << " " << a[1] << " " << a[2] << " " << a[3] << "]"; @@ -239,6 +180,8 @@ const int fvecLen = 4; //#define fvec_false _f32vec4_false #define _fvecalignment __attribute__((aligned(16))) +inline fvec fabs(const fvec& a) { return abs(a); } + inline fvec if3(const fvec& mask, const fvec& b, const fvec& c) { // return (a ?b :c); @@ -275,6 +218,10 @@ inline bool IsNanAny(const fvec& v) return ret; } +inline bool NotEmptyFvec(const F32vec4& a) { return bool(a[0]) || bool(a[1]) || bool(a[2]) || bool(a[3]); } + +inline bool EmptyFvec(const F32vec4& a) { return !(bool(a[0]) || bool(a[1]) || bool(a[2]) || bool(a[3])); } + #include "std_alloc.h"