diff --git a/algo/ca/core/utils/CaUtils.cxx b/algo/ca/core/utils/CaUtils.cxx
index 5b75ffb3b4ddaf99fbd1ee505cf3099648c49694..65844ebcbb0304a6651f6baf74f66fe371190186 100644
--- a/algo/ca/core/utils/CaUtils.cxx
+++ b/algo/ca/core/utils/CaUtils.cxx
@@ -40,3 +40,330 @@ namespace cbm::algo::ca::utils
     }
   }
 }  // namespace cbm::algo::ca::utils
+
+
+namespace cbm::algo::ca::utils::math
+{
+
+  void CholeskyFactorization(const double a[], const int n, int nn, double u[], int* nullty, int* ifault)
+  {
+    //
+    //  Purpose:
+    //
+    //    CHOLESKY computes the Cholesky factorization of a PDS matrix.
+    //
+    //  Discussion:
+    //
+    //    For a positive definite symmetric matrix A, the Cholesky factor U
+    //    is an upper triangular matrix such that A = U' * U.
+    //
+    //    This routine was originally named "CHOL", but that conflicted with
+    //    a built in MATLAB routine name.
+    //
+    //    The missing initialization "II = 0" has been added to the code.
+    //
+    //  Licensing:
+    //
+    //    This code is distributed under the GNU LGPL license.
+    //
+    //  Modified:
+    //
+    //    12 February 2008
+    //
+    //  Author:
+    //
+    //    Original FORTRAN77 version by Michael Healy.
+    //    Modifications by AJ Miller.
+    //    C++ version by John Burkardt.
+    //
+    //  Reference:
+    //
+    //    Michael Healy,
+    //    Algorithm AS 6:
+    //    Triangular decomposition of a symmetric matrix,
+    //    Applied Statistics,
+    //    Volume 17, Number 2, 1968, pages 195-197.
+    //
+    //  Parameters:
+    //
+    //    Input, double A((N*(N+1))/2), a positive definite matrix
+    //    stored by rows in lower triangular form as a one dimensional array,
+    //    in the sequence
+    //    A(1,1),
+    //    A(2,1), A(2,2),
+    //    A(3,1), A(3,2), A(3,3), and so on.
+    //
+    //    Input, int N, the order of A.
+    //
+    //    Input, int NN, the dimension of the array used to store A,
+    //    which should be at least (N*(N+1))/2.
+    //
+    //    Output, double U((N*(N+1))/2), an upper triangular matrix,
+    //    stored by columns, which is the Cholesky factor of A.  The program is
+    //    written in such a way that A and U can share storage.
+    //
+    //    Output, int NULLTY, the rank deficiency of A.  If NULLTY is zero,
+    //    the matrix is judged to have full rank.
+    //
+    //    Output, int IFAULT, an error indicator.
+    //    0, no error was detected;
+    //    1, if N < 1;
+    //    2, if A is not positive semi-definite.
+    //    3, if NN < (N*(N+1))/2.
+    //
+    //  Local Parameters:
+    //
+    //    Local, double ETA, should be set equal to the smallest positive
+    //    value such that 1.0 + ETA is calculated as being greater than 1.0 in the
+    //    accuracy being used.
+    //
+
+    double eta = 1.0E-09;
+    int i;
+    int icol;
+    int ii;
+    int irow;
+    int j;
+    int k;
+    int kk;
+    int l;
+    int m;
+    double w;
+    double x;
+
+    *ifault = 0;
+    *nullty = 0;
+
+    if (n <= 0) {
+      *ifault = 1;
+      return;
+    }
+
+    if (nn < (n * (n + 1)) / 2) {
+      *ifault = 3;
+      return;
+    }
+
+    j  = 1;
+    k  = 0;
+    ii = 0;
+    //
+    //  Factorize column by column, ICOL = column number.
+    //
+    for (icol = 1; icol <= n; icol++) {
+      ii = ii + icol;
+      x  = eta * eta * a[ii - 1];
+      l  = 0;
+      kk = 0;
+      //
+      //  IROW = row number within column ICOL.
+      //
+      for (irow = 1; irow <= icol; irow++) {
+        kk = kk + irow;
+        k  = k + 1;
+        w  = a[k - 1];
+        m  = j;
+
+        for (i = 1; i < irow; i++) {
+          l = l + 1;
+          w = w - u[l - 1] * u[m - 1];
+          m = m + 1;
+        }
+
+        l = l + 1;
+
+        if (irow == icol) {
+          break;
+        }
+
+        if (u[l - 1] != 0.0) {
+          u[k - 1] = w / u[l - 1];
+        }
+        else {
+          u[k - 1] = 0.0;
+
+          if (fabs(x * a[k - 1]) < w * w) {
+            *ifault = 2;
+            return;
+          }
+        }
+      }
+      //
+      //  End of row, estimate relative accuracy of diagonal element.
+      //
+      if (fabs(w) <= fabs(eta * a[k - 1])) {
+        u[k - 1] = 0.0;
+        *nullty  = *nullty + 1;
+      }
+      else {
+        if (w < 0.0) {
+          *ifault = 2;
+          return;
+        }
+        u[k - 1] = sqrt(w);
+      }
+      j = j + icol;
+    }
+
+  }  // CholeskyFactorization
+
+
+  void SymInv(const double a[], const int n, double c[], double w[], int* nullty, int* ifault)
+  {
+    //
+    //  Purpose:
+    //
+    //    SYMINV computes the inverse of a symmetric matrix.
+    //
+    //  Licensing:
+    //
+    //    This code is distributed under the GNU LGPL license.
+    //
+    //  Modified:
+    //
+    //    11 February 2008
+    //
+    //  Author:
+    //
+    //    Original FORTRAN77 version by Michael Healy.
+    //    C++ version by John Burkardt.
+    //
+    //  Reference:
+    //
+    //    Michael Healy,
+    //    Algorithm AS 7:
+    //    Inversion of a Positive Semi-Definite Symmetric Matrix,
+    //    Applied Statistics,
+    //    Volume 17, Number 2, 1968, pages 198-199.
+    //
+    //  Parameters:
+    //
+    //    Input, double A((N*(N+1))/2), a positive definite matrix stored
+    //    by rows in lower triangular form as a one dimensional array, in the sequence
+    //    A(1,1),
+    //    A(2,1), A(2,2),
+    //    A(3,1), A(3,2), A(3,3), and so on.
+    //
+    //    Input, int N, the order of A.
+    //
+    //    Output, double C((N*(N+1))/2), the inverse of A, or generalized
+    //    inverse if A is singular, stored using the same storage scheme employed
+    //    for A.  The program is written in such a way that A and U can share storage.
+    //
+    //    Workspace, double W(N).
+    //
+    //    Output, int *NULLTY, the rank deficiency of A.  If NULLTY is zero,
+    //    the matrix is judged to have full rank.
+    //
+    //    Output, int *IFAULT, error indicator.
+    //    0, no error detected.
+    //    1, N < 1.
+    //    2, A is not positive semi-definite.
+    //
+
+    int i;
+    int icol;
+    int irow;
+    int j;
+    int jcol;
+    int k;
+    int l;
+    int mdiag;
+    int ndiag;
+    int nn;
+    int nrow;
+    double x;
+
+    *ifault = 0;
+
+    if (n <= 0) {
+      *ifault = 1;
+      return;
+    }
+
+    nrow = n;
+    //
+    //  Compute the Cholesky factorization of A.
+    //  The result is stored in C.
+    //
+    nn = (n * (n + 1)) / 2;
+
+    CholeskyFactorization(a, n, nn, c, nullty, ifault);
+
+    if (*ifault != 0) {
+      return;
+    }
+    //
+    //  Invert C and form the product (Cinv)' * Cinv, where Cinv is the inverse
+    //  of C, row by row starting with the last row.
+    //  IROW = the row number,
+    //  NDIAG = location of last element in the row.
+    //
+    irow  = nrow;
+    ndiag = nn;
+    //
+    //  Special case, zero diagonal element.
+    //
+    for (;;) {
+      if (c[ndiag - 1] == 0.0) {
+        l = ndiag;
+        for (j = irow; j <= nrow; j++) {
+          c[l - 1] = 0.0;
+          l        = l + j;
+        }
+      }
+      else {
+        l = ndiag;
+        for (i = irow; i <= nrow; i++) {
+          w[i - 1] = c[l - 1];
+          l        = l + i;
+        }
+
+        icol  = nrow;
+        jcol  = nn;
+        mdiag = nn;
+
+        for (;;) {
+          l = jcol;
+
+          if (icol == irow) {
+            x = 1.0 / w[irow - 1];
+          }
+          else {
+            x = 0.0;
+          }
+
+          k = nrow;
+
+          while (irow < k) {
+            x = x - w[k - 1] * c[l - 1];
+            k = k - 1;
+            l = l - 1;
+
+            if (mdiag < l) {
+              l = l - k + 1;
+            }
+          }
+
+          c[l - 1] = x / w[irow - 1];
+
+          if (icol <= irow) {
+            break;
+          }
+          mdiag = mdiag - icol;
+          icol  = icol - 1;
+          jcol  = jcol - 1;
+        }
+      }
+
+      ndiag = ndiag - irow;
+      irow  = irow - 1;
+
+      if (irow <= 0) {
+        break;
+      }
+    }
+
+  }  // SymInv
+
+}  // namespace cbm::algo::ca::utils::math
diff --git a/algo/ca/core/utils/CaUtils.h b/algo/ca/core/utils/CaUtils.h
index d6f03ff314606b6938d3f3096df7b056233cdf80..1ebc42286bb4c91d8c70171e64ec961c340d66f6 100644
--- a/algo/ca/core/utils/CaUtils.h
+++ b/algo/ca/core/utils/CaUtils.h
@@ -320,3 +320,16 @@ namespace cbm::algo::ca::utils::simd
     out[i] = in;
   }
 }  // namespace cbm::algo::ca::utils::simd
+
+
+/// Namespace contains compile-time constants definition for SIMD operations
+/// in the CA tracking algorithm
+///
+namespace cbm::algo::ca::utils::math
+{
+
+  void CholeskyFactorization(const double a[], const int n, int nn, double u[], int* nullty, int* ifault);
+
+  void SymInv(const double a[], const int n, double c[], double w[], int* nullty, int* ifault);
+
+}  // namespace cbm::algo::ca::utils::math
diff --git a/reco/KF/CbmKfTrackFitter.cxx b/reco/KF/CbmKfTrackFitter.cxx
index ecae947ba5b72dc86ebb45e671284577613125f2..6c163cfc4bb4d58494f8cd09d5788a75cbbaed12 100644
--- a/reco/KF/CbmKfTrackFitter.cxx
+++ b/reco/KF/CbmKfTrackFitter.cxx
@@ -473,7 +473,9 @@ void CbmKfTrackFitter::AddMaterialEffects(const CbmKfTrackFitter::Track& t, CbmK
   fvec msQp0 = t.fIsMsQp0Set ? t.fMsQp0 : tr.GetQp();
 
   fFit.MultipleScattering(n.fRadThick, msQp0);
-  fFit.EnergyLossCorrection(n.fRadThick, upstreamDirection ? fvec::One() : fvec::Zero());
+  if (!t.fIsMsQp0Set) {
+    fFit.EnergyLossCorrection(n.fRadThick, upstreamDirection ? fvec::One() : fvec::Zero());
+  }
 }
 
 
@@ -571,53 +573,95 @@ void CbmKfTrackFitter::FitTrack(CbmKfTrackFitter::Track& t)
 
 void CbmKfTrackFitter::Smooth(ca::TrackParamV& t1, const ca::TrackParamV& t2)
 {
-  // combine two tracks
-  std::tie(t1.X(), t1.Y(), t1.C00(), t1.C10(), t1.C11()) =
-    Smooth2D(t1.X(), t1.Y(), t1.C00(), t1.C10(), t1.C11(), t2.X(), t2.Y(), t2.C00(), t2.C10(), t2.C11());
-
-  std::tie(t1.Tx(), t1.Ty(), t1.C22(), t1.C32(), t1.C33()) =
-    Smooth2D(t1.Tx(), t1.Ty(), t1.C22(), t1.C32(), t1.C33(), t2.Tx(), t2.Ty(), t2.C22(), t2.C32(), t2.C33());
-
-  std::tie(t1.Qp(), t1.C44()) = Smooth1D(t1.Qp(), t1.C44(), t2.Qp(), t2.C44());
-
-  std::tie(t1.Time(), t1.Vi(), t1.C55(), t1.C65(), t1.C66()) =
-    Smooth2D(t1.Time(), t1.Vi(), t1.C55(), t1.C65(), t1.C66(), t2.Time(), t2.Vi(), t2.C55(), t2.C65(), t2.C66());
-
-  t1.C20() = 0.;
-  t1.C21() = 0.;
-  t1.C30() = 0.;
-  t1.C31() = 0.;
-  t1.C40() = 0.;
-  t1.C41() = 0.;
-  t1.C42() = 0.;
-  t1.C43() = 0.;
-  t1.C50() = 0.;
-  t1.C51() = 0.;
-  t1.C52() = 0.;
-  t1.C53() = 0.;
-  t1.C54() = 0.;
-  t1.C60() = 0.;
-  t1.C61() = 0.;
-  t1.C62() = 0.;
-  t1.C63() = 0.;
-  t1.C64() = 0.;
-}
+  // TODO: move to the CaTrackFit class
 
-std::tuple<ca::fvec, ca::fvec> CbmKfTrackFitter::Smooth1D(ca::fvec x1, ca::fvec Cxx1, ca::fvec x2, ca::fvec Cxx2)
-{
-  // combine two 1D values
-  ca::fvec x   = (x1 * Cxx1 + x2 * Cxx2) / (Cxx1 + Cxx2);
-  ca::fvec Cxx = Cxx1 * Cxx2 / (Cxx1 + Cxx2);
-  return std::tuple(x, Cxx);
-}
+  constexpr int nPar = ca::TrackParamV::kNtrackParam;
+  constexpr int nCov = ca::TrackParamV::kNcovParam;
 
-std::tuple<ca::fvec, ca::fvec, ca::fvec, ca::fvec, ca::fvec>
-CbmKfTrackFitter::Smooth2D(ca::fvec x1, ca::fvec y1, ca::fvec Cxx1, ca::fvec /*Cxy1*/, ca::fvec Cyy1, ca::fvec x2,
-                           ca::fvec y2, ca::fvec Cxx2, ca::fvec /*Cxy2*/, ca::fvec Cyy2)
-{
-  // combine two 2D values
-  // TODO: do it right
-  auto [x, Cxx] = Smooth1D(x1, Cxx1, x2, Cxx2);
-  auto [y, Cyy] = Smooth1D(y1, Cyy1, y2, Cyy2);
-  return std::tuple(x, y, Cxx, ca::fvec::Zero(), Cyy);
+  double r[nPar] = {t1.X()[0], t1.Y()[0], t1.Tx()[0], t1.Ty()[0], t1.Qp()[0], t1.Time()[0], t1.Vi()[0]};
+  double m[nPar] = {t2.X()[0], t2.Y()[0], t2.Tx()[0], t2.Ty()[0], t2.Qp()[0], t2.Time()[0], t2.Vi()[0]};
+
+  double S[nCov], S1[nCov], Tmp[nCov];
+  for (int i = 0; i < nCov; i++) {
+    S[i] = (t1.GetCovMatrix()[i] + t2.GetCovMatrix()[i])[0];
+  }
+
+  int nullty = 0;
+  int ifault = 0;
+  cbm::algo::ca::utils::math::SymInv(S, nPar, S1, Tmp, &nullty, &ifault);
+
+  assert(0 == nullty);
+  assert(0 == ifault);
+
+  double dzeta[nPar];
+
+  for (int i = 0; i < nPar; i++) {
+    dzeta[i] = m[i] - r[i];
+  }
+
+  double C[nPar][nPar];
+  double Si[nPar][nPar];
+  for (int i = 0, k = 0; i < nPar; i++) {
+    for (int j = 0; j <= i; j++, k++) {
+      C[i][j]  = t1.GetCovMatrix()[k][0];
+      Si[i][j] = S1[k];
+      C[j][i]  = C[i][j];
+      Si[j][i] = Si[i][j];
+    }
+  }
+
+  // K = C * S^{-1}
+  double K[nPar][nPar];
+  for (int i = 0; i < nPar; i++) {
+    for (int j = 0; j < nPar; j++) {
+      K[i][j] = 0.;
+      for (int k = 0; k < nPar; k++) {
+        K[i][j] += C[i][k] * Si[k][j];
+      }
+    }
+  }
+
+  for (int i = 0, k = 0; i < nPar; i++) {
+    for (int j = 0; j <= i; j++, k++) {
+      double kc = 0.;  // K*C[i][j]
+      for (int l = 0; l < nPar; l++) {
+        kc += K[i][l] * C[l][j];
+      }
+      t1.CovMatrix()[k] = t1.CovMatrix()[k][0] - kc;
+    }
+  }
+
+  for (int i = 0; i < nPar; i++) {
+    double kd = 0.;
+    for (int j = 0; j < nPar; j++) {
+      kd += K[i][j] * dzeta[j];
+    }
+    r[i] += kd;
+  }
+  t1.X()    = r[0];
+  t1.Y()    = r[1];
+  t1.Tx()   = r[2];
+  t1.Ty()   = r[3];
+  t1.Qp()   = r[4];
+  t1.Time() = r[5];
+  t1.Vi()   = r[6];
+
+  double chi2     = 0.;
+  double chi2Time = 0.;
+  for (int i = 0; i < nPar; i++) {
+    double SiDzeta = 0.;
+    for (int j = 0; j < nPar; j++) {
+      SiDzeta += Si[i][j] * dzeta[j];
+    }
+    if (i < 5) {
+      chi2 += dzeta[i] * SiDzeta;
+    }
+    else {
+      chi2Time += dzeta[i] * SiDzeta;
+    }
+  }
+  t1.SetChiSq(chi2 + t1.GetChiSq()[0] + t2.GetChiSq()[0]);
+  t1.SetChiSqTime(chi2Time + t1.GetChiSqTime()[0] + t2.GetChiSqTime()[0]);
+  t1.SetNdf(5 + t1.GetNdf()[0] + t2.GetNdf()[0]);
+  t1.SetNdfTime(2 + t1.GetNdfTime()[0] + t2.GetNdfTime()[0]);
 }
diff --git a/reco/KF/CbmKfTrackFitter.h b/reco/KF/CbmKfTrackFitter.h
index 58b618ad870a4f7066a01a9d800637e4219b7b01..fa1ec0e32f15eb9698aa639cf8791d7964db044d 100644
--- a/reco/KF/CbmKfTrackFitter.h
+++ b/reco/KF/CbmKfTrackFitter.h
@@ -126,12 +126,6 @@ class CbmKfTrackFitter {
   // combine two tracks
   void Smooth(ca::TrackParamV& t1, const ca::TrackParamV& t2);
 
-  std::tuple<ca::fvec, ca::fvec> Smooth1D(ca::fvec x1, ca::fvec Cxx1, ca::fvec x2, ca::fvec Cxx2);
-  std::tuple<ca::fvec, ca::fvec, ca::fvec, ca::fvec, ca::fvec> Smooth2D(ca::fvec x1, ca::fvec y1, ca::fvec Cxx1,
-                                                                        ca::fvec Cxy1, ca::fvec Cyy1, ca::fvec x2,
-                                                                        ca::fvec y2, ca::fvec Cxx2, ca::fvec Cxy2,
-                                                                        ca::fvec Cyy2);
-
  private:
   // input data arrays
   TClonesArray* fInputMvdHits{nullptr};