diff --git a/algo/data/sts/Hit.h b/algo/data/sts/Hit.h
index f310fff53ff7de92ec544b25627e679530afac77..0a33355535b615cca69a1f6bee867468295b3bd5 100644
--- a/algo/data/sts/Hit.h
+++ b/algo/data/sts/Hit.h
@@ -23,6 +23,9 @@ namespace cbm::algo::sts
     real fDu;         ///< Error of coordinate across front-side strips [cm]
     real fDv;         ///< Error of coordinate across back-side strips [cm]
 
+    u32 fFrontClusterId;  ///< Index of front-side cluster, used by tracking to reduce combinatorics
+    u32 fBackClusterId;   ///< Index of back-side cluster, used by tracking to reduce combinatorics
+
   private:  // serialization
     friend class boost::serialization::access;
 
@@ -40,6 +43,9 @@ namespace cbm::algo::sts
       ar& fTimeError;
       ar& fDu;
       ar& fDv;
+
+      ar& fFrontClusterId;  // NOTE(review): extends the archive layout; previously serialized Hits lack these fields — confirm no persisted data must stay readable, else add class versioning
+      ar& fBackClusterId;
     }
   };
 
diff --git a/algo/detectors/sts/Hitfinder.cxx b/algo/detectors/sts/Hitfinder.cxx
index 986e81aabab3e8601c17fa7eb6a9300d1adcfe45..bee37563e8eedcd94be2a5032dd87b9937db7908 100644
--- a/algo/detectors/sts/Hitfinder.cxx
+++ b/algo/detectors/sts/Hitfinder.cxx
@@ -465,8 +465,8 @@ XPU_D void sts::Hitfinder::SortClusters(SortClusters::context& ctx) const
 
   for (int iModule = firstModule; iModule <= lastModule; iModule++) {
     size_t offset                = iModule * maxClustersPerModule;
-    GpuClusterIdx* clusterIdx    = &clusterIdxPerModule[offset];
-    GpuClusterIdx* clusterIdxTmp = &clusterIdxPerModuleTmp[offset];
+    ClusterIdx* clusterIdx       = &clusterIdxPerModule[offset];
+    ClusterIdx* clusterIdxTmp    = &clusterIdxPerModuleTmp[offset];
     int nClusters                = nClustersPerModule[iModule];
 
     // if (nClusters == 0 && ctx.thread_idx_x() == 0) {
@@ -479,7 +479,7 @@ XPU_D void sts::Hitfinder::SortClusters(SortClusters::context& ctx) const
     // }
 
     SortClusters::sort_t clsSort(ctx.pos(), ctx.smem());
-    clusterIdx = clsSort.sort(clusterIdx, nClusters, clusterIdxTmp, [](const GpuClusterIdx a) { return a.fTime; });
+    clusterIdx = clsSort.sort(clusterIdx, nClusters, clusterIdxTmp, [](const ClusterIdx a) { return a.fTime; });
 
     // if (ctx.thread_idx_x() == 0) {
     //   printf("Module %d: Finished sorting %d clusters\n", iModule, nClusters);
@@ -514,8 +514,8 @@ XPU_D void sts::Hitfinder::FindHits(FindHits::context& ctx) const
   int iModuleB               = iModuleF + nModules;
   size_t offsetF             = iModuleF * maxClustersPerModule;
   size_t offsetB             = iModuleB * maxClustersPerModule;
-  GpuClusterIdx* clusterIdxF = clusterIdxSortedPerModule[iModuleF];
-  GpuClusterIdx* clusterIdxB = clusterIdxSortedPerModule[iModuleB];
+  ClusterIdx* clusterIdxF    = clusterIdxSortedPerModule[iModuleF];
+  ClusterIdx* clusterIdxB    = clusterIdxSortedPerModule[iModuleB];
   sts::Cluster* clusterDataF = &clusterDataPerModule[offsetF];
   sts::Cluster* clusterDataB = &clusterDataPerModule[offsetB];
   int nClustersF             = nClustersPerModule[iModuleF];
@@ -578,13 +578,13 @@ XPU_D void sts::Hitfinder::FindHits(FindHits::context& ctx) const
   // Temporarily disable clang-format because it indents this block wrongly for some reason.
   // TODO: Code above needs cleanup. Then clang-format can be re-enabled too.
   // clang-format off
-    GpuClusterIdx clsIdxF = clusterIdxF[iClusterF];
+    ClusterIdx clsIdxF = clusterIdxF[iClusterF];
     sts::Cluster clsDataF = clusterDataF[clsIdxF.fIdx];
 
     float maxSigma = 4.f * xpu::sqrt(clsDataF.fTimeError * clsDataF.fTimeError + maxTerrB * maxTerrB);
 
     for (int iClusterB = startB; iClusterB < nClustersB; iClusterB++) {
-      GpuClusterIdx clsIdxB = clusterIdxB[iClusterB];
+      ClusterIdx clsIdxB = clusterIdxB[iClusterB];
       sts::Cluster clsDataB = clusterDataB[clsIdxB.fIdx];
 
       float timeDiff = float(clsIdxF.fTime) - float(clsIdxB.fTime);
@@ -611,14 +611,15 @@ XPU_D void sts::Hitfinder::FindHits(FindHits::context& ctx) const
 
       if (xpu::abs(float(clsIdxF.fTime) - float(clsIdxB.fTime)) > timeCut) continue;
 
-      IntersectClusters(iModule, pars, clsIdxF.fTime, clsDataF, clsIdxB.fTime, clsDataB);
+      IntersectClusters(iModule, pars, clsIdxF, clsDataF, clsIdxB, clsDataB);
     }
   }
 // clang-format on
 }
 
-XPU_D void sts::Hitfinder::IntersectClusters(int iBlock, const HitfinderCache& pars, uint32_t timeF,
-                                             const sts::Cluster& clsF, uint32_t timeB, const sts::Cluster& clsB) const
+XPU_D void sts::Hitfinder::IntersectClusters(int iBlock, const HitfinderCache& pars, const ClusterIdx& idxF,
+                                             const sts::Cluster& clsF, const ClusterIdx& idxB,
+                                             const sts::Cluster& clsB) const
 {
   // --- Calculate cluster centre position on readout edge
 
@@ -670,7 +671,7 @@ XPU_D void sts::Hitfinder::IntersectClusters(int iBlock, const HitfinderCache& p
       xC -= 0.5f * pars.dX;
       yC -= 0.5f * pars.dY;
 
-      CreateHit(iBlock, xC, yC, varX, varY, varXY, timeF, clsF, timeB, clsB, du, dv);
+      CreateHit(iBlock, xC, yC, varX, varY, varXY, idxF, clsF, idxB, clsB, du, dv);
     }
   }
 }
@@ -763,8 +764,8 @@ XPU_D bool sts::Hitfinder::IsInside(const HitfinderCache& pars, float x, float y
 }
 
 XPU_D void sts::Hitfinder::CreateHit(int iModule, float xLocal, float yLocal, float varX, float varY, float varXY,
-                                     uint32_t timeF, const sts::Cluster& clsF, uint32_t timeB, const sts::Cluster& clsB,
-                                     float du, float dv) const
+                                     const ClusterIdx& idxF, const sts::Cluster& clsF, const ClusterIdx& idxB,
+                                     const sts::Cluster& clsB, float du, float dv) const
 {
   // --- Transform into global coordinate system
   float globalX, globalY, globalZ;
@@ -778,7 +779,7 @@ XPU_D void sts::Hitfinder::CreateHit(int iModule, float xLocal, float yLocal, fl
   float errZ = 0.f;
 
   // --- Calculate hit time (average of cluster times)
-  float hitTime      = 0.5f * (float(timeF) + float(timeB));
+  float hitTime      = 0.5f * (float(idxF.fTime) + float(idxB.fTime));
   float etF          = clsF.fTimeError;
   float etB          = clsB.fTimeError;
   float hitTimeError = 0.5f * xpu::sqrt(etF * etF + etB * etB);
@@ -795,6 +796,9 @@ XPU_D void sts::Hitfinder::CreateHit(int iModule, float xLocal, float yLocal, fl
     .fTimeError = hitTimeError,
     .fDu        = du,
     .fDv        = dv,
+
+    .fFrontClusterId = idxF.fIdx,
+    .fBackClusterId  = idxB.fIdx,
   };
 
   int idx = xpu::atomic_add(&nHitsPerModule[iModule], 1);
diff --git a/algo/detectors/sts/Hitfinder.h b/algo/detectors/sts/Hitfinder.h
index d0e06e799b1d3af3511abc26d31992c042823a57..7bd5ad70f44993a4d171b06b805788ea6de0509d 100644
--- a/algo/detectors/sts/Hitfinder.h
+++ b/algo/detectors/sts/Hitfinder.h
@@ -48,9 +48,9 @@ namespace cbm::algo::sts
   // GPU Data structures
   // TODO: Replace with regular StsCluster / StsHit once the changes
   // land to make them fit for GPU
-  struct GpuClusterIdx {
-    uint32_t fTime;  ///< Cluster time (average of digi times) [ns]
-    int fIdx;
+  struct ClusterIdx {
+    u32 fTime;  ///< Cluster time (average of digi times) [ns]
+    u32 fIdx;   ///< Index of the cluster within the module's cluster-data bucket (assigned in AddCluster)
   };
 
   // Declare Constant Memory
@@ -97,7 +97,7 @@ namespace cbm::algo::sts
 
   struct SortClusters : xpu::kernel<GPUReco> {
     using block_size    = xpu::block_size<kSortClustersBlockSize>;
-    using sort_t        = xpu::block_sort<u32, GpuClusterIdx, kSortClustersBlockSize, kSortClustersItemsPerThread>;
+    using sort_t        = xpu::block_sort<u32, ClusterIdx, kSortClustersBlockSize, kSortClustersItemsPerThread>;
     using shared_memory = sort_t::storage_t;
     using constants     = xpu::cmem<TheHitfinder, Params>;
     using context       = xpu::kernel_context<shared_memory, constants>;
@@ -250,16 +250,16 @@ namespace cbm::algo::sts
     // Stored as buckets with a max size of maxClustersPerModule
     // Actual number of entries in each bucket is stored in nClustersPerModule
     // size = 2 * nModules * maxClustersPerModule
-    xpu::buffer<GpuClusterIdx> clusterIdxPerModule;
+    xpu::buffer<ClusterIdx> clusterIdxPerModule;
 
     // Temporary cluster index array used for sorting as sorting is not in place.
     // size =  2 * nModules * maxClustersPerModule
-    xpu::buffer<GpuClusterIdx> clusterIdxPerModuleTmp;
+    xpu::buffer<ClusterIdx> clusterIdxPerModuleTmp;
 
     // Pointer to sorted cluster idx for every module side.
     // Either points to clusterIdxPerModule or clusterIdxPerModuleTmp.
     // size = 2 * nModules
-    xpu::buffer<GpuClusterIdx*> clusterIdxSortedPerModule;
+    xpu::buffer<ClusterIdx*> clusterIdxSortedPerModule;
 
     // Clusters stored by modules. Stored as buckets with a max size of maxClustersPerModule
     // Actual number of entries in each bucket is stored in nClustersPerModule
@@ -347,17 +347,17 @@ namespace cbm::algo::sts
 
     XPU_D void AddCluster(int iModule, uint32_t time, const sts::Cluster& cls) const
     {
-      GpuClusterIdx* tgtIdx = &clusterIdxPerModule[iModule * maxClustersPerModule];
+      ClusterIdx* tgtIdx    = &clusterIdxPerModule[iModule * maxClustersPerModule];
       sts::Cluster* tgtData = &clusterDataPerModule[iModule * maxClustersPerModule];
 
-      int pos = xpu::atomic_add_block(&nClustersPerModule[iModule], 1);
+      u32 pos = xpu::atomic_add_block(&nClustersPerModule[iModule], 1);
 
       if (size_t(pos) >= maxClustersPerModule) {
         xpu::atomic_add(&monitor->fNumClusterBucketOverflow, 1);
         return;
       }
 
-      GpuClusterIdx idx {time, pos};
+      ClusterIdx idx {time, pos};
       tgtIdx[idx.fIdx]  = idx;
       tgtData[idx.fIdx] = cls;
     }
@@ -368,14 +368,15 @@ namespace cbm::algo::sts
 
     XPU_D void ToGlobal(int iModule, float lx, float ly, float lz, float& gx, float& gy, float& gz) const;
 
-    XPU_D void IntersectClusters(int iBlock, const HitfinderCache& pars, uint32_t timeF, const sts::Cluster& clsF,
-                                 uint32_t timeB, const sts::Cluster& clsB) const;
+    XPU_D void IntersectClusters(int iBlock, const HitfinderCache& pars, const ClusterIdx& idxF,
+                                 const sts::Cluster& clsF, const ClusterIdx& idxB, const sts::Cluster& clsB) const;
     XPU_D float GetClusterPosition(const HitfinderCache& pars, float centre, bool isFront) const;
     XPU_D bool Intersect(const HitfinderCache& pars, float xF, float exF, float xB, float exB, float& x, float& y,
                          float& varX, float& varY, float& varXY) const;
     XPU_D bool IsInside(const HitfinderCache& pars, float x, float y) const;
-    XPU_D void CreateHit(int iBlocks, float xLocal, float yLocal, float varX, float varY, float varXY, uint32_t timeF,
-                         const sts::Cluster& clsF, uint32_t timeB, const sts::Cluster& clsB, float du, float dv) const;
+    XPU_D void CreateHit(int iBlocks, float xLocal, float yLocal, float varX, float varY, float varXY,
+                         const ClusterIdx& idxF, const sts::Cluster& clsF, const ClusterIdx& idxB, const sts::Cluster& clsB,
+                         float du, float dv) const;
 
     XPU_D void SaveMaxError(float errorValue, int iModule) const
     {
diff --git a/algo/detectors/sts/HitfinderChain.cxx b/algo/detectors/sts/HitfinderChain.cxx
index be4e75377b76e31b2b311bb03cbdd2b6c34dd4c4..0291b93622f9cf2f18b062fc5da2805e53311176 100644
--- a/algo/detectors/sts/HitfinderChain.cxx
+++ b/algo/detectors/sts/HitfinderChain.cxx
@@ -112,7 +112,7 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
     L_(trace) << "Ensuring STS clusters are ok...";
     xpu::buffer_prop props {hfc.clusterIdxPerModule};
 
-    std::vector<GpuClusterIdx> clusterIdxPerModule;
+    std::vector<ClusterIdx> clusterIdxPerModule;
     clusterIdxPerModule.resize(props.size());
     std::vector<int> nClustersPerModule;
     nClustersPerModule.resize(fPars->modules.size() * 2);
@@ -137,7 +137,7 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
   queue.wait();
   xpu::h_view nClusters(hfc.nClustersPerModule);
   size_t nClustersTotal = std::accumulate(nClusters.begin(), nClusters.end(), 0);
-  xpu::k_add_bytes<SortClusters>(nClustersTotal * sizeof(GpuClusterIdx));
+  xpu::k_add_bytes<SortClusters>(nClustersTotal * sizeof(ClusterIdx));
 
 
   size_t nClustersFront = std::accumulate(nClusters.begin(), nClusters.begin() + nModules, 0);
@@ -642,7 +642,7 @@ void sts::HitfinderChain::EnsureChannelOffsets(gsl::span<u32> channelOffsetsByMo
   }
 }
 
-void sts::HitfinderChain::EnsureClustersSane(gsl::span<GpuClusterIdx> hClusterIdx, gsl::span<int> hNClusters)
+void sts::HitfinderChain::EnsureClustersSane(gsl::span<ClusterIdx> hClusterIdx, gsl::span<int> hNClusters)
 {
   for (size_t m = 0; m < 2 * fPars->modules.size(); m++) {
     int nClusters = hNClusters[m];
diff --git a/algo/detectors/sts/HitfinderChain.h b/algo/detectors/sts/HitfinderChain.h
index 008eeddcbf5a1af87cff8434c0c297a456d58d3f..da0b8c8470b6184a8cac7cdbab260e50b4e207f5 100644
--- a/algo/detectors/sts/HitfinderChain.h
+++ b/algo/detectors/sts/HitfinderChain.h
@@ -114,7 +114,7 @@ namespace cbm::algo::sts
     void EnsureDigiOffsets(DigiMap&);
     void EnsureDigisSorted();
     void EnsureChannelOffsets(gsl::span<u32>);
-    void EnsureClustersSane(gsl::span<GpuClusterIdx>, gsl::span<int>);
+    void EnsureClustersSane(gsl::span<ClusterIdx>, gsl::span<int>);
     void EnsureClustersSorted();
 
     std::optional<const sts::HitfinderPars> fPars;