diff --git a/algo/detectors/sts/StsHitfinder.cxx b/algo/detectors/sts/StsHitfinder.cxx
index 4bed13ff029103417cbe4b885191fe84f1a39672..1b5933982ced0bb1b9ac023425ed5ffa3fb9f167 100644
--- a/algo/detectors/sts/StsHitfinder.cxx
+++ b/algo/detectors/sts/StsHitfinder.cxx
@@ -496,7 +496,7 @@ XPU_D void sts::Hitfinder::FindHits(FindHits::context& ctx) const
 // On GPU process all front clusters in parallel instead (one thread per cluster)
 //   to fully utilize the GPU.
 // Currently use option 2 for both as it is faster on CPU as well.
-#if XPU_IS_CPU
+#if 0
   int iModule = ctx.block_idx_x();
 #else
   int iModule = 0;
@@ -545,7 +545,7 @@ XPU_D void sts::Hitfinder::FindHits(FindHits::context& ctx) const
   float maxSigmaBoth = 4.f * xpu::sqrt(maxTerrF * maxTerrF + maxTerrB * maxTerrB);
 
   int startB = 0;
-#if XPU_IS_CPU
+#if 0
   for (int iClusterF = ctx.thread_idx_x(); iClusterF < nClustersF; iClusterF += ctx.block_dim_x()) {
 #else
   int iClusterF = iThread;
diff --git a/algo/detectors/sts/StsHitfinderChain.cxx b/algo/detectors/sts/StsHitfinderChain.cxx
index d743d65d8168ee662e2ca3153abebc8fc900131e..e70bfe9f1c0c473e21bb306d5ca0d0e8a2c901a8 100644
--- a/algo/detectors/sts/StsHitfinderChain.cxx
+++ b/algo/detectors/sts/StsHitfinderChain.cxx
@@ -134,7 +134,12 @@ void sts::HitfinderChain::operator()(gsl::span<const CbmStsDigi> digis)
   queue.wait();
   xpu::h_view nClusters {hfc.nClustersPerModule};
   size_t nClustersFront = std::accumulate(nClusters.begin(), nClusters.begin() + nModules, 0);
-  bool isCpu            = xpu::device::active().backend() == xpu::cpu;
+
+  // FindHits supports two modes of parallelization: one thread per cluster or one block per module.
+  // Currently we use the first mode (one thread per cluster) for both CPU and GPU.
+  // See sts::Hitfinder::FindHits() for details.
+  // bool isCpu            = xpu::device::active().backend() == xpu::cpu;
+  bool isCpu            = false;
   xpu::grid findHitsG   = isCpu ? xpu::n_blocks(nModules) : xpu::n_threads(nClustersFront);
   queue.launch<FindHits>(findHitsG);