diff --git a/algo/detectors/sts/HitfinderChain.cxx b/algo/detectors/sts/HitfinderChain.cxx
index 18c6d0a4b6db6eeaa462a2da9175ef0693616727..f638a6ac79771e5639c44e714cdb6dd4c8fe9fa0 100644
--- a/algo/detectors/sts/HitfinderChain.cxx
+++ b/algo/detectors/sts/HitfinderChain.cxx
@@ -52,9 +52,6 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS
 
   Result result;
 
-  xpu::scoped_timer t_("STS Hitfinder", &result.monitor.time);
-  xpu::t_add_bytes(digis.size_bytes());
-
   size_t nModules     = fPars->setup.modules.size();
   size_t nModuleSides = nModules * 2;
   size_t nDigisTotal  = digis.size();
diff --git a/algo/detectors/sts/HitfinderChain.h b/algo/detectors/sts/HitfinderChain.h
index de9c80a8752bdf389b121c21bd56e514040c539d..78f7b2c5c6af58e3ac613d49dd0c3a97bfc33eab 100644
--- a/algo/detectors/sts/HitfinderChain.h
+++ b/algo/detectors/sts/HitfinderChain.h
@@ -28,7 +28,6 @@ namespace cbm::algo::sts
   struct HitfinderMon : HitfinderMonDevice {
     u64 nClusterTotal;
     u64 nHitsTotal;
-    xpu::timings time;
 
     void SetDeviceMon(const HitfinderMonDevice& devMon) { HitfinderMonDevice::operator=(devMon); }
   };
diff --git a/algo/detectors/trd/Hitfind.cxx b/algo/detectors/trd/Hitfind.cxx
index 21929d5d7c791854793af0d894c6dcbdff18983f..467767dcf8690921f62fce06cd720eef65686fee 100644
--- a/algo/detectors/trd/Hitfind.cxx
+++ b/algo/detectors/trd/Hitfind.cxx
@@ -112,8 +112,6 @@ namespace cbm::algo::trd
     auto& hitsOut     = std::get<0>(result);
     auto& monitor     = std::get<1>(result);
 
-    xpu::scoped_timer t_{"TRDHitfind", &monitor.timeTotal};
-
     // Intermediate digi storage variables (digi, index) per module and row
     std::unordered_map<int, std::vector<std::vector<std::pair<CbmTrdDigi, int32_t>>>> digiBuffer;  //[modAddress][row]
 
diff --git a/algo/detectors/trd/Hitfind.h b/algo/detectors/trd/Hitfind.h
index b3882f94af2351678aa14686dbf8a1f26332d13c..93dc592f647c4899f26eb3fc0cefc7b30e4294ec 100644
--- a/algo/detectors/trd/Hitfind.h
+++ b/algo/detectors/trd/Hitfind.h
@@ -31,7 +31,6 @@ namespace cbm::algo::trd
    ** @brief Monitoring data for hitfinding
    **/
   struct HitfindMonitorData {
-    xpu::timings timeTotal;
     xpu::timings timeHitfind;
     xpu::timings timeClusterize;
     xpu::timings sortTime;
@@ -41,9 +40,7 @@ namespace cbm::algo::trd
     std::string print() const
     {
       std::stringstream ss;
-      ss << "Hitfind stats: num digis " << numDigis << ", time " << timeTotal.wall() << " ms ( "
-         << timeTotal.throughput() << " GB/s ), sort time " << sortTime.wall() << " ms, num hits " << numHits
-         << std::endl;
+      ss << "Hitfind stats: num digis " << numDigis << ", num hits " << numHits << '\n';  // no flush needed
       return ss.str();
     }
   };
diff --git a/algo/global/Reco.cxx b/algo/global/Reco.cxx
index f2951e7fad0c57378c171c3bd7c23491a31b80e5..94cf218a6aad04a66398d15c75ef971ab19af24c 100644
--- a/algo/global/Reco.cxx
+++ b/algo/global/Reco.cxx
@@ -255,6 +255,8 @@ RecoResults Reco::Run(const fles::Timeslice& ts)
 
     sts::HitfinderMon stsHitfinderMonitor;
     if (fStsHitFinder) {
+      xpu::scoped_timer timerSTS("STS Reco", &procMon.timeSTS);
+      xpu::t_add_bytes(digis.fSts.size() * sizeof(CbmStsDigi));
       bool storeClusters  = Opts().HasOutput(RecoData::Cluster);
       auto stsResults     = (*fStsHitFinder)(digis.fSts, storeClusters);
       stsHitfinderMonitor = std::move(stsResults.monitor);
@@ -267,6 +269,8 @@ RecoResults Reco::Run(const fles::Timeslice& ts)
 
     PartitionedVector<tof::Hit> tofHits;
     if (Opts().Has(Step::LocalReco) && Opts().Has(fles::Subsystem::TOF)) {
+      xpu::scoped_timer timerTOF("TOF Reco", &procMon.timeTOF);
+      xpu::t_add_bytes(digis.fTof.size() * sizeof(CbmTofDigi));
       auto [caldigis, calmonitor] = (*fTofCalibrator)(digis.fTof);
       auto nUnknownRPC            = calmonitor.fDigiCalibUnknownRPC;
       if (nUnknownRPC > 0) {
@@ -282,6 +286,8 @@ RecoResults Reco::Run(const fles::Timeslice& ts)
 
     PartitionedVector<trd::Hit> trdHits;
     if (fTrdHitfind) {
+      xpu::scoped_timer timerTRD("TRD Reco", &procMon.timeTRD);
+      xpu::t_add_bytes((digis.fTrd.size() + digis.fTrd2d.size()) * sizeof(CbmTrdDigi));  // 1D + 2D digis
       // FIXME: additional copy of digis, figure out how to pass 1d + 2d digis at once to hitfinder
       const auto& digis1d = digis.fTrd;
       const auto& digis2d = digis.fTrd2d;
@@ -298,6 +304,10 @@ RecoResults Reco::Run(const fles::Timeslice& ts)
     // --- Tracking
     TrackingChain::Output_t trackingOutput{};
     if (Opts().Has(Step::Tracking)) {
+      xpu::scoped_timer timerCA("CA", &procMon.timeCA);
+      xpu::t_add_bytes(recoData.stsHits.NElements() * sizeof(sts::Hit));
+      xpu::t_add_bytes(recoData.tofHits.NElements() * sizeof(tof::Hit));
+      xpu::t_add_bytes(recoData.trdHits.NElements() * sizeof(trd::Hit));
       TrackingChain::Input_t input{
         .stsHits = recoData.stsHits,
         .tofHits = recoData.tofHits,
@@ -425,8 +435,6 @@ void Reco::QueueStsRecoMetrics(const sts::HitfinderMon& monitor)
 
   GetMonitor().QueueMetric("cbmreco", {{"hostname", fles::system::current_hostname()}, {"child", Opts().ChildId()}},
                            {
-                             {"stsRecoTimeTotal", monitor.time.wall()},
-                             {"stsRecoThroughput", monitor.time.throughput()},
                              {"stsRecoNumClusters", (unsigned long) monitor.nClusterTotal},
                              {"stsRecoNumHits", (unsigned long) monitor.nHitsTotal},
                              {"stsRecoNumClusterBucketOverflow", monitor.nClusterBucketOverflow},
@@ -440,8 +448,6 @@ void Reco::QueueTofRecoMetrics(const tof::HitfindMonitorData& mon)
 
   GetMonitor().QueueMetric("cbmreco", {{"hostname", fles::system::current_hostname()}, {"child", Opts().ChildId()}},
                            {
-                             {"tofRecoTimeTotal", mon.fTime.wall()},
-                             {"tofRecoThroughput", mon.fTime.throughput()},
                              {"tofRecoNumDigisIn", mon.fNumDigis},
                              {"tofRecoNumHits", mon.fNumHits},
                            });
@@ -455,8 +461,6 @@ void Reco::QueueTrdRecoMetrics(const trd::HitfindMonitorData& mon)
 
   GetMonitor().QueueMetric("cbmreco", {{"hostname", fles::system::current_hostname()}, {"child", Opts().ChildId()}},
                            {
-                             {"trdRecoTimeTotal", mon.timeTotal.wall()},
-                             {"trdRecoThroughput", mon.timeTotal.throughput()},
                              {"trdRecoNumDigisIn", mon.numDigis},
                              {"trdRecoNumHits", mon.numHits},
                            });
@@ -532,8 +536,7 @@ void Reco::QueueTrackingMetrics(const ca::TrackingMonitorData& monitor)
   }
 
   GetMonitor().QueueMetric("cbmreco", {{"hostname", fles::system::current_hostname()}, {"child", Opts().ChildId()}},
-                           {{"caRecoTimeTotal", monitor.GetTimer(ca::ETimer::TrackingChain).GetTotalMs()},
-                            {"caTrackFinderTime", monitor.GetTimer(ca::ETimer::FindTracks).GetTotalMs()},
+                           {{"caTrackFinderTime", monitor.GetTimer(ca::ETimer::FindTracks).GetTotalMs()},
                             {"caTrackFitterTime", monitor.GetTimer(ca::ETimer::FitTracks).GetTotalMs()},
                             {"caNofRecoTracks", monitor.GetCounterValue(ca::ECounter::RecoTrack)},
                             {"caNofRecoHitsTotal", monitor.GetCounterValue(ca::ECounter::RecoHit)},
@@ -548,9 +551,12 @@ void Reco::QueueProcessingMetrics(const ProcessingMonitor& mon)
   }
 
   MetricFieldSet fields = {
-    {"processingTimeTotal", mon.time.wall()},
-    {"processingThroughput", mon.time.throughput()},
-  };
+    {"processingTimeTotal", mon.time.wall()},   {"processingThroughput", mon.time.throughput()},
+    {"caRecoTimeTotal", mon.timeCA.wall()},     {"caRecoThroughput", mon.timeCA.throughput()},
+    {"trdRecoTimeTotal", mon.timeTRD.wall()},   {"trdRecoThroughput", mon.timeTRD.throughput()},
+    {"tofRecoTimeTotal", mon.timeTOF.wall()},   {"tofRecoThroughput", mon.timeTOF.throughput()},
+    {"stsRecoTimeTotal", mon.timeSTS.wall()},   {"stsRecoThroughput", mon.timeSTS.throughput()},
+    {"unpackTimeTotal", mon.timeUnpack.wall()}, {"unpackThroughput", mon.timeUnpack.throughput()}};
 
   if (mon.tsDelta) {
     fields.emplace_back("tsDelta", *mon.tsDelta);
diff --git a/algo/global/Reco.h b/algo/global/Reco.h
index cce762d6965c12ac7861837014452519e29aed0d..0b8483d57f23d26dba20df7e548d624b11a8d6d5 100644
--- a/algo/global/Reco.h
+++ b/algo/global/Reco.h
@@ -90,6 +90,10 @@ namespace cbm::algo
   struct ProcessingMonitor {
     xpu::timings time;           //< total processing time
     xpu::timings timeUnpack;     //< time spent in unpacking
+    xpu::timings timeSTS;        //< time spent in STS reco (bytes counted: input STS digis)
+    xpu::timings timeTOF;        //< time spent in TOF reco (bytes counted: input TOF digis)
+    xpu::timings timeTRD;        //< time spent in TRD reco (bytes counted: input TRD digis)
+    xpu::timings timeCA;         //< time spent in CA tracking (bytes counted: STS, TOF and TRD hits)
     std::optional<i64> tsDelta;  //< id difference between current and previous timeslice
   };
 
diff --git a/algo/unpack/CommonUnpacker.h b/algo/unpack/CommonUnpacker.h
index e4e54eddf86b4caeb37b8eb1dfcca6e6d5b8bfbd..c76119cdbd6bc93047181c8f7b9e76fe600e664c 100644
--- a/algo/unpack/CommonUnpacker.h
+++ b/algo/unpack/CommonUnpacker.h
@@ -148,7 +148,10 @@ namespace cbm::algo
       }
       xpu::pop_timer();
 
+      xpu::push_timer("Sort");
+      xpu::t_add_bytes(monitorOut.sizeBytesOut);
       DoSort(digisOut);
+      xpu::pop_timer();
 
       return out;
     }