From e8e4845bec7b4ef2706475f8bb39200ea37920b4 Mon Sep 17 00:00:00 2001
From: Felix Weiglhofer <weiglhofer@fias.uni-frankfurt.de>
Date: Wed, 28 Jun 2023 11:36:51 +0000
Subject: [PATCH] algo::Unpack: Annotate timers with input sizes to calculate
 throughput.

---
 algo/unpack/Unpack.cxx | 38 +++++++++++++++++++++++---------------
 algo/unpack/Unpack.h   | 17 +++++++++++------
 2 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/algo/unpack/Unpack.cxx b/algo/unpack/Unpack.cxx
index 3a4b014125..23e1e201d9 100644
--- a/algo/unpack/Unpack.cxx
+++ b/algo/unpack/Unpack.cxx
@@ -30,11 +30,13 @@ namespace cbm::algo
 
     if (DetectorEnabled(fles::SubsystemIdentifier::STS)) {
       xpu::scoped_timer t1("STS");
+      xpu::t_add_bytes(fParallelStsSetup.sizeBytes);
       ParallelMsLoop(fParallelStsSetup, digiTs.fData.fSts.fDigis, monitor.fSts, *timeslice, fAlgoSts, 0x20);
     }
 
     if (DetectorEnabled(fles::SubsystemIdentifier::RPC)) {
       xpu::scoped_timer t1("TOF");
+      xpu::t_add_bytes(fParallelTofSetup.sizeBytes);
       ParallelMsLoop(fParallelTofSetup, digiTs.fData.fTof.fDigis, monitor.fTof, *timeslice, fAlgoTof, 0x00);
     }
 
@@ -47,6 +49,7 @@ namespace cbm::algo
       if (!DetectorEnabled(systemId)) continue;
 
       xpu::scoped_timer t1(fles::to_string(systemId));
+      xpu::t_add_bytes(timeslice->size_component(comp));
 
       // Equipment ID of current component
       const uint16_t equipmentId = timeslice->descriptor(comp, 0).eq_id;
@@ -82,20 +85,14 @@ namespace cbm::algo
     // --- Sorting of output digis. Is required by both digi trigger and event builder.
 
     xpu::scoped_timer t2("Sort");
-    Sort(digiTs.fData.fSts.fDigis.begin(), digiTs.fData.fSts.fDigis.end(),
-         [](CbmStsDigi digi1, CbmStsDigi digi2) { return digi1.GetTime() < digi2.GetTime(); });
-    Sort(digiTs.fData.fMuch.fDigis.begin(), digiTs.fData.fMuch.fDigis.end(),
-         [](CbmMuchDigi digi1, CbmMuchDigi digi2) { return digi1.GetTime() < digi2.GetTime(); });
-    Sort(digiTs.fData.fTof.fDigis.begin(), digiTs.fData.fTof.fDigis.end(),
-         [](CbmTofDigi digi1, CbmTofDigi digi2) { return digi1.GetTime() < digi2.GetTime(); });
-    Sort(digiTs.fData.fT0.fDigis.begin(), digiTs.fData.fT0.fDigis.end(),
-         [](CbmTofDigi digi1, CbmTofDigi digi2) { return digi1.GetTime() < digi2.GetTime(); });
-    Sort(digiTs.fData.fTrd.fDigis.begin(), digiTs.fData.fTrd.fDigis.end(),
-         [](CbmTrdDigi digi1, CbmTrdDigi digi2) { return digi1.GetTime() < digi2.GetTime(); });
-    Sort(digiTs.fData.fTrd2d.fDigis.begin(), digiTs.fData.fTrd2d.fDigis.end(),
-         [](CbmTrdDigi digi1, CbmTrdDigi digi2) { return digi1.GetTime() < digi2.GetTime(); });
-    Sort(digiTs.fData.fRich.fDigis.begin(), digiTs.fData.fRich.fDigis.end(),
-         [](CbmRichDigi digi1, CbmRichDigi digi2) { return digi1.GetTime() < digi2.GetTime(); });
+    auto& digiData = digiTs.fData;
+    DoSort(digiData.fSts.fDigis);
+    DoSort(digiData.fMuch.fDigis);
+    DoSort(digiData.fTof.fDigis);
+    DoSort(digiData.fT0.fDigis);
+    DoSort(digiData.fTrd.fDigis);
+    DoSort(digiData.fTrd2d.fDigis);
+    DoSort(digiData.fRich.fDigis);
 
     return result;
   }
@@ -350,6 +347,7 @@ namespace cbm::algo
 
     u64 numMsInComp = timeslice.num_microslices(component);
     u16 componentId = timeslice.descriptor(component, 0).eq_id;
+    setup.sizeBytes += timeslice.size_component(component);
     setup.numMs += numMsInComp;
     for (u64 mslice = 0; mslice < numMsInComp; mslice++) {
       uint64_t msByteSize = timeslice.descriptor(component, mslice).size;
@@ -358,6 +356,7 @@ namespace cbm::algo
       msContent.push_back(timeslice.content(component, mslice));
     }
   }
+  // ----------------------------------------------------------------------------
 
   // ----------------------------------------------------------------------------
   template<class Digi, class UnpackAlgo, class Monitor>
@@ -401,8 +400,17 @@ namespace cbm::algo
     xpu::pop_timer();
 
     monitorOut = std::move(monitor);
+  }
+  // ----------------------------------------------------------------------------
 
-    // Todo: Combine monitor Data
+  // ----------------------------------------------------------------------------
+  template<class Digi>
+  void Unpack::DoSort(std::vector<Digi>& digis)
+  {
+    xpu::t_add_bytes(digis.size() * sizeof(Digi));  // Add bytes to timer, assumes xpu::timers are started in operator()
+    Sort(digis.begin(), digis.end(),
+         [](const Digi& digi1, const Digi& digi2) { return digi1.GetTime() < digi2.GetTime(); });
   }
 
+
 } /* namespace cbm::algo */
diff --git a/algo/unpack/Unpack.h b/algo/unpack/Unpack.h
index 94dce34b0d..ad5d6c0935 100644
--- a/algo/unpack/Unpack.h
+++ b/algo/unpack/Unpack.h
@@ -133,12 +133,13 @@ namespace cbm::algo
   private:  // types
     template<class Digi, class Monitor>
     struct ParallelSetup {
-      size_t numMs = 0;
-      std::vector<u16> msEquipmentIds;
-      std::vector<fles::MicrosliceDescriptor> msDescriptors;
-      std::vector<const u8*> msContent;
-      std::vector<std::vector<Digi>> msDigis;
-      std::vector<Monitor> msMonitorData;
+      size_t numMs     = 0;                                   ///< number of microslices to unpack
+      size_t sizeBytes = 0;                                   ///< total size of microslices in bytes
+      std::vector<u16> msEquipmentIds;                        ///< equipment ids of microslices
+      std::vector<fles::MicrosliceDescriptor> msDescriptors;  ///< microslice descriptors
+      std::vector<const u8*> msContent;                       ///< pointer to microslice content
+      std::vector<std::vector<Digi>> msDigis;                 ///< unpacked digis per microslice
+      std::vector<Monitor> msMonitorData;                     ///< unpacking monitoring data per microslice
     };
 
   private:  // methods
@@ -159,6 +160,10 @@ namespace cbm::algo
     void ParallelMsLoop(ParallelSetup<Digi, Monitor>& setup, std::vector<Digi>& digisOut, std::vector<Monitor>& monitor,
                         const fles::Timeslice& ts, const std::map<u16, UnpackAlgo>& algos, u8 sys_ver);
 
+    /** @brief Sort Digis and add bytes to timer for throughput */
+    template<class Digi>
+    void DoSort(std::vector<Digi>& digis);
+
 
   private:                                                  // members
     bool fApplyWalkCorrection                      = true;  ///< Apply walk correction
-- 
GitLab