diff --git a/algo/unpack/Unpack.cxx b/algo/unpack/Unpack.cxx
index 98f29b0e6a9ac757e21bf91588d225d1352c9bc8..403e0bcf21678b13cdbd8811ec06900dc249c8c1 100644
--- a/algo/unpack/Unpack.cxx
+++ b/algo/unpack/Unpack.cxx
@@ -26,18 +26,14 @@ namespace cbm::algo
     CbmDigiTimeslice& digiTs   = result.first;
     UnpackMonitorData& monitor = result.second;
 
-    ParallelInit(*timeslice);
-
     if (DetectorEnabled(fles::SubsystemIdentifier::STS)) {
-      xpu::scoped_timer t1("STS");
-      xpu::t_add_bytes(fParallelStsSetup.sizeBytes);
-      ParallelMsLoop(fParallelStsSetup, digiTs.fData.fSts.fDigis, monitor.fSts, *timeslice, fAlgoSts, 0x20);
+      ParallelMsLoop(fles::SubsystemIdentifier::STS, digiTs.fData.fSts.fDigis, monitor.fSts, *timeslice, fAlgoSts,
+                     0x20);
     }
 
     if (DetectorEnabled(fles::SubsystemIdentifier::RPC)) {
-      xpu::scoped_timer t1("TOF");
-      xpu::t_add_bytes(fParallelTofSetup.sizeBytes);
-      ParallelMsLoop(fParallelTofSetup, digiTs.fData.fTof.fDigis, monitor.fTof, *timeslice, fAlgoTof, 0x00);
+      ParallelMsLoop(fles::SubsystemIdentifier::RPC, digiTs.fData.fTof.fDigis, monitor.fTof, *timeslice, fAlgoTof,
+                     0x00);
     }
 
     // --- Component loop
@@ -98,6 +94,112 @@ namespace cbm::algo
   }
   // ----------------------------------------------------------------------------
 
+
+  // ----------------------------------------------------------------------------
+  /** @brief Collect the microslices of one subsystem from a timeslice
+   ** @param ts         Timeslice to scan
+   ** @param sysId      Subsystem to select; matched against sys_id of each component's first descriptor
+   ** @param msEqIds    Receives, per microslice, the eq_id taken from its component's first descriptor
+   ** @param msDesc     Receives the descriptor of each selected microslice
+   ** @param msContent  Receives the content pointer of each selected microslice
+   ** @return Number of selected microslices and summed size_component() of the selected components
+   **
+   ** Entries are appended in component order; the output vectors are not cleared first.
+   **/
+  std::pair<size_t, size_t> Unpack::ParallelInit(const fles::Timeslice& ts, fles::SubsystemIdentifier sysId,
+                                                 std::vector<u16>& msEqIds,
+                                                 std::vector<fles::MicrosliceDescriptor>& msDesc,
+                                                 std::vector<const u8*>& msContent)
+  {
+    size_t numMs     = 0;
+    size_t sizeBytes = 0;
+    for (uint64_t comp = 0; comp < ts.num_components(); comp++) {
+      const auto systemId = static_cast<fles::SubsystemIdentifier>(ts.descriptor(comp, 0).sys_id);
+      if (systemId == sysId) {
+        const u64 numMsInComp = ts.num_microslices(comp);
+        const u16 componentId = ts.descriptor(comp, 0).eq_id;
+        sizeBytes += ts.size_component(comp);
+        numMs     += numMsInComp;
+        for (u64 mslice = 0; mslice < numMsInComp; mslice++) {
+          msEqIds.push_back(componentId);
+          msDesc.push_back(ts.descriptor(comp, mslice));
+          msContent.push_back(ts.content(comp, mslice));
+        }
+      }
+    }
+
+    return {numMs, sizeBytes};
+  }
+  // ----------------------------------------------------------------------------
+
+
+  // ----------------------------------------------------------------------------
+  /** @brief Unpack the microslices of one subsystem in parallel and merge their digis
+   ** @param sysId       Subsystem whose microslices are unpacked; its string form names the timer
+   ** @param digisOut    Resized to the total digi count and filled in microslice order
+   ** @param monitorOut  Replaced by one monitoring entry per microslice
+   ** @param ts          Timeslice to unpack
+   ** @param algos       Unpacking algorithm per equipment id
+   ** @param sys_ver     Not used by this function
+   **/
+  template<class Digi, class UnpackAlgo, class Monitor>
+  void Unpack::ParallelMsLoop(const fles::SubsystemIdentifier sysId, std::vector<Digi>& digisOut,
+                              std::vector<Monitor>& monitorOut, const fles::Timeslice& ts,
+                              const std::map<u16, UnpackAlgo>& algos, u8 sys_ver)
+  {
+    xpu::scoped_timer t_(fles::to_string(sysId));
+
+    std::vector<u16> msEqIds;                        // equipment ids of microslices
+    std::vector<fles::MicrosliceDescriptor> msDesc;  // microslice descriptors
+    std::vector<const u8*> msContent;                // pointer to microslice content
+
+    // Plain locals rather than a structured binding, because numMs is referenced by the OpenMP loops below.
+    // NOTE(review): presumably not every supported compiler accepts bindings there - confirm.
+    const auto setup       = ParallelInit(ts, sysId, msEqIds, msDesc, msContent);
+    const size_t numMs     = setup.first;
+    const size_t sizeBytes = setup.second;
+
+    std::vector<std::vector<Digi>> msDigis(numMs);  // unpacked digis per microslice
+    std::vector<Monitor> monitor(numMs);            // unpacking monitoring data per microslice
+
+    xpu::t_add_bytes(sizeBytes);
+
+    // NOTE(review): algos.at() throws std::out_of_range for an equipment id without a registered algorithm;
+    // nothing in this function catches that inside the parallel loop.
+    xpu::push_timer("Unpack");
+#pragma omp parallel for schedule(dynamic)
+    for (size_t i = 0; i < numMs; i++) {
+      auto result = algos.at(msEqIds[i])(msContent[i], msDesc[i], ts.start_time());
+      msDigis[i]  = std::move(result.first);
+      monitor[i]  = std::move(result.second);
+    }
+    xpu::pop_timer();
+
+    // Exclusive prefix sum of the digi counts: msOffsets[i] is where microslice i starts in digisOut.
+    // Computed once here so the merge loop does not re-sum all preceding sizes for every microslice.
+    std::vector<size_t> msOffsets(numMs);
+    size_t nDigisTotal = 0;
+    for (size_t i = 0; i < numMs; i++) {
+      msOffsets[i] = nDigisTotal;
+      nDigisTotal  += msDigis[i].size();
+    }
+
+    xpu::push_timer("Resize");
+    digisOut.resize(nDigisTotal);
+    xpu::pop_timer();
+
+    xpu::push_timer("Merge");
+#pragma omp parallel for schedule(dynamic)
+    for (size_t i = 0; i < numMs; i++) {
+      std::copy(msDigis[i].begin(), msDigis[i].end(), digisOut.begin() + msOffsets[i]);
+    }
+    xpu::pop_timer();
+
+    monitorOut = std::move(monitor);
+  }
+  // ----------------------------------------------------------------------------
+
+
   // ----------------- Microslice loop ------------------------------------------
   template<class Digi, class UnpackAlgo, class MonitorData>
   void Unpack::MsLoop(const fles::Timeslice* timeslice, std::map<uint16_t, UnpackAlgo>& algoMap, const uint64_t comp,
@@ -313,95 +415,6 @@ namespace cbm::algo
   }
   // ----------------------------------------------------------------------------
 
-  // ----------------------------------------------------------------------------
-  void Unpack::ParallelInit(const fles::Timeslice& timeslice)
-  {
-    xpu::scoped_timer t("ParallelInit");
-
-    fParallelStsSetup = {};
-    fParallelTofSetup = {};
-
-    for (uint64_t comp = 0; comp < timeslice.num_components(); comp++) {
-      auto systemId = static_cast<fles::SubsystemIdentifier>(timeslice.descriptor(comp, 0).sys_id);
-      switch (systemId) {
-        case fles::SubsystemIdentifier::STS: ParallelInitComponent(fParallelStsSetup, timeslice, comp); break;
-        case fles::SubsystemIdentifier::RPC: ParallelInitComponent(fParallelTofSetup, timeslice, comp); break;
-        default: break;
-      }
-    }
-    fParallelStsSetup.msDigis.resize(fParallelStsSetup.numMs);
-    fParallelStsSetup.msMonitorData.resize(fParallelStsSetup.numMs);
-    fParallelTofSetup.msDigis.resize(fParallelTofSetup.numMs);
-    fParallelTofSetup.msMonitorData.resize(fParallelTofSetup.numMs);
-  }
-  // ----------------------------------------------------------------------------
-
-  // ----------------------------------------------------------------------------
-  template<class Digi, class Monitor>
-  void Unpack::ParallelInitComponent(ParallelSetup<Digi, Monitor>& setup, const fles::Timeslice& timeslice,
-                                     u64 component)
-  {
-    auto& msDesc    = setup.msDescriptors;
-    auto& msEqIds   = setup.msEquipmentIds;
-    auto& msContent = setup.msContent;
-
-    u64 numMsInComp = timeslice.num_microslices(component);
-    u16 componentId = timeslice.descriptor(component, 0).eq_id;
-    setup.sizeBytes += timeslice.size_component(component);
-    setup.numMs     += numMsInComp;
-    for (u64 mslice = 0; mslice < numMsInComp; mslice++) {
-      uint64_t msByteSize = timeslice.descriptor(component, mslice).size;
-      msEqIds.push_back(componentId);
-      msDesc.push_back(timeslice.descriptor(component, mslice));
-      msContent.push_back(timeslice.content(component, mslice));
-    }
-  }
-  // ----------------------------------------------------------------------------
-
-  // ----------------------------------------------------------------------------
-  template<class Digi, class UnpackAlgo, class Monitor>
-  void Unpack::ParallelMsLoop(ParallelSetup<Digi, Monitor>& setup, std::vector<Digi>& digisOut,
-                              std::vector<Monitor>& monitorOut, const fles::Timeslice& ts,
-                              const std::map<u16, UnpackAlgo>& algos, u8 sys_ver)
-  {
-    const auto& msContent = setup.msContent;
-    const auto& msDesc    = setup.msDescriptors;
-    const auto& msEqIds   = setup.msEquipmentIds;
-    auto& monitor         = setup.msMonitorData;
-    auto& msDigis         = setup.msDigis;
-    size_t numMs          = msDigis.size();
-
-    xpu::push_timer("Unpack");
-#pragma omp parallel for schedule(dynamic)
-    for (size_t i = 0; i < numMs; i++) {
-      auto result = algos.at(msEqIds[i])(msContent[i], msDesc[i], ts.start_time());
-      msDigis[i]  = std::move(result.first);
-      monitor[i]  = std::move(result.second);
-    }
-    xpu::pop_timer();
-
-    size_t nDigisTotal = 0;
-    for (const auto& digis : msDigis) {
-      nDigisTotal += digis.size();
-    }
-
-    xpu::push_timer("Resize");
-    digisOut.resize(nDigisTotal);
-    xpu::pop_timer();
-
-    xpu::push_timer("Merge");
-#pragma omp parallel for schedule(dynamic)
-    for (unsigned int i = 0; i < numMs; i++) {
-      unsigned int offset = 0;
-      for (unsigned int x = 0; x < i; x++)
-        offset += msDigis[x].size();
-      std::copy(msDigis[i].begin(), msDigis[i].end(), digisOut.begin() + offset);
-    }
-    xpu::pop_timer();
-
-    monitorOut = std::move(monitor);
-  }
-  // ----------------------------------------------------------------------------
 
   // ----------------------------------------------------------------------------
   template<class Digi>
diff --git a/algo/unpack/Unpack.h b/algo/unpack/Unpack.h
index a6ee04766b528453ee218245eb79c08409d3045d..081914a0bf7d415a10c1b03242c1e8d9a3882d34 100644
--- a/algo/unpack/Unpack.h
+++ b/algo/unpack/Unpack.h
@@ -130,18 +130,6 @@ namespace cbm::algo
       return std::find(fSubIds.begin(), fSubIds.end(), subId) != fSubIds.end();
     }
 
-  private:  // types
-    template<class Digi, class Monitor>
-    struct ParallelSetup {
-      size_t numMs     = 0;                                   //< number of microslices to unpack
-      size_t sizeBytes = 0;                                   // total size of microslices in bytes
-      std::vector<u16> msEquipmentIds;                        //< equipment ids of microslices
-      std::vector<fles::MicrosliceDescriptor> msDescriptors;  //< microslice descriptors
-      std::vector<const u8*> msContent;                       //< pointer to microslice content
-      std::vector<std::vector<Digi>> msDigis;                 //< unpacked digis per microslice
-      std::vector<Monitor> msMonitorData;                     //< unpacking monitoring data per microslice
-    };
-
   private:  // methods
     /** @brief Microslice loop **/
     template<class Digi, class UnpackAlgo, class MonitorData>
@@ -149,16 +137,16 @@ namespace cbm::algo
                 const uint16_t eqId, std::vector<Digi>* digis, UnpackMonitorData& monitor,
                 std::vector<MonitorData>* monitorMs, uint8_t sys_ver);
 
-    /** Init parallel unpacker */
-    void ParallelInit(const fles::Timeslice& timeslice);
-
-    template<class Digi, class Monitor>
-    void ParallelInitComponent(ParallelSetup<Digi, Monitor>& setup, const fles::Timeslice& timeslice, u64 component);
-
     /** @brief Parallel microslice loop **/
     template<class Digi, class UnpackAlgo, class Monitor>
-    void ParallelMsLoop(ParallelSetup<Digi, Monitor>& setup, std::vector<Digi>& digisOut, std::vector<Monitor>& monitor,
-                        const fles::Timeslice& ts, const std::map<u16, UnpackAlgo>& algos, u8 sys_ver);
+    void ParallelMsLoop(const fles::SubsystemIdentifier sysId, std::vector<Digi>& digisOut,
+                        std::vector<Monitor>& monitor, const fles::Timeslice& ts,
+                        const std::map<u16, UnpackAlgo>& algos, u8 sys_ver);
+
+    /** @brief Collect equipment ids, descriptors and content pointers of the microslices of one subsystem **/
+    std::pair<size_t, size_t> ParallelInit(const fles::Timeslice& ts, fles::SubsystemIdentifier sysId,
+                                           std::vector<u16>& msEqIds, std::vector<fles::MicrosliceDescriptor>& msDesc,
+                                           std::vector<const u8*>& msContent);
 
     /** @brief Sort Digis and add bytes to timer for throughput */
     template<class Digi>
@@ -196,9 +184,6 @@ namespace cbm::algo
       {fles::SubsystemIdentifier::RICH, 100}, {fles::SubsystemIdentifier::RPC, 40},
       {fles::SubsystemIdentifier::T0, 0},     {fles::SubsystemIdentifier::TRD, 1300},
       {fles::SubsystemIdentifier::TRD2D, -510}};
-    /** @brief Parallel STS Setup */
-    ParallelSetup<sts::Digi, UnpackStsMonitorData> fParallelStsSetup  = {};
-    ParallelSetup<CbmTofDigi, UnpackTofMonitorData> fParallelTofSetup = {};
   };
 
 }  // namespace cbm::algo