diff --git a/algo/CMakeLists.txt b/algo/CMakeLists.txt index b81382879e529fdbb0e967435abfae3db1429ec0..f7e03a8eede7ebc43055a16f1219ca2a113096fc 100644 --- a/algo/CMakeLists.txt +++ b/algo/CMakeLists.txt @@ -203,10 +203,12 @@ install( FILES base/ChainContext.h base/Definitions.h + base/HistogramSender.h base/Options.h base/RecoParams.h base/SubChain.h base/PartitionedVector.h + base/PartitionedSpan.h global/Reco.h global/RecoResults.h global/RecoResultsInputArchive.h diff --git a/algo/base/Exceptions.h b/algo/base/Exceptions.h new file mode 100644 index 0000000000000000000000000000000000000000..002795e1ab4b834934caef81fa15adf53cbdfb43 --- /dev/null +++ b/algo/base/Exceptions.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ +#ifndef CBM_ALGO_BASE_EXCEPTIONS_H +#define CBM_ALGO_BASE_EXCEPTIONS_H + +#include <exception> + +#include <fmt/format.h> + +namespace cbm::algo +{ + /** + * @brief Base class for exceptions + */ + class Exception : public std::runtime_error { + public: + template<typename... Args> + Exception(const char* fmt, Args&&... args) : std::runtime_error(fmt::format(fmt, std::forward<Args>(args)...)) + { + } + }; + + /** + * @brief Indicates an unrecoverable error. Should tear down the process. + */ + class FatalError : public Exception { + using Exception::Exception; + }; + + /** + * Indicates an error during timeslice processing. Timeslice will be discarded. + * Processing can continue with new timeslice. 
+ */ + class ProcessingError : public Exception { + using Exception::Exception; + }; + +} // namespace cbm::algo + +#endif diff --git a/algo/base/HistogramSender.h b/algo/base/HistogramSender.h new file mode 100644 index 0000000000000000000000000000000000000000..e4c13d192f110e58e69f3ed9a36cd0d696885928 --- /dev/null +++ b/algo/base/HistogramSender.h @@ -0,0 +1,64 @@ +/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ +#ifndef CBM_ALGO_BASE_HISTOGRAM_SENDER_H +#define CBM_ALGO_BASE_HISTOGRAM_SENDER_H + +#include <boost/archive/binary_oarchive.hpp> +#include <boost/iostreams/device/array.hpp> +#include <boost/iostreams/device/back_inserter.hpp> +#include <boost/iostreams/stream.hpp> +#include <boost/serialization/utility.hpp> +#include <boost/serialization/vector.hpp> + +#include <string> +#include <string_view> +#include <zmq.hpp> + +namespace cbm::algo +{ + + class HistogramSender { + public: + HistogramSender(std::string_view address) + : fHistComChan(address) + , fZmqContext(1) + , fZmqSocket(fZmqContext, zmq::socket_type::push) + { + fZmqSocket.connect(fHistComChan); // This side "connects" to socket => Other side should have "bind"!!!! + } + + /** @brief Serialize object and send it to the histogram server + ** @param obj: object to be serialized in the message, e.g. config pairs of strings or Histo1D + ** @param flags: or'ed values from zmq::send_flags, typ. 
zmq::send_flags::sndmore to indicate multi-parts message + **/ + template<typename Object> + void PrepareAndSendMsg(const Object& obj, zmq::send_flags flags) + { + /// Needed resources (serialized string, boost inserter, boost stream, boost binary output archive) + namespace b_io = boost::iostreams; + namespace b_ar = boost::archive; + + std::string serial_str; + b_io::back_insert_device<std::string> inserter(serial_str); + b_io::stream<b_io::back_insert_device<std::string>> bstream(inserter); + b_ar::binary_oarchive oa(bstream); + + serial_str.clear(); + oa << obj; + bstream.flush(); + + zmq::message_t msg(serial_str.size()); + std::copy_n(static_cast<const char*>(serial_str.data()), msg.size(), static_cast<char*>(msg.data())); + fZmqSocket.send(msg, flags); + } + + private: + std::string fHistComChan = "tcp://127.0.0.1:56800"; + zmq::context_t fZmqContext; ///< ZMQ context FIXME: should be only one context per binary! + zmq::socket_t fZmqSocket; ///< ZMQ socket to histogram server + }; + +} // namespace cbm::algo + +#endif diff --git a/algo/base/Options.cxx b/algo/base/Options.cxx index e743e92ab61d7f72c64cfc5ac5fa1ec6237a9ae6..3f3ff92adf2049bd476e74a77e0306c5070e5420 100644 --- a/algo/base/Options.cxx +++ b/algo/base/Options.cxx @@ -69,6 +69,7 @@ Options::Options(int argc, char** argv) "set log level (debug, info, warning, error, fatal)") ("monitor,m", po::value(&fMonitorUri)->value_name("<uri>")->implicit_value("file:cout"), "URI specifying monitor output (e.g. file:/tmp/monitor.txt, influx1:login:8086:cbmreco_status). Prints to cout when no argument is given. 
Monitor is disabled when flag is not set.") + ("histogram", po::value(&fHistogramUri)->value_name("<uri>"), "URI to specify histogram server") ("log-file,L", po::value(&fLogFile)->value_name("<file>"), "write log messages to file") ("output-types,O", po::value(&fOutputTypes)->multitoken()->default_value({RecoData::Hit})->value_name("<types>"), diff --git a/algo/base/Options.h b/algo/base/Options.h index 064cc6bd3949246a140bbd3957ec4f51ef3a9dc3..d1a83d93d6e72708207d57aa3161dc53dcba206b 100644 --- a/algo/base/Options.h +++ b/algo/base/Options.h @@ -28,6 +28,7 @@ namespace cbm::algo fs::path LogFile() const { return fLogFile; } const std::string& Device() const { return fDevice; } const std::string& MonitorUri() const { return fMonitorUri; } + const std::string& HistogramUri() const { return fHistogramUri; } bool CollectKernelTimes() const { return fCollectKernelTimes; } int NumTimeslices() const { return fNumTimeslices; } int SkipTimeslices() const { return fSkipTimeslices; } @@ -64,6 +65,7 @@ namespace cbm::algo std::string fLogFile; std::string fDevice; std::string fMonitorUri; + std::string fHistogramUri; bool fDumpArchive = false; bool fCollectKernelTimes = false; int fNumTimeslices = -1; diff --git a/algo/base/PartitionedSpan.h b/algo/base/PartitionedSpan.h new file mode 100644 index 0000000000000000000000000000000000000000..988cc6f2daba2dedbb37730f7951cffdd23af62f --- /dev/null +++ b/algo/base/PartitionedSpan.h @@ -0,0 +1,184 @@ +/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ +#ifndef CBM_ALGO_BASE_PARTITIONED_SPAN_H +#define CBM_ALGO_BASE_PARTITIONED_SPAN_H + +#include <array> +#include <gsl/span> +#include <stdexcept> +#include <vector> + +#include "Definitions.h" + +namespace cbm::algo +{ + + template<typename T, typename Allocator> + class PartitionedVector; + + namespace detail + { + template<typename U, typename T> + using 
EnableOnConst = std::enable_if_t<std::is_const_v<T> && std::is_same_v<U, std::remove_cv_t<T>>>; + + template<typename U, typename T> + using EnableOnNonConst = std::enable_if_t<!std::is_const_v<T> && std::is_same_v<U, std::remove_cv_t<T>>>; + } // namespace detail + + template<typename T> + class PartitionedSpan { + + public: + PartitionedSpan() : fData(), fOffsets(NullOffset), fAdresses() { EnsureDimensions(); } + + // Intellisense and clang workaround, fails on template deduction with stl containers for some reason + // #if defined(__INTELLISENSE__) || defined(__clang__) + template<typename Allocator> + PartitionedSpan(std::vector<T, Allocator>& container, gsl::span<const size_t> offsets, + gsl::span<const u32> addresses) + : fData(container) + , fOffsets(offsets) + , fAdresses(addresses) + { + EnsureDimensions(); + } + + // FIXME disable if T is non-const via SFINAE, otherwise get misleading compiler errors + template<typename Allocator> + PartitionedSpan(const std::vector<T, Allocator>& container, gsl::span<const size_t> offsets, + gsl::span<const u32> addresses) + : fData(container) + , fOffsets(offsets) + , fAdresses(addresses) + { + EnsureDimensions(); + } + + template<size_t N> + PartitionedSpan(std::array<T, N>& container, gsl::span<const size_t> offsets, gsl::span<const u32> addresses) + : fData(container) + , fOffsets(offsets) + , fAdresses(addresses) + { + EnsureDimensions(); + } + + // FIXME disable if T is non-const via SFINAE + template<size_t N> + PartitionedSpan(const std::array<T, N>& container, gsl::span<const size_t> offsets, gsl::span<const u32> addresses) + : fData(container) + , fOffsets(offsets) + , fAdresses(addresses) + { + EnsureDimensions(); + } + // #endif + + PartitionedSpan(gsl::span<T> data, gsl::span<const size_t> offsets, gsl::span<const u32> addresses) + : fData(data) + , fOffsets(offsets) + , fAdresses(addresses) + { + EnsureDimensions(); + } + + template<typename U, typename Allocator, typename = detail::EnableOnConst<U, T>> + 
PartitionedSpan(const PartitionedVector<U, Allocator>& container) + : fData(container.Data()) + , fOffsets(container.Offsets()) + , fAdresses(container.Addresses()) + { + EnsureDimensions(); + } + + template<typename U, typename Allocator, typename = detail::EnableOnNonConst<U, T>> + PartitionedSpan(PartitionedVector<U, Allocator>& container) + : fData(container.Data()) + , fOffsets(container.Offsets()) + , fAdresses(container.Addresses()) + { + EnsureDimensions(); + } + + gsl::span<T> operator[](size_t i) const + { + EnsureBounds(i); + return UnsafePartitionSpan(i); + } + + u32 Address(size_t i) const + { + EnsureBounds(i); + return fAdresses[i]; + } + + std::pair<gsl::span<T>, u32> Partition(size_t i) const + { + EnsureBounds(i); + return std::pair<gsl::span<T>, u32>(UnsafePartitionSpan(i), fAdresses[i]); + } + + size_t NPartitions() const { return fAdresses.size(); } + + size_t Size(size_t i) const + { + EnsureBounds(i); + return UnsafeSize(i); + } + + size_t NElements() const { return fData.size(); } + + gsl::span<T> Data() const { return fData; } + + gsl::span<const u32> Addresses() const { return fAdresses; } + + gsl::span<const size_t> Offsets() const { return fOffsets; } + + private: + // Required for default constructor, don't use std::array to avoid additional dependency + static constexpr size_t NullOffset[1] = {0}; + + gsl::span<T> fData; + gsl::span<const size_t> fOffsets; + gsl::span<const u32> fAdresses; + + // FIXME code duplication with PartitionedVector + + void EnsureDimensions() const + { + if (fOffsets.size() - 1 != fAdresses.size()) { + throw std::runtime_error("PartitionedSpan: fOffsets.size() != fAdresses.size()"); + } + if (fOffsets.front() != 0) throw std::runtime_error("PartitionedSpan: fOffsets.front() != 0"); + if (fOffsets.back() != fData.size()) { + throw std::runtime_error("PartitionedSpan: fOffsets.back() != fData.size()"); + } + } + + void EnsureBounds(size_t i) const + { + if (i >= fAdresses.size()) throw 
std::out_of_range("PartitionedSpan: index out of bounds"); + } + + size_t UnsafeSize(size_t i) const { return fOffsets[i + 1] - fOffsets[i]; } + + gsl::span<T> UnsafePartitionSpan(size_t i) const { return fData.subspan(fOffsets[i], UnsafeSize(i)); } + }; + + // template auto deduction + template<typename T, template<typename> class Container> + PartitionedSpan(Container<T>&, gsl::span<const size_t>, gsl::span<const u32>) -> PartitionedSpan<T>; + + template<typename T, template<typename> class Container> + PartitionedSpan(const Container<T>&, gsl::span<const size_t>, gsl::span<const u32>) -> PartitionedSpan<const T>; + + template<typename T, typename Allocator> + PartitionedSpan(PartitionedVector<T, Allocator>&) -> PartitionedSpan<T>; + + template<typename T, typename Allocator> + PartitionedSpan(const PartitionedVector<T, Allocator>&) -> PartitionedSpan<const T>; + +} // namespace cbm::algo + +#endif diff --git a/algo/base/PartitionedVector.h b/algo/base/PartitionedVector.h index 008b65d738887f207381e23362d0bcf65876c4c7..71d812eea81555122783f2ecb54386ee21a96b1f 100644 --- a/algo/base/PartitionedVector.h +++ b/algo/base/PartitionedVector.h @@ -15,6 +15,8 @@ namespace cbm::algo { + template<typename T> + class PartitionedSpan; /** * @brief A vector that is partitioned into multiple subvectors. @@ -33,11 +35,7 @@ namespace cbm::algo /** * @brief Default constructor. Creates an empty vector. */ - PartitionedVector() - { - fOffsets.push_back(0); - EnsureDimensions(); - } + PartitionedVector() : fData(), fOffsets({0}), fAdresses() { EnsureDimensions(); } /** * @brief Constructor. Creates a vector with n partitions. 
@@ -71,6 +69,15 @@ namespace cbm::algo EnsureDimensions(); } + template<typename U> + PartitionedVector(PartitionedSpan<U> other) + : fData(other.Data().begin(), other.Data().end()) + , fOffsets(other.Offsets().begin(), other.Offsets().end()) + , fAdresses(other.Addresses().begin(), other.Addresses().end()) + { + EnsureDimensions(); + } + /** * @brief Access data at partition i. */ @@ -119,7 +126,7 @@ namespace cbm::algo /** * @brief Get the number of partitions. */ - size_t NPartitions() const { return fOffsets.size() - 1; } + size_t NPartitions() const { return fAdresses.size(); } /** * @brief Get the size of partition i. @@ -138,7 +145,12 @@ namespace cbm::algo /** * @brief Get the underlying data. */ - const Container_t& Data() const { return fData; } + gsl::span<T> Data() { return fData; } + + /** + * @brief Get the underlying data. + */ + gsl::span<const T> Data() const { return fData; } /** * @brief Get the addresses. @@ -160,6 +172,7 @@ namespace cbm::algo if (fOffsets.size() - 1 != fAdresses.size()) { throw std::runtime_error("PartitionedVector: fOffsets.size() != fAdresses.size()"); } + if (fOffsets.front() != 0) { throw std::runtime_error("PartitionedVector: fOffsets.front() != 0"); } if (fOffsets.back() != fData.size()) { throw std::runtime_error("PartitionedVector: fOffsets.back() != fData.size()"); } @@ -167,7 +180,7 @@ namespace cbm::algo void EnsureBounds(size_t i) const { - if (i >= fOffsets.size() - 1) throw std::out_of_range("PartitionedVector: index out of bounds"); + if (i >= fAdresses.size()) throw std::out_of_range("PartitionedVector: index out of bounds"); } void ComputeOffsets(gsl::span<const size_t> sizes) diff --git a/algo/base/RecoParams.h b/algo/base/RecoParams.h index 45c845a50e55b97792bb0a4be07ba92e0c451dc1..e459950ee67125b544dfa7781eebb19488f25bda 100644 --- a/algo/base/RecoParams.h +++ b/algo/base/RecoParams.h @@ -27,6 +27,12 @@ namespace cbm::algo CPU, XPU, }; + enum class AllocationMode : u8 + { + Auto, //< Static on GPU, dynamic 
on CPU + Static, //< Allocate all buffers beforehand + Dynamic, //< Allocate buffers per timeslice + }; struct STS { SortMode digiSortMode; @@ -40,33 +46,30 @@ namespace cbm::algo f32 timeCutClusterAbs; f32 timeCutClusterSig; - struct MemoryLimits { - u64 maxDigisPerTS; - u64 maxDigisPerMS; - u64 maxDigisPerModule; - f64 clustersPerDigiTS; - f64 clustersPerDigiModule; - f64 hitsPerClusterTS; - f64 hitsPerClusterModule; + struct Memory { + AllocationMode allocationMode; + u64 maxNDigisPerTS; + u64 maxNDigisPerModule; + f64 clustersPerDigi; + f64 hitsPerCluster; + + u64 NClustersUpperBound(u64 nDigis) const { return nDigis * clustersPerDigi; } + u64 NHitsUpperBound(u64 nDigis) const { return NClustersUpperBound(nDigis) * hitsPerCluster; } - XPU_D u64 maxClustersPerTS() const { return maxDigisPerTS * clustersPerDigiTS; } - XPU_D u64 maxClustersPerModule() const { return maxDigisPerModule * clustersPerDigiModule; } - XPU_D u64 maxHitsPerClusterTS() const { return maxClustersPerTS() * hitsPerClusterTS; } - XPU_D u64 maxHitsPerClusterModule() const { return maxClustersPerModule() * hitsPerClusterModule; } + u64 MaxNClustersPerModule() const { return NClustersUpperBound(maxNDigisPerModule); } + u64 MaxNHitsPerModule() const { return MaxNClustersPerModule() * hitsPerCluster; } + + bool IsDynamic() const { return allocationMode == RecoParams::AllocationMode::Dynamic; } + bool IsStatic() const { return allocationMode == RecoParams::AllocationMode::Static; } + bool IsAuto() const { return allocationMode == RecoParams::AllocationMode::Auto; } static constexpr auto Properties = std::make_tuple( - config::Property(&MemoryLimits::maxDigisPerTS, "maxDigisPerTS", "Maximal number of digis per time slice"), - config::Property(&MemoryLimits::maxDigisPerMS, "maxDigisPerMS", "Maximal number of digis per micro slice"), - config::Property(&MemoryLimits::maxDigisPerModule, "maxDigisPerModule", "Maximal number of digis per module"), - config::Property(&MemoryLimits::clustersPerDigiTS, 
"clustersPerDigiTS", - "Number of clusters per digi in a time slice"), - config::Property(&MemoryLimits::clustersPerDigiModule, "clustersPerDigiModule", - "Number of clusters per digi in a module"), - config::Property(&MemoryLimits::hitsPerClusterTS, "hitsPerClusterTS", - "Number of hits per cluster in a time slice"), - config::Property(&MemoryLimits::hitsPerClusterModule, "hitsPerClusterModule", - "Number of hits per cluster in a module")); - } memoryLimits; + config::Property(&Memory::allocationMode, "allocationMode", "Allocation mode (Auto, Static, Dynamic)"), + config::Property(&Memory::maxNDigisPerTS, "maxNDigisPerTS", "Maximal number of digis per time slice"), + config::Property(&Memory::maxNDigisPerModule, "maxNDigisPerModule", "Maximal number of digis per module"), + config::Property(&Memory::clustersPerDigi, "clustersPerDigi", "Number of clusters per digi in a time slice"), + config::Property(&Memory::hitsPerCluster, "hitsPerCluster", "Number of hits per cluster in a time slice")); + } memory; static constexpr auto Properties = std::make_tuple( config::Property(&STS::digiSortMode, "digiSortMode", @@ -89,7 +92,7 @@ namespace cbm::algo &STS::timeCutClusterSig, "timeCutClusterSig", "Time cut for clusters." 
" Two clusters are considered it their time difference is below 'value * sqrt(terr1**2 + terr2*+2)'"), - config::Property(&STS::memoryLimits, "memoryLimits", "Memory limits for STS reco")); + config::Property(&STS::memory, "memory", "Memory limits for STS reco")); }; STS sts; @@ -109,4 +112,10 @@ CBM_ENUM_DICT(cbm::algo::RecoParams::UnpackMode, {"XPU", RecoParams::UnpackMode::XPU} ); +CBM_ENUM_DICT(cbm::algo::RecoParams::AllocationMode, + {"Auto", RecoParams::AllocationMode::Auto}, + {"Static", RecoParams::AllocationMode::Static}, + {"Dynamic", RecoParams::AllocationMode::Dynamic} +); + #endif // CBM_ALGO_BASE_RECOPARAMS_H diff --git a/algo/base/compat/OpenMP.h b/algo/base/compat/OpenMP.h index 16991f1bedc53ba27eb2a0cf737fdfda8653dfdf..84d57489debaee5f18e43976d85f8628a99dd8db 100644 --- a/algo/base/compat/OpenMP.h +++ b/algo/base/compat/OpenMP.h @@ -25,6 +25,20 @@ #define CBM_PARALLEL_FOR(...) #endif +// OpenMP parallel +#ifdef HAVE_OMP +#define CBM_PARALLEL(...) CBM_PRAGMA(omp parallel __VA_ARGS__) +#else +#define CBM_PARALLEL(...) +#endif + +// generic omp pragma for other commands +#ifdef HAVE_OMP +#define CBM_OMP(...) CBM_PRAGMA(omp __VA_ARGS__) +#else +#define CBM_OMP(...) 
+#endif + namespace cbm::algo::openmp { diff --git a/algo/detectors/sts/Hitfinder.h b/algo/detectors/sts/Hitfinder.h index 7bd5ad70f44993a4d171b06b805788ea6de0509d..f4f0f79c9613eeb3e26f6876c4b834b566aac1b6 100644 --- a/algo/detectors/sts/Hitfinder.h +++ b/algo/detectors/sts/Hitfinder.h @@ -30,9 +30,9 @@ namespace cbm::algo kFindClusterBlockSize = 1024, kFindHitsBlockSize = 512, #else // HIP, values ignored on CPU - kSortDigisBlockSize = 256, + kSortDigisBlockSize = 1024, kSortDigisItemsPerThread = 6, - kSortClustersBlockSize = 256, + kSortClustersBlockSize = 1024, kSortClustersItemsPerThread = 6, kFindClusterBlockSize = 1024, kFindHitsBlockSize = 1024, @@ -279,6 +279,12 @@ namespace cbm::algo::sts // output // Max number of Hits that can be stored in each module + size_t hitsAllocatedPerModule; + + // Max number of hits that should be stored in each module + // Guaranteed to be <= hitsAllocatedPerModule, + // calculated from to the number of digis + // This is a seperate value to terminate faster if we encounter monster events size_t maxHitsPerModule; // Hits sorted by module. Stored in buckets of size maxHitsPerModule. @@ -291,6 +297,10 @@ namespace cbm::algo::sts // FIXME: Should be size_t! xpu::buffer<int> nHitsPerModule; + // Flat array of hits. 
size = nHitsTotal + size_t hitsFlatCapacity; + xpu::buffer<sts::Hit> hitsFlat; + public: XPU_D void SortDigisInSpaceAndTime(SortDigis::context&) const; XPU_D void FindClustersSingleStep(FindClusters::context&) const; diff --git a/algo/detectors/sts/HitfinderChain.cxx b/algo/detectors/sts/HitfinderChain.cxx index 0291b93622f9cf2f18b062fc5da2805e53311176..4d0fb95b6a7f67dad79f4efe231f7c7ed081cf15 100644 --- a/algo/detectors/sts/HitfinderChain.cxx +++ b/algo/detectors/sts/HitfinderChain.cxx @@ -7,15 +7,33 @@ #include <log.hpp> #include <numeric> +#include "Exceptions.h" #include "PODVector.h" #include "compat/OpenMP.h" using namespace cbm::algo; -void sts::HitfinderChain::SetParameters(const sts::HitfinderPars& parameters) +void sts::HitfinderChain::SetParameters(const HitfinderChainPars& parameters) { fPars.emplace(parameters); AllocateStatic(); + auto& memoryPars = fPars->memory; + + if (memoryPars.IsAuto()) { + if (xpu::device().active().backend() == xpu::cpu) { + memoryPars.allocationMode = RecoParams::AllocationMode::Dynamic; + } + else { + memoryPars.allocationMode = RecoParams::AllocationMode::Static; + } + } + + if (!memoryPars.IsDynamic() && !memoryPars.IsStatic()) { + throw std::runtime_error( + fmt::format("STS Hitfinder Chain: Unknown allocation mode: {}", ToString(memoryPars.allocationMode))); + } + + if (memoryPars.IsStatic()) { AllocateDynamic(memoryPars.maxNDigisPerModule, memoryPars.maxNDigisPerTS); } } void sts::HitfinderChain::Finalize() @@ -32,30 +50,48 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS xpu::push_timer("STS Hitfinder"); - size_t nModules = fPars->modules.size(); + size_t nModules = fPars->setup.modules.size(); size_t nModuleSides = nModules * 2; size_t nDigisTotal = digis.size(); xpu::t_add_bytes(nDigisTotal * sizeof(CbmStsDigi)); // Getting the digis on the GPU requires 3 steps // 1. 
Sort digis into buckets by module - size_t maxNDigisPerModule; - DigiMap digiMap = SortDigisIntoModules(digis, maxNDigisPerModule); + DigiMap digiMap = CountDigisPerModules(digis); // 2. Once we know number of digis per module, we can allocate // the dynamic buffers on the gpu, as the buffer sizes depend on that value - AllocateDynamic(maxNDigisPerModule, nDigisTotal); + if (fPars->memory.IsDynamic()) AllocateDynamic(digiMap.maxNDigisPerModule, nDigisTotal); + else { + if (digiMap.maxNDigisPerModule > fPars->memory.maxNDigisPerModule) { + throw std::runtime_error( + fmt::format("STS Hitfinder Chain: Too many digis per module for static allocation: {} > {}", + digiMap.maxNDigisPerModule, fPars->memory.maxNDigisPerModule)); + } + if (nDigisTotal > fPars->memory.maxNDigisPerTS) { + throw std::runtime_error( + fmt::format("STS Hitfinder Chain: Too many digis per timeslice for static allocation: {} > {}", nDigisTotal, + fPars->memory.maxNDigisPerTS)); + } + } // 3. Copy digis into flat array with offsets per module - FlattenDigis(digiMap); + FlattenDigis(digis, digiMap); if (Opts().LogLevel() == trace) EnsureDigiOffsets(digiMap); xpu::queue queue; + // Set constants + auto& hfc = fHitfinder; + hfc.maxHitsPerModule = fPars->memory.NHitsUpperBound(digiMap.maxNDigisPerModule); + if (hfc.maxHitsPerModule > hfc.hitsAllocatedPerModule) { + L_(error) << fmt::format("STS Hitfinder Chain: Too many hits per module for static allocation: {} > {}", + hfc.maxHitsPerModule, hfc.hitsAllocatedPerModule); + hfc.maxHitsPerModule = hfc.hitsAllocatedPerModule; + } + // Clear buffers // Not all buffers have to initialized, but useful for debugging - L_(debug) << "STS Hitfinder Chain: Clearing buffers..."; - auto& hfc = fHitfinder; // xpu::memset(hfc.digisPerModule, 0); // xpu::memset(hfc.digisPerModuleTmp, 0); // xpu::memset(hfc.digisSortedPerModule, 0); @@ -74,7 +110,12 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS L_(debug) << "STS Hitfinder Chain: 
Copy digis buffers..."; xpu::set<TheHitfinder>(fHitfinder); - queue.copy(hfc.digisPerModule, xpu::h2d); + + // Only copy the actual number of digis, not the whole allocated buffer + // TODO add support in xpu, for buffer copies with offset + size + const CbmStsDigi* digisH = xpu::h_view(hfc.digisPerModule).data(); + CbmStsDigi* digisD = hfc.digisPerModule.get(); + if (digisH != digisD) queue.copy(digisH, digisD, sizeof(CbmStsDigi) * nDigisTotal); queue.copy(hfc.digiOffsetPerModule, xpu::h2d); L_(debug) << "STS Hitfinder Chain: Sort Digis..."; @@ -115,7 +156,7 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS std::vector<ClusterIdx> clusterIdxPerModule; clusterIdxPerModule.resize(props.size()); std::vector<int> nClustersPerModule; - nClustersPerModule.resize(fPars->modules.size() * 2); + nClustersPerModule.resize(fPars->setup.modules.size() * 2); queue.copy(hfc.clusterIdxPerModule.get(), clusterIdxPerModule.data(), props.size_bytes()); queue.copy(hfc.nClustersPerModule.get(), nClustersPerModule.data(), nClustersPerModule.size() * sizeof(int)); @@ -217,8 +258,8 @@ void sts::HitfinderChain::AllocateStatic() EnsureParameters(); // Shorthands for common constants - const int nChannels = fPars->nChannels / 2; // Only count channels on one side of the module - const int nModules = fPars->modules.size(); + const int nChannels = fPars->setup.nChannels / 2; // Only count channels on one side of the module + const int nModules = fPars->setup.modules.size(); const int nModuleSides = 2 * nModules; // Number of module front + backsides xpu::queue q; @@ -226,24 +267,24 @@ void sts::HitfinderChain::AllocateStatic() // Set GPU constants fHitfinder.nModules = nModules; fHitfinder.nChannels = nChannels; - fHitfinder.asic = fPars->asic; + fHitfinder.asic = fPars->setup.asic; // Copy landau table - size_t nLandauTableEntries = fPars->landauTable.values.size(); + size_t nLandauTableEntries = fPars->setup.landauTable.values.size(); 
fHitfinder.landauTableSize = nLandauTableEntries; - fHitfinder.landauStepSize = fPars->landauTable.stepSize; + fHitfinder.landauStepSize = fPars->setup.landauTable.stepSize; fHitfinder.landauTable.reset(nLandauTableEntries, xpu::buf_io); - q.copy(fPars->landauTable.values.data(), fHitfinder.landauTable.get(), nLandauTableEntries * sizeof(float)); + q.copy(fPars->setup.landauTable.values.data(), fHitfinder.landauTable.get(), nLandauTableEntries * sizeof(float)); // Copy transformation matrix fHitfinder.localToGlobalRotationByModule.reset(9 * nModules, xpu::buf_io); fHitfinder.localToGlobalTranslationByModule.reset(3 * nModules, xpu::buf_io); // - Copy matrix into flat array - xpu::h_view hRot {fHitfinder.localToGlobalRotationByModule}; - xpu::h_view hTrans {fHitfinder.localToGlobalTranslationByModule}; + xpu::h_view hRot(fHitfinder.localToGlobalRotationByModule); + xpu::h_view hTrans(fHitfinder.localToGlobalTranslationByModule); for (int m = 0; m < nModules; m++) { - const auto& module = fPars->modules.at(m); + const auto& module = fPars->setup.modules.at(m); std::copy_n(module.localToGlobal.rotation.data(), 9, &hRot[m * 9]); std::copy_n(module.localToGlobal.translation.data(), 3, &hTrans[m * 3]); } @@ -254,9 +295,9 @@ void sts::HitfinderChain::AllocateStatic() // Copy Sensor Parameteres fHitfinder.sensorPars.reset(nModules, xpu::buf_io); - xpu::h_view hSensorPars {fHitfinder.sensorPars}; + xpu::h_view hSensorPars(fHitfinder.sensorPars); for (int m = 0; m < nModules; m++) { - const auto& module = fPars->modules.at(m); + const auto& module = fPars->setup.modules.at(m); auto& gpuPars = hSensorPars[m]; gpuPars.dY = module.dY; gpuPars.pitch = module.pitch; @@ -281,19 +322,21 @@ void sts::HitfinderChain::AllocateDynamic(size_t maxNDigisPerModule, size_t nDig << nDigisTotal << " digis in total"; EnsureParameters(); - xpu::scoped_timer t_ {"Allocate"}; + xpu::scoped_timer t_("Allocate"); // TODO: some of these buffers have a constant size and can be allocated statically. 
// Just the data they contain is static. // Shorthands for common constants - const int nChannels = fPars->nChannels / 2; // Only count channels on one side of the module - const int nModules = fPars->modules.size(); + const int nChannels = fPars->setup.nChannels / 2; // Only count channels on one side of the module + const int nModules = fPars->setup.modules.size(); const int nModuleSides = 2 * nModules; // Number of module front + backsides + const size_t maxNClustersPerModule = fPars->memory.MaxNClustersPerModule(); + const size_t maxNHitsPerModule = fPars->memory.MaxNHitsPerModule(); - const size_t maxNClustersPerModule = maxNDigisPerModule * Params().sts.memoryLimits.clustersPerDigiModule; - const size_t maxNHitsPerModule = maxNClustersPerModule * Params().sts.memoryLimits.hitsPerClusterModule; + // TODO: this number should be much lower, estimate from max digis per TS + const size_t maxHitsTotal = maxNHitsPerModule * nModules; // Allocate Digi Buffers fHitfinder.digiOffsetPerModule.reset(nModuleSides + 1, xpu::buf_io); @@ -315,95 +358,155 @@ void sts::HitfinderChain::AllocateDynamic(size_t maxNDigisPerModule, size_t nDig fHitfinder.nClustersPerModule.reset(nModuleSides, xpu::buf_io); // Allocate Hit Buffers - fHitfinder.maxHitsPerModule = maxNHitsPerModule; - fHitfinder.hitsPerModule.reset(maxNHitsPerModule * nModules, xpu::buf_io); + fHitfinder.hitsAllocatedPerModule = maxNHitsPerModule; + fHitfinder.hitsPerModule.reset(maxNHitsPerModule * nModules, xpu::buf_device); fHitfinder.nHitsPerModule.reset(nModules, xpu::buf_io); + + fHitfinder.hitsFlatCapacity = maxHitsTotal; + fHitfinder.hitsFlat.reset(maxHitsTotal, xpu::buf_host); } -sts::HitfinderChain::DigiMap sts::HitfinderChain::SortDigisIntoModules(gsl::span<const CbmStsDigi> digis, - size_t& maxNDigisPerModule) +sts::HitfinderChain::DigiMap sts::HitfinderChain::CountDigisPerModules(gsl::span<const CbmStsDigi> digis) { L_(debug) << "STS Hitfinder Chain: Sorting " << digis.size() << " digis into modules"; - 
xpu::scoped_timer t_ {"Sort Digis"}; + xpu::scoped_timer t_("Count Digis By Module"); + xpu::t_add_bytes(digis.size_bytes()); + int nModules = fPars->setup.modules.size(); + int nModuleSides = nModules * 2; DigiMap digiMap; + digiMap.nDigisPerModule.resize(nModuleSides); - int nChannelsPerSide = fPars->nChannels / 2; + // Create map from module address to index + // This could be done just once in the beginning, + // but overhead is negligible compared to the digi copying + digiMap.addrToIndex.resize(1 << 17, InvalidModule); + for (size_t m = 0; m < nModules; m++) { + const auto& module = fPars->setup.modules[m]; + i32 paddr = CbmStsAddress::PackDigiAddress(module.address); + digiMap.addrToIndex[paddr] = u16(m); + } - auto isPulser = [&](const CbmStsDigi& digi) { - return std::find_if(fPars->modules.begin(), fPars->modules.end(), - [&](const auto& mod) { return mod.address == digi.GetAddress(); }) - == fPars->modules.end(); - }; + int nChannelsPerSide = fPars->setup.nChannels / 2; - size_t nPulsers = 0; + // TODO: try to parallelise + // Count digis per module in parallel + // Then calculate offset per module + // Write digis directly to flat array in parallel + // Use atomic add to increment offset per module + digiMap.nDigisPerThread.resize(openmp::GetMaxThreads()); + for (auto& v : digiMap.nDigisPerThread) + v.resize(nModuleSides, 0); - for (const auto& digi : digis) { + CBM_PARALLEL() + { + int threadId = openmp::GetThreadNum(); // Move out of the loop, seems to create small overhead + auto& nDigis = digiMap.nDigisPerThread.at(threadId); - if (isPulser(digi)) { - nPulsers++; - continue; - } + CBM_OMP(for schedule(static)) + for (size_t i = 0; i < digis.size(); i++) { + const auto& digi = digis[i]; + u16 moduleIndex = digiMap.ModuleIndex(digi); - int moduleAddr = CbmStsAddress::GetMotherAddress(digi.GetAddress(), kStsModule); - bool isFront = digi.GetChannel() < fPars->nChannels / 2; - if (isFront) digiMap.front[moduleAddr].emplace_back(digi); - else { - 
CbmStsDigi tmpdigi = digi; - tmpdigi.SetChannel(tmpdigi.GetChannel() - nChannelsPerSide); - digiMap.back[moduleAddr].emplace_back(tmpdigi); + if (moduleIndex == InvalidModule) continue; + + bool isFront = digi.GetChannel() < nChannelsPerSide; + nDigis[moduleIndex + (isFront ? 0 : nModules)]++; } } - if (nPulsers > 0) L_(warning) << "STS Hitfinder: Discarded " << nPulsers << " pulser digis"; - // Print digi counts per module - for (const auto& mod : fPars->modules) { - i32 moduleAddr = mod.address; - L_(debug) << "Module " << moduleAddr << " has " << digiMap.front[moduleAddr].size() << " front digis and " - << digiMap.back[moduleAddr].size() << " back digis"; + // Sum up digis per module + for (auto& v : digiMap.nDigisPerThread) { + for (size_t m = 0; m < nModuleSides; m++) { + digiMap.nDigisPerModule[m] += v[m]; + } } - maxNDigisPerModule = 0; - for (const auto& mod : digiMap.front) { - maxNDigisPerModule = std::max(maxNDigisPerModule, mod.second.size()); + // if (nPulsers > 0) L_(warning) << "STS Hitfinder: Discarded " << nPulsers << " pulser digis"; + + // Print digi counts per module + for (const auto& mod : fPars->setup.modules) { + // FIXME should use module index here + i32 moduleAddr = CbmStsAddress::PackDigiAddress(mod.address); + u16 moduleIndex = digiMap.addrToIndex[moduleAddr]; + L_(debug) << "Module " << moduleAddr << " has " << digiMap.nDigisPerModule[moduleIndex] << " front digis and " + << digiMap.nDigisPerModule[moduleIndex + nModules] << " back digis"; } - for (const auto& mod : digiMap.back) { - maxNDigisPerModule = std::max(maxNDigisPerModule, mod.second.size()); + digiMap.maxNDigisPerModule = 0; + for (const auto& nDigis : digiMap.nDigisPerModule) { + digiMap.maxNDigisPerModule = std::max(digiMap.maxNDigisPerModule, nDigis); } return digiMap; } -void sts::HitfinderChain::FlattenDigis(DigiMap& digiMap) +void sts::HitfinderChain::FlattenDigis(gsl::span<const CbmStsDigi> digis, DigiMap& digiMap) { L_(debug) << "STS Hitfinder Chain: Flattening 
digis"; - xpu::scoped_timer t_ {"Flatten Digis"}; - FlattenDigisSide(digiMap.front, true); - FlattenDigisSide(digiMap.back, false); -} + xpu::scoped_timer t_("Flatten Digis"); + xpu::t_add_bytes(digis.size_bytes()); -void sts::HitfinderChain::FlattenDigisSide(DigiMapSide& digis, bool isFront) -{ - EnsureParameters(); + const auto& modules = fPars->setup.modules; - int nModules = fPars->modules.size(); - xpu::h_view pMdigiOffset {fHitfinder.digiOffsetPerModule}; - xpu::h_view pDigisFlat {fHitfinder.digisPerModule}; + int nModules = modules.size(); + int nModuleSides = nModules * 2; + int nChannelsPerSide = fPars->setup.nChannels / 2; + xpu::h_view pDigisFlat(fHitfinder.digisPerModule); // Final input copied the GPU - int sideOffset = (isFront ? 0 : nModules); + xpu::h_view pMdigiOffset(fHitfinder.digiOffsetPerModule); + pMdigiOffset[0] = 0; + for (int m = 1; m < nModuleSides + 1; m++) { + pMdigiOffset[m] = pMdigiOffset[m - 1] + digiMap.nDigisPerModule.at(m - 1); + } - if (isFront) pMdigiOffset[0] = 0; - for (int m = 0; m < nModules; m++) { - size_t moduleOffset = pMdigiOffset[sideOffset + m]; - const auto& moduleDigis = digis[fPars->modules[m].address]; - std::copy(moduleDigis.begin(), moduleDigis.end(), &pDigisFlat[moduleOffset]); - pMdigiOffset[sideOffset + m + 1] = moduleOffset + moduleDigis.size(); + std::vector<std::vector<size_t>> digiOffsetsPerThread(openmp::GetMaxThreads()); + for (auto& v : digiOffsetsPerThread) + v.resize(nModuleSides); + for (size_t i = 1; i < digiOffsetsPerThread.size(); i++) { + for (size_t m = 0; m < nModuleSides; m++) { + for (size_t j = 0; j < i; j++) { + digiOffsetsPerThread[i][m] += digiMap.nDigisPerThread[j][m]; + } + } + } + + CBM_PARALLEL() + { + int threadId = openmp::GetThreadNum(); // Move out of the loop, seems to create small overhead + auto& digiOffsets = digiOffsetsPerThread.at(threadId); + + CBM_OMP(for schedule(static)) + for (size_t i = 0; i < digis.size(); i++) { + const auto& digi = digis[i]; + + u16 moduleIndex = 
digiMap.ModuleIndex(digi); + + if (moduleIndex == InvalidModule) continue; + + bool isFront = digi.GetChannel() < nChannelsPerSide; + moduleIndex += isFront ? 0 : nModules; + + size_t moduleOffset = pMdigiOffset[moduleIndex]; + size_t digiOffset = digiOffsets[moduleIndex]++; + + pDigisFlat[moduleOffset + digiOffset] = digi; + } + } + + // Channels in Digis are counted 0 to 2047 by default. + // Channel 0 -> 1023 is the front side of the module + // Channel 1024 -> 2047 is the back side of the module + // Cluster finder only operates on one side, so we have to shift the channel numbers + CBM_PARALLEL_FOR(schedule(static)) + for (size_t i = pMdigiOffset[nModules]; i < pMdigiOffset[nModuleSides]; i++) { + auto& digi = pDigisFlat[i]; + digi.SetChannel(digi.GetChannel() - nChannelsPerSide); } } -PartitionedPODVector<sts::Hit> sts::HitfinderChain::FlattenHits(xpu::queue queue) +PartitionedSpan<sts::Hit> sts::HitfinderChain::FlattenHits(xpu::queue queue) { auto& hfc = fHitfinder; xpu::h_view nHits(hfc.nHitsPerModule); @@ -411,8 +514,15 @@ PartitionedPODVector<sts::Hit> sts::HitfinderChain::FlattenHits(xpu::queue queue size_t nHitsTotal = 0; for (size_t m = 0; m < hfc.nModules; m++) nHitsTotal += GetNHits(nHits, m); - L_(info) << "STS Hitfinder Chain: Flattening " << nHitsTotal << " hits"; - PODVector<sts::Hit> hits(nHitsTotal); + L_(debug) << "STS Hitfinder Chain: Flattening " << nHitsTotal << " hits"; + + if (nHitsTotal > hfc.hitsFlatCapacity) + throw ProcessingError("STS Hitfinder Chain: Not enough memory for hits: {} > {}", nHitsTotal, hfc.hitsFlatCapacity); + + // hitBuffer is potentially bigger than nHitsTotal + // So construct a span to the memory region we actually use + xpu::h_view hitBuffer(hfc.hitsFlat); + gsl::span hits(hitBuffer.data(), nHitsTotal); // Transfer Hits / Cluster back to host // Hits and Clusters are stored in buckets. Buckets aren't necessarily full. 
@@ -434,7 +544,7 @@ PartitionedPODVector<sts::Hit> sts::HitfinderChain::FlattenHits(xpu::queue queue for (size_t i = 0; i < m; i++) { offset += GetNHits(nHits, i); } - std::copy_n(hfc.hitsPerModule.get() + hfc.maxHitsPerModule * m, GetNHits(nHits, m), hits.begin() + offset); + std::copy_n(hfc.hitsPerModule.get() + hfc.hitsAllocatedPerModule * m, GetNHits(nHits, m), hits.begin() + offset); } } else { // GPU @@ -448,20 +558,25 @@ PartitionedPODVector<sts::Hit> sts::HitfinderChain::FlattenHits(xpu::queue queue size_t nHitsCopied = 0; for (size_t m = 0; m < hfc.nModules; m++) { size_t numHitsInModule = GetNHits(nHits, m); - queue.copy(hfc.hitsPerModule.get() + hfc.maxHitsPerModule * m, hits.data() + nHitsCopied, + queue.copy(hfc.hitsPerModule.get() + hfc.hitsAllocatedPerModule * m, hits.data() + nHitsCopied, numHitsInModule * sizeof(sts::Hit)); nHitsCopied += numHitsInModule; } } - std::vector<size_t> nHitsPerModule; + + // Could do this more efficiently, cache addresses once in the beginning, ... 
+ // doesn't really matter overhead wise + fHitOffsets.clear(); + fHitOffsets.push_back(0); for (size_t m = 0; m < hfc.nModules; m++) - nHitsPerModule.push_back(GetNHits(nHits, m)); - std::vector<u32> addresses; - for (auto& m : fPars->modules) - addresses.push_back(m.address); - PartitionedPODVector<sts::Hit> partitionedHits(std::move(hits), nHitsPerModule, addresses); + fHitOffsets.push_back(fHitOffsets.back() + GetNHits(nHits, m)); + + fAddresses = {}; + for (auto& m : fPars->setup.modules) + fAddresses.push_back(m.address); + PartitionedSpan partitionedHits(hits, fHitOffsets, fAddresses); return partitionedHits; } @@ -470,89 +585,20 @@ size_t sts::HitfinderChain::GetNHits(xpu::h_view<int> nHitsPerModule, int module return std::min<size_t>(nHitsPerModule[module], fHitfinder.maxHitsPerModule); } -void sts::HitfinderChain::AppendClustersModule(int module, bool isFront, std::vector<sts::Cluster>& clusters) -{ - EnsureParameters(); - - auto& hfc = fHitfinder; - int moduleSide = (isFront ? 
module : 2 * module); - xpu::h_view hNClusters {hfc.nClustersPerModule}; - xpu::h_view hClusters {hfc.clusterDataPerModule}; - - int nClusters = hNClusters[moduleSide]; - auto* gpuClusters = &hClusters[moduleSide * hfc.maxClustersPerModule]; - - clusters.reserve(clusters.size() + nClusters); - - for (int i = 0; i < nClusters; i++) { - clusters.emplace_back(gpuClusters[i]); - - // auto& clusterGpu = gpuClusters[i]; - - // auto& clusterOut = clusters.emplace_back(); - - // clusterOut.SetAddress(fPars->modules[module].address); - // clusterOut.SetProperties(clusterGpu.fCharge, clusterGpu.fPosition, clusterGpu.fPositionError, clusterGpu.fTime, - // clusterGpu.fTimeError); - // clusterOut.SetSize(clusterGpu.fSize); - } -} - -void sts::HitfinderChain::AppendHitsModule(int module, std::vector<sts::Hit>& hits) -{ - EnsureParameters(); - - xpu::h_view hHits {fHitfinder.hitsPerModule}; - xpu::h_view hNHits {fHitfinder.nHitsPerModule}; - - auto* gpuHits = &hHits[module * fHitfinder.maxHitsPerModule]; - int nHitsGpu = hNHits[module]; - - hits.reserve(hits.size() + nHitsGpu); - - for (int i = 0; i < nHitsGpu; i++) { - hits.emplace_back(gpuHits[i]); - // auto& hitGpu = gpuHits[i]; - - // hits.emplace_back(fPars->modules[module].address, - // TVector3 {hitGpu.fX, hitGpu.fY, hitGpu.fZ}, - // TVector3 {hitGpu.fDx, hitGpu.fDy, hitGpu.fDz}, - // hitGpu.fDxy, - // 0, - // 0, - // double(hitGpu.fTime), - // hitGpu.fTimeError, - // hitGpu.fDu, - // hitGpu.fDv); - } -} - void sts::HitfinderChain::EnsureDigiOffsets(DigiMap& digi) { - xpu::h_view digiOffset {fHitfinder.digiOffsetPerModule}; + xpu::h_view digiOffset(fHitfinder.digiOffsetPerModule); - size_t nModules = fPars->modules.size(); + size_t nModules = fPars->setup.modules.size(); size_t offset = 0; // Front - for (size_t m = 0; m < nModules; m++) { - const auto& moduleDigis = digi.front[fPars->modules[m].address]; + for (size_t m = 0; m < nModules * 2; m++) { if (digiOffset[m] != offset) { L_(fatal) << "Module " << m << ": Digi 
offset mismatch: " << digiOffset[m] << " != " << offset; std::abort(); } - offset += moduleDigis.size(); - } - - // Back - for (size_t m = 0; m < nModules; m++) { - const auto& moduleDigis = digi.back[fPars->modules[m].address]; - - if (digiOffset[nModules + m] != offset) { - L_(fatal) << "Module " << nModules + m << ": Digi offset mismatch: " << digiOffset[nModules + m] - << " != " << offset; - std::abort(); - } - offset += moduleDigis.size(); + size_t nDigis = digi.nDigisPerModule[m]; + offset += nDigis; } if (offset != digiOffset[2 * nModules]) { @@ -563,12 +609,12 @@ void sts::HitfinderChain::EnsureDigiOffsets(DigiMap& digi) void sts::HitfinderChain::EnsureDigisSorted() { - xpu::h_view digiOffset {fHitfinder.digiOffsetPerModule}; - xpu::h_view digis {fHitfinder.digisPerModule}; + xpu::h_view digiOffset(fHitfinder.digiOffsetPerModule); + xpu::h_view digis(fHitfinder.digisPerModule); bool isSorted = true; - for (size_t m = 0; m < fPars->modules.size(); m++) { + for (size_t m = 0; m < fPars->setup.modules.size(); m++) { int nDigis = digiOffset[m + 1] - digiOffset[m]; if (nDigis == 0) continue; @@ -599,10 +645,10 @@ void sts::HitfinderChain::EnsureDigisSorted() void sts::HitfinderChain::EnsureChannelOffsets(gsl::span<u32> channelOffsetsByModule) { - xpu::h_view digisPerModule {fHitfinder.digisPerModule}; - xpu::h_view digiOffsetPerModule {fHitfinder.digiOffsetPerModule}; - for (size_t m = 0; m < fPars->modules.size() * 2; m++) { - int nChannels = fPars->nChannels / 2; // Consider module sides + xpu::h_view digisPerModule(fHitfinder.digisPerModule); + xpu::h_view digiOffsetPerModule(fHitfinder.digiOffsetPerModule); + for (size_t m = 0; m < fPars->setup.modules.size() * 2; m++) { + int nChannels = fPars->setup.nChannels / 2; // Consider module sides std::vector<u32> expectedChannelOffsets(nChannels, 0); @@ -644,7 +690,7 @@ void sts::HitfinderChain::EnsureChannelOffsets(gsl::span<u32> channelOffsetsByMo void 
sts::HitfinderChain::EnsureClustersSane(gsl::span<ClusterIdx> hClusterIdx, gsl::span<int> hNClusters) { - for (size_t m = 0; m < 2 * fPars->modules.size(); m++) { + for (size_t m = 0; m < 2 * fPars->setup.modules.size(); m++) { int nClusters = hNClusters[m]; L_(trace) << "Module " << m << " has " << nClusters << " clusters of " << fHitfinder.maxClustersPerModule; diff --git a/algo/detectors/sts/HitfinderChain.h b/algo/detectors/sts/HitfinderChain.h index da0b8c8470b6184a8cac7cdbab260e50b4e207f5..ddcf955a6543d78c8ecc744052a6ae3ff8c418ef 100644 --- a/algo/detectors/sts/HitfinderChain.h +++ b/algo/detectors/sts/HitfinderChain.h @@ -14,7 +14,7 @@ #include <optional> #include <vector> -#include "PartitionedVector.h" +#include "PartitionedSpan.h" #include "SubChain.h" #include "gpu/xpu_legacy.h" #include "sts/Hitfinder.h" @@ -24,12 +24,9 @@ namespace cbm::algo::sts { - struct HitfinderTimes { - double timeIO = 0; - double timeSortDigi = 0; - double timeCluster = 0; - double timeSortCluster = 0; - double timeHits = 0; + struct HitfinderChainPars { + HitfinderPars setup; // TODO: HitfinderPars should renamed to HitfinderSetup + RecoParams::STS::Memory memory; }; /** @@ -42,12 +39,12 @@ namespace cbm::algo::sts public: struct Result { - PartitionedPODVector<sts::Hit> hits; + PartitionedSpan<sts::Hit> hits; sts::HitfinderMonitor monitor; }; - void SetParameters(const sts::HitfinderPars& parameters); - const sts::HitfinderPars& GetParameters() const { return *fPars; } + void SetParameters(const HitfinderChainPars& parameters); + const HitfinderChainPars& GetParameters() const { return *fPars; } /** * Teardown chain. 
@@ -57,11 +54,22 @@ namespace cbm::algo::sts Result operator()(gsl::span<const CbmStsDigi>); private: - // Shorthands to map module-address onto digis in that module - using DigiMapSide = std::map<int, std::vector<CbmStsDigi>>; + static constexpr u16 InvalidModule = 0xFFFF; + struct DigiMap { - DigiMapSide front; - DigiMapSide back; + //< Map module address to index in digis array. Only works for 17 bit packed addresses. + std::vector<u16> addrToIndex; + // Map modules to number of Digis, 2 * NModules entries, first half is front, second half is back + std::vector<size_t> nDigisPerModule; + std::vector<std::vector<size_t>> nDigisPerThread; //< Number of digis per module per thread [thread][module] + size_t maxNDigisPerModule = 0; //< Upper bound on number of digis per module + + u16 ModuleIndex(const CbmStsDigi& digi) const + { + i32 moduleAddr = digi.GetAddressPacked(); + u16 moduleIndex = addrToIndex[moduleAddr]; + return moduleIndex; + } }; /** @@ -82,20 +90,19 @@ namespace cbm::algo::sts void AllocateDynamic(size_t, size_t); /** - * Sort Digis into buckets by module. - */ - DigiMap SortDigisIntoModules(gsl::span<const CbmStsDigi> digis, size_t& maxNDigisPerModule); + * Count Digis per module. + */ + DigiMap CountDigisPerModules(gsl::span<const CbmStsDigi> digis); /** - * Copy Digis into flat array that can be copied to the GPU. - */ - void FlattenDigis(DigiMap& digiMap); - void FlattenDigisSide(DigiMapSide& digis, bool isFront); + * Copy Digis into flat array that can be copied to the GPU. + */ + void FlattenDigis(gsl::span<const CbmStsDigi> digis, DigiMap& digiMap); /** * Copy Hits back to host into a flat array. - */ - PartitionedPODVector<sts::Hit> FlattenHits(xpu::queue); + */ + PartitionedSpan<sts::Hit> FlattenHits(xpu::queue); /** * @brief Returns the number of hits of a module. 
@@ -104,12 +111,6 @@ namespace cbm::algo::sts */ size_t GetNHits(xpu::h_view<int> nHitsPerModule, int module); - /** - * Transfer Hits / Clusters back to host and convert to common CBM types. - */ - void AppendClustersModule(int module, bool isFront, std::vector<sts::Cluster>&); - void AppendHitsModule(int module, std::vector<sts::Hit>&); - // Debug functions, ensure reco produces sane results void EnsureDigiOffsets(DigiMap&); void EnsureDigisSorted(); @@ -117,9 +118,13 @@ namespace cbm::algo::sts void EnsureClustersSane(gsl::span<ClusterIdx>, gsl::span<int>); void EnsureClustersSorted(); - std::optional<const sts::HitfinderPars> fPars; + std::optional<sts::HitfinderChainPars> fPars; Hitfinder fHitfinder; + + // Output buffer, used by the returned PartitionedSpan + std::vector<u32> fAddresses; + std::vector<size_t> fHitOffsets; }; } // namespace cbm::algo::sts diff --git a/algo/evbuild/EventbuildChain.cxx b/algo/evbuild/EventbuildChain.cxx index fe8cc5b599b3a1690226b0e10cbf5195015ca7da..6d2af6325aff0c8e8939e992e2aecf8a7f0bc44f 100644 --- a/algo/evbuild/EventbuildChain.cxx +++ b/algo/evbuild/EventbuildChain.cxx @@ -6,13 +6,6 @@ #include "CbmDigiTimeslice.h" -#include <boost/archive/binary_oarchive.hpp> -#include <boost/iostreams/device/array.hpp> -#include <boost/iostreams/device/back_inserter.hpp> -#include <boost/iostreams/stream.hpp> -#include <boost/serialization/utility.hpp> -#include <boost/serialization/vector.hpp> - #include <sstream> #include <string> @@ -23,63 +16,50 @@ using namespace cbm::algo; using namespace cbm::algo::evbuild; -namespace b_io = boost::iostreams; -namespace b_ar = boost::archive; // ----- Constructor ------------------------------------------------------ -EventbuildChain::EventbuildChain(const Config& config) +EventbuildChain::EventbuildChain(const Config& config, std::shared_ptr<HistogramSender> sender) : fTriggerDet(config.fTrigger.Detector()) , fTrigger(config.fTrigger) , fBuilder(config.fBuilder) , fSelector(config.fSelector) , 
fQa(DigiEventQaConfig(config.fBuilder, 10., 100)) + , fSender(sender) { Status(); - if ("" != fHistComChan) { - fZmqContext = new zmq::context_t(1); - fZmqSocket = new zmq::socket_t(*fZmqContext, ZMQ_PUSH); - fZmqSocket->connect(fHistComChan); // This side "connects" to socket => Other side should have "bind"!!!! - + if (fSender) { /// FIXME: based on JdC question, decide whether config re-emitted on each iteration instead of only at startup? /// => Header for multi-part message with Configuration + data /// => Format: std::pair< Nb histogram configs, Nb canvas configs > std::vector<std::pair<std::string, std::string>> histsCfg = fQa.GetConfig().GetHistosConfigs(); std::vector<std::pair<std::string, std::string>> canvsCfg = fQa.GetConfig().GetCanvasConfigs(); - PrepareAndSendMsg(std::pair<uint32_t, uint32_t>(histsCfg.size(), canvsCfg.size()), zmq::send_flags::sndmore); + fSender->PrepareAndSendMsg(std::pair<uint32_t, uint32_t>(histsCfg.size(), canvsCfg.size()), + zmq::send_flags::sndmore); /// => Histograms configuration = destination folder in http browser, mandatory but can be empty (= root folder) /// => 1 ZMQ message per histogram (= 1 part) /// => If no (new) histograms declared (e.g. new canvas declaration), has to be en empty message + `0` in the header for (const auto& cfg : histsCfg) { - PrepareAndSendMsg(cfg, zmq::send_flags::sndmore); + fSender->PrepareAndSendMsg(cfg, zmq::send_flags::sndmore); } /// => Canvas configuration /// => 1 ZMQ message per canvas (= 1 part) /// => If no (new) canvas declared (e.g. 
only histos declaration), has to be en empty message + `0` in the header for (const auto& cfg : canvsCfg) { - PrepareAndSendMsg(cfg, zmq::send_flags::sndmore); + fSender->PrepareAndSendMsg(cfg, zmq::send_flags::sndmore); } /// => (empty) Histograms serialization and emission to close multi-part message - PrepareAndSendMsg(std::vector<Histo1D> {}, zmq::send_flags::none); + fSender->PrepareAndSendMsg(std::vector<Histo1D> {}, zmq::send_flags::none); } } // ---------------------------------------------------------------------------- -// ----- Constructor ------------------------------------------------------ +// ----- Destructor ------------------------------------------------------ EventbuildChain::~EventbuildChain() { - if ("" != fHistComChan) { - if (fZmqSocket) { - fZmqSocket->close(); - delete fZmqSocket; - } - if (fZmqContext) { // - delete fZmqContext; - } - } } // ---------------------------------------------------------------------------- @@ -97,11 +77,11 @@ EventbuildChain::ResultType EventbuildChain::Run(const DigiData& timeslice) auto [events, evbuildMon] = fBuilder(timeslice, triggers, fSelector); /// => Histograms serialization and emission - if ("" != fHistComChan) { + if (fSender) { // --- Run event QA DigiEventQaData qaData = fQa(events); - PrepareAndSendMsg(qaData.fVectHistos, zmq::send_flags::none); + fSender->PrepareAndSendMsg(qaData.fVectHistos, zmq::send_flags::none); L_(info) << "Published histograms, nb: " << qaData.fVectHistos.size(); } @@ -202,24 +182,3 @@ std::vector<double> EventbuildChain::GetDigiTimes(const DigiData& timeslice, ECb return result; } // ---------------------------------------------------------------------------- - - -// ----- Send a message to the histogram server ---------------------------- -template<class Object> -void EventbuildChain::PrepareAndSendMsg(const Object& obj, zmq::send_flags flags) -{ - /// Needed ressources (serializd string, boost inserter, boost stream, boost binary output archive) - std::string 
serial_str; - b_io::back_insert_device<std::string> inserter(serial_str); - b_io::stream<b_io::back_insert_device<std::string>> bstream(inserter); - b_ar::binary_oarchive oa(bstream); - - serial_str.clear(); - oa << obj; - bstream.flush(); - - zmq::message_t msg(serial_str.size()); - std::copy_n(static_cast<const char*>(serial_str.data()), msg.size(), static_cast<char*>(msg.data())); - fZmqSocket->send(msg, flags); -} -// ---------------------------------------------------------------------------- diff --git a/algo/evbuild/EventbuildChain.h b/algo/evbuild/EventbuildChain.h index 4c50976cbf8eee6cea7f2aa1f78822e30baede53..6b4676383168f46f53474b9ed53be54c0c7511d0 100644 --- a/algo/evbuild/EventbuildChain.h +++ b/algo/evbuild/EventbuildChain.h @@ -9,11 +9,12 @@ #include "TimeClusterTrigger.h" -#include <zmq.hpp> +#include <memory> #include "DigiEventQa.h" #include "DigiEventSelector.h" #include "EventBuilder.h" +#include "HistogramSender.h" #include "SubChain.h" namespace cbm::algo @@ -47,7 +48,7 @@ namespace cbm::algo::evbuild using ResultType = std::pair<std::vector<DigiEvent>, EventbuildChainMonitorData>; /** @brief Constructor **/ - EventbuildChain(const Config& config); + EventbuildChain(const Config& config, std::shared_ptr<HistogramSender> sender = nullptr); /** @brief Destructor **/ ~EventbuildChain(); @@ -58,20 +59,13 @@ namespace cbm::algo::evbuild /** @brief Status info to logger **/ void Status() const; - private: // members ECbmModuleId fTriggerDet = ECbmModuleId::kNotExist; ///< Trigger detector TimeClusterTrigger fTrigger; ///< Trigger algorithm EventBuilder fBuilder; ///< Event builder algorithm DigiEventSelector fSelector; ///< Event selector algorithm DigiEventQa fQa; ///< Event QA algorithm - - /// FIXME: decide if the address and port of the histogram server should come from command line or YAML - //std::string fHistComChan = ""; - std::string fHistComChan = "tcp://127.0.0.1:56800"; - zmq::context_t* fZmqContext; ///< ZMQ context FIXME: should be 
only one context per binary! - zmq::socket_t* fZmqSocket; ///< ZMQ socket to histogram server - + std::shared_ptr<HistogramSender> fSender; ///< Histogram sender private: // methods /** @brief Extract digi times from CbmDigiTimeslice @@ -80,12 +74,6 @@ namespace cbm::algo::evbuild **/ std::vector<double> GetDigiTimes(const DigiData& timeslice, ECbmModuleId system); - /** @brief Serialize object and send it to the histogram server - ** @param obj: object to be serialized in the message, e.g. config pairs of strings or Histo1D - ** @param flags: or'ed values from zmq::send_flags, typ. zmq::send_flags::sndmore to indicate multi-parts message - **/ - template<class Object> - void PrepareAndSendMsg(const Object& obj, zmq::send_flags flags); }; } // namespace cbm::algo::evbuild diff --git a/algo/global/Reco.cxx b/algo/global/Reco.cxx index d3f03cfa46ef1a9e73b5145074e5867c314e2234..c6ab97da629207f37c61e9369783b32c068de35c 100644 --- a/algo/global/Reco.cxx +++ b/algo/global/Reco.cxx @@ -47,6 +47,11 @@ void Reco::Init(const Options& opts) fStsHitFinder.SetContext(&fContext); fTracking.SetContext(&fContext); + if (Opts().HistogramUri() != "") { + fSender = std::make_shared<HistogramSender>(Opts().HistogramUri()); + // fContext.sender = fSender; + } + xpu::device_prop props {xpu::device::active()}; L_(info) << "Running CBM Reco on Device '" << props.name() << "' (Using " << openmp::GetMaxThreads() << " OpenMP threads)"; @@ -67,14 +72,17 @@ void Reco::Init(const Options& opts) // --- Event building fs::path configFile = opts.ParamsDir() / "EventbuildConfig.yaml"; evbuild::Config config(YAML::LoadFile(configFile.string())); - fEventBuild = std::make_unique<evbuild::EventbuildChain>(config); + fEventBuild = std::make_unique<evbuild::EventbuildChain>(config, fSender); // STS Hitfinder fs::path stsHitfinderParamsPath = opts.ParamsDir() / "StsHitfinder.yaml"; yaml = YAML::LoadFile(stsHitfinderParamsPath.string()); - sts::HitfinderPars hitFinderConfig = 
config::Read<sts::HitfinderPars>(yaml); - hitFinderConfig.landauTable = sts::LandauTable::FromFile(opts.ParamsDir() / "LandauWidthTable.txt"); - fStsHitFinder.SetParameters(hitFinderConfig); + sts::HitfinderPars hitFinderSetup = config::Read<sts::HitfinderPars>(yaml); + hitFinderSetup.landauTable = sts::LandauTable::FromFile(opts.ParamsDir() / "LandauWidthTable.txt"); + sts::HitfinderChainPars hitFinderPars; + hitFinderPars.setup = std::move(hitFinderSetup); + hitFinderPars.memory = Params().sts.memory; + fStsHitFinder.SetParameters(hitFinderPars); // Tracking fTracking.Init(); @@ -89,70 +97,69 @@ void Reco::Init(const Options& opts) RecoResults Reco::Run(const fles::Timeslice& ts) { - if (!fInitialized) { throw std::runtime_error("Chain not initialized"); } + if (!fInitialized) throw std::runtime_error("Chain not initialized"); for (uint64_t comp = 0; comp < ts.num_components(); comp++) { xpu::t_add_bytes(ts.size_component(comp)); } - xpu::push_timer(fmt::format("TS {}", ts.index())); - - L_(info) << ">>> Processing TS " << ts.index(); - xpu::set<cbm::algo::Params>(Params()); - - Unpack::resultType unpackResult; - UnpackMonitorData unpackMonitor; - - if (Opts().HasStep(Step::Unpack)) { - switch (Params().sts.unpackMode) { - case RecoParams::UnpackMode::XPU: - // digis = fUnpackXpu.Exec(ts); - throw std::runtime_error("XPU unpacker currently not implemented"); - break; - default: - case RecoParams::UnpackMode::CPU: - unpackResult = fUnpack.Run(ts); - unpackMonitor = unpackResult.second; - break; + RecoResults results; + xpu::timings tsTimes; + { + xpu::scoped_timer t_(fmt::format("TS {}", ts.index()), &tsTimes); + + L_(info) << ">>> Processing TS " << ts.index(); + xpu::set<cbm::algo::Params>(Params()); + + Unpack::resultType unpackResult; + UnpackMonitorData unpackMonitor; + + if (Opts().HasStep(Step::Unpack)) { + switch (Params().sts.unpackMode) { + case RecoParams::UnpackMode::XPU: + // digis = fUnpackXpu.Exec(ts); + throw std::runtime_error("XPU unpacker 
currently not implemented"); + break; + default: + case RecoParams::UnpackMode::CPU: + unpackResult = fUnpack.Run(ts); + unpackMonitor = unpackResult.second; + break; + } } - } - - // --- Digi event building - std::vector<DigiEvent> events; - evbuild::EventbuildChainMonitorData evbuildMonitor; - if (Opts().HasStep(Step::DigiTrigger)) { - auto [ev, mon] = fEventBuild->Run(unpackResult.first); - events = std::move(ev); - evbuildMonitor = mon; - } - - sts::HitfinderMonitor stsHitfinderMonitor; - PartitionedPODVector<sts::Hit> stsHits; - if (Opts().HasStep(Step::LocalReco) && Opts().HasDetector(fles::Subsystem::STS)) { - auto stsResults = fStsHitFinder(unpackResult.first.fSts); - stsHits = std::move(stsResults.hits); - stsHitfinderMonitor = stsResults.monitor; - } + // --- Digi event building + std::vector<DigiEvent> events; + evbuild::EventbuildChainMonitorData evbuildMonitor; + if (Opts().HasStep(Step::DigiTrigger)) { + auto [ev, mon] = fEventBuild->Run(unpackResult.first); + events = std::move(ev); + evbuildMonitor = mon; + } - xpu::timings ts_times = xpu::pop_timer(); + sts::HitfinderMonitor stsHitfinderMonitor; + PartitionedSpan<sts::Hit> stsHits; + if (Opts().HasStep(Step::LocalReco) && Opts().HasDetector(fles::Subsystem::STS)) { + auto stsResults = fStsHitFinder(unpackResult.first.fSts); + stsHits = std::move(stsResults.hits); + stsHitfinderMonitor = stsResults.monitor; + } - QueueUnpackerMetrics(ts, unpackMonitor, unpackResult.first); - QueueStsRecoMetrics(stsHitfinderMonitor); - QueueEvbuildMetrics(evbuildMonitor); + // --- Tracking + if (Opts().HasStep(Step::Tracking)) { + TrackingChain::Return_t trackingOutput = fTracking.Run(results); + if (Opts().HasOutput(RecoData::Track)) { results.tracks = std::move(trackingOutput.first); } + QueueTrackingMetrics(trackingOutput.second); + } - PrintTimings(ts_times); + QueueUnpackerMetrics(ts, unpackMonitor, unpackResult.first); + QueueStsRecoMetrics(stsHitfinderMonitor); + QueueEvbuildMetrics(evbuildMonitor); - 
RecoResults results; - if (Opts().HasOutput(RecoData::DigiEvent)) results.events = std::move(events); - if (Opts().HasOutput(RecoData::Hit)) results.stsHits = std::move(stsHits); - - // --- Tracking - if (Opts().HasStep(Step::Tracking)) { - TrackingChain::Return_t trackingOutput = fTracking.Run(results); - if (Opts().HasOutput(RecoData::Track)) { results.tracks = std::move(trackingOutput.first); } - QueueTrackingMetrics(trackingOutput.second); + if (Opts().HasOutput(RecoData::DigiEvent)) results.events = std::move(events); + if (Opts().HasOutput(RecoData::Hit)) results.stsHits = std::move(stsHits); } + PrintTimings(tsTimes); return results; } diff --git a/algo/global/Reco.h b/algo/global/Reco.h index 83efd74ace93014a6dafd71fd3bff78b1dbbaf91..7932985b6bb93d755d58416755a708690fbe74ba 100644 --- a/algo/global/Reco.h +++ b/algo/global/Reco.h @@ -7,6 +7,7 @@ #include <xpu/host.h> #include "EventbuildChain.h" +#include "HistogramSender.h" #include "SubChain.h" #include "UnpackChain.h" #include "ca/TrackingChain.h" @@ -41,6 +42,7 @@ namespace cbm::algo bool fInitialized = false; ChainContext fContext; xpu::timings fTimesliceTimesAcc; + std::shared_ptr<HistogramSender> fSender; size_t prevTsId = 0; diff --git a/algo/global/RecoResults.h b/algo/global/RecoResults.h index 0a2a3bbced9381de36b268bb371904910af2ca7f..a8ed4e5d741d67623e8c3d7ee82429729d319291 100644 --- a/algo/global/RecoResults.h +++ b/algo/global/RecoResults.h @@ -12,7 +12,7 @@ #include <vector> #include "DigiData.h" -#include "base/PartitionedVector.h" +#include "PartitionedSpan.h" #include "ca/core/utils/CaVector.h" #include "data/sts/Hit.h" @@ -22,7 +22,7 @@ namespace cbm::algo /// \brief Structure to keep reconstructed results: digi-events, hits and tracks struct RecoResults { std::vector<DigiEvent> events; - PartitionedPODVector<sts::Hit> stsHits; + PartitionedSpan<sts::Hit> stsHits; ca::Vector<ca::Track> tracks; }; } // namespace cbm::algo diff --git a/algo/global/StorableRecoResults.h 
b/algo/global/StorableRecoResults.h index c29d434c8dbffd5309395fd01927f5a1edc51b8b..bc1f089d88ef670b33cf87b9070d351d54d2c274 100644 --- a/algo/global/StorableRecoResults.h +++ b/algo/global/StorableRecoResults.h @@ -11,6 +11,8 @@ #include <cstdint> +#include "PartitionedVector.h" + namespace cbm::algo { diff --git a/algo/params/RecoParams.yaml b/algo/params/RecoParams.yaml index 0e2f4456779e021b01accb18d66e58d3dca74ca2..9fc82381041d55089cc6f5e6e9379b8bf871cbfa 100644 --- a/algo/params/RecoParams.yaml +++ b/algo/params/RecoParams.yaml @@ -9,14 +9,10 @@ sts: timeCutClusterAbs: -1 timeCutClusterSig: 4 - memoryLimits: - maxDigisPerTS: 80000000 - # TODO: Find sensible value! - maxDigisPerMS: 100000 - # TODO: Find sensible value - maxDigisPerModule: 4000000 - clustersPerDigiTS: 1.0 - clustersPerDigiModule: 1.0 - hitsPerClusterTS: 1.0 - hitsPerClusterModule: 0.85 + memory: + allocationMode: Auto + maxNDigisPerTS: 30000000 + maxNDigisPerModule: 3000000 + clustersPerDigi: 1.0 + hitsPerCluster: 0.85 ... 
diff --git a/algo/test/CMakeLists.txt b/algo/test/CMakeLists.txt index 6eeb86a8a18d468faf7907d6f337fefc1c6f45f9..dcd036d4c90c71418e2a1fcda1aa19cf18173c71 100644 --- a/algo/test/CMakeLists.txt +++ b/algo/test/CMakeLists.txt @@ -57,3 +57,7 @@ Set(PartitionedVectorSources ) CreateGTestExeAndAddTest(_GTestPartitionedVector "${INCLUDE_DIRECTORIES}" "${LINK_DIRECTORIES}" "${PartitionedVectorSources}" "${PUB_DEPS}" "${PVT_DEPS}" "${INT_DEPS}" "") + +Set(PartitionedSpanSources _GTestPartitionedSpan.cxx) +CreateGTestExeAndAddTest(_GTestPartitionedSpan "${INCLUDE_DIRECTORIES}" "${LINK_DIRECTORIES}" + "${PartitionedSpanSources}" "${PUB_DEPS}" "${PVT_DEPS}" "${INT_DEPS}" "") diff --git a/algo/test/_GTestPartitionedSpan.cxx b/algo/test/_GTestPartitionedSpan.cxx new file mode 100644 index 0000000000000000000000000000000000000000..7afc70f50d6eb6a1728e606dab79701f62c0e032 --- /dev/null +++ b/algo/test/_GTestPartitionedSpan.cxx @@ -0,0 +1,122 @@ +/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ +#include <gtest/gtest.h> + +#include "PartitionedSpan.h" +#include "PartitionedVector.h" + +using namespace cbm::algo; + +template<typename T> +void EXPECT_CONTAINER_EQ(gsl::span<T> a, std::vector<T> b) +{ + EXPECT_EQ(a.size(), b.size()); + for (size_t i = 0; i < a.size(); ++i) { + EXPECT_EQ(a[i], b[i]); + } +} + +class PartitionedSpanTest : public ::testing::Test { +protected: + std::vector<i32> fData; + std::vector<size_t> fSizes; + std::vector<size_t> fOffsets; + std::vector<size_t> fOffsetsInvalid; + std::vector<u32> fAddresses; + std::vector<u32> fAddressesInvalid; + + void SetUp() override + { + fData = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + fSizes = {2, 4, 3}; + fOffsets = {0, 2, 6, 9}; + fOffsetsInvalid = {0, 2, 6}; + fAddresses = {0x0, 0x100, 0x200}; + fAddressesInvalid = {0x0, 0x100}; + } + + void Ensure(const PartitionedSpan<i32>& vec) + { + EXPECT_EQ(vec.NElements(), 
9); + EXPECT_EQ(vec.NPartitions(), 3); + + EXPECT_EQ(vec.Size(0), 2); + + EXPECT_EQ(vec.Size(0), 2); + EXPECT_EQ(vec[0].size(), 2); + EXPECT_EQ(vec.Address(0), 0x0); + EXPECT_CONTAINER_EQ(vec[0], {1, 2}); + + auto part = vec.Partition(0); + EXPECT_EQ(part.first.size(), vec.Size(0)); + EXPECT_EQ(part.second, vec.Address(0)); + + EXPECT_EQ(vec.Size(1), 4); + EXPECT_EQ(vec[1].size(), 4); + EXPECT_EQ(vec.Address(1), 0x100); + EXPECT_CONTAINER_EQ(vec[1], {3, 4, 5, 6}); + + part = vec.Partition(1); + EXPECT_EQ(part.first.size(), vec.Size(1)); + EXPECT_EQ(part.second, vec.Address(1)); + + EXPECT_EQ(vec.Size(2), 3); + EXPECT_EQ(vec[2].size(), 3); + EXPECT_EQ(vec.Address(2), 0x200); + EXPECT_CONTAINER_EQ(vec[2], {7, 8, 9}); + + part = vec.Partition(2); + EXPECT_EQ(part.first.size(), vec.Size(2)); + EXPECT_EQ(part.second, vec.Address(2)); + } +}; + +TEST_F(PartitionedSpanTest, IsDefaultConstructable) +{ + PartitionedSpan<i32> vec; + EXPECT_EQ(vec.NElements(), 0); + EXPECT_EQ(vec.NPartitions(), 0); +} + +TEST_F(PartitionedSpanTest, CanCreateWithPartitions) +{ + PartitionedSpan vec(fData, fOffsets, fAddresses); + Ensure(vec); +} + +TEST_F(PartitionedSpanTest, IsConstructableFromPartitionedVector) +{ + PartitionedVector vec(std::move(fData), fSizes, fAddresses); + + PartitionedSpan span(vec); + Ensure(span); +} + +TEST_F(PartitionedSpanTest, IsCopyConstructable) +{ + PartitionedSpan vec(fData, fOffsets, fAddresses); + + PartitionedSpan vecCopy(vec); + Ensure(vecCopy); +} + +TEST_F(PartitionedSpanTest, ThrowsOnNumAddressesMismatchesNumPartions) +{ + EXPECT_THROW(PartitionedSpan vec(fData, fOffsets, fAddressesInvalid), std::runtime_error); +} + +TEST_F(PartitionedSpanTest, ThrowsOnOffsetsMismatchesDataSize) +{ + EXPECT_THROW(PartitionedSpan vec(fData, fOffsetsInvalid, fAddresses), std::runtime_error); +} + +TEST_F(PartitionedSpanTest, ThrowsOnOutOfBounds) +{ + PartitionedSpan vec(fData, fOffsets, fAddresses); + + EXPECT_THROW(vec[3], std::out_of_range); + 
EXPECT_THROW(vec.Partition(3), std::out_of_range); + EXPECT_THROW(vec.Address(3), std::out_of_range); + EXPECT_THROW(vec.Size(3), std::out_of_range); +} diff --git a/algo/test/_GTestPartitionedVector.cxx b/algo/test/_GTestPartitionedVector.cxx index 41dca1e213cada52c16459df9c274b539caecc2e..0d9be0cf31c99f8001a1c3c4eabb0819224c6ec3 100644 --- a/algo/test/_GTestPartitionedVector.cxx +++ b/algo/test/_GTestPartitionedVector.cxx @@ -9,7 +9,7 @@ using namespace cbm::algo; template<typename T> -void EXPECT_CONTAINER_EQ(gsl::span<T> a, std::vector<T> b) +void EXPECT_CONTAINER_EQ(gsl::span<const T> a, std::vector<T> b) { EXPECT_EQ(a.size(), b.size()); for (size_t i = 0; i < a.size(); ++i) { @@ -17,77 +17,83 @@ void EXPECT_CONTAINER_EQ(gsl::span<T> a, std::vector<T> b) } } -TEST(PartitionedVector, CanCreateEmpty) +class PartitionedVectorTest : public ::testing::Test { +protected: + std::vector<i32> fData; + std::vector<size_t> fSizes; + std::vector<size_t> fSizesInvalid; + std::vector<u32> fAddresses; + std::vector<u32> fAddressesInvalid; + + void SetUp() override + { + fData = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + fSizes = {2, 4, 3}; + fSizesInvalid = {2, 4, 4}; + fAddresses = {0x0, 0x100, 0x200}; + fAddressesInvalid = {0x0, 0x100}; + } + + void Ensure(const PartitionedVector<i32>& vec) + { + EXPECT_EQ(vec.NElements(), 9); + EXPECT_EQ(vec.NPartitions(), 3); + + EXPECT_EQ(vec.Size(0), 2); + EXPECT_EQ(vec[0].size(), 2); + EXPECT_EQ(vec.Address(0), 0x0); + EXPECT_CONTAINER_EQ(vec[0], {1, 2}); + + auto part = vec.Partition(0); + EXPECT_EQ(part.first.size(), vec.Size(0)); + EXPECT_EQ(part.second, vec.Address(0)); + + EXPECT_EQ(vec.Size(1), 4); + EXPECT_EQ(vec[1].size(), 4); + EXPECT_EQ(vec.Address(1), 0x100); + EXPECT_CONTAINER_EQ(vec[1], {3, 4, 5, 6}); + + part = vec.Partition(1); + EXPECT_EQ(part.first.size(), vec.Size(1)); + EXPECT_EQ(part.second, vec.Address(1)); + + EXPECT_EQ(vec.Size(2), 3); + EXPECT_EQ(vec[2].size(), 3); + EXPECT_EQ(vec.Address(2), 0x200); + 
EXPECT_CONTAINER_EQ(vec[2], {7, 8, 9}); + + part = vec.Partition(2); + EXPECT_EQ(part.first.size(), vec.Size(2)); + EXPECT_EQ(part.second, vec.Address(2)); + } +}; + +TEST_F(PartitionedVectorTest, IsDefaultConstructable) { PartitionedVector<i32> vec; EXPECT_EQ(vec.NElements(), 0); EXPECT_EQ(vec.NPartitions(), 0); } -TEST(PartitionedVector, CanCreateWithPartitions) +TEST_F(PartitionedVectorTest, CanCreateWithPartitions) { - std::vector<i32> data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector<size_t> sizes = {2, 4, 3}; - std::vector<u32> addresses = {0x0, 0x100, 0x200}; - - PartitionedVector vec(std::move(data), sizes, addresses); - - EXPECT_EQ(vec.NElements(), 9); - EXPECT_EQ(vec.NPartitions(), 3); - - EXPECT_EQ(vec.Size(0), 2); - EXPECT_EQ(vec[0].size(), 2); - EXPECT_EQ(vec.Address(0), 0x0); - EXPECT_CONTAINER_EQ(vec[0], {1, 2}); - - auto part = vec.Partition(0); - EXPECT_EQ(part.first.size(), vec.Size(0)); - EXPECT_EQ(part.second, vec.Address(0)); - - EXPECT_EQ(vec.Size(1), 4); - EXPECT_EQ(vec[1].size(), 4); - EXPECT_EQ(vec.Address(1), 0x100); - EXPECT_CONTAINER_EQ(vec[1], {3, 4, 5, 6}); - - part = vec.Partition(1); - EXPECT_EQ(part.first.size(), vec.Size(1)); - EXPECT_EQ(part.second, vec.Address(1)); - - EXPECT_EQ(vec.Size(2), 3); - EXPECT_EQ(vec[2].size(), 3); - EXPECT_EQ(vec.Address(2), 0x200); - EXPECT_CONTAINER_EQ(vec[2], {7, 8, 9}); - - part = vec.Partition(2); - EXPECT_EQ(part.first.size(), vec.Size(2)); - EXPECT_EQ(part.second, vec.Address(2)); + PartitionedVector vec(std::move(fData), fSizes, fAddresses); + Ensure(vec); } -TEST(PartitionedVector, ThrowsOnAddressSizesMismatch) +TEST_F(PartitionedVectorTest, ThrowsOnNumAddressesMismatchesNumPartitions) { - std::vector<i32> data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector<size_t> sizes = {2, 4, 3}; - std::vector<u32> addresses = {0x0, 0x100}; - - EXPECT_THROW(PartitionedVector vec(std::move(data), sizes, addresses), std::runtime_error); + EXPECT_THROW(PartitionedVector vec(std::move(fData), fSizes, 
fAddressesInvalid), std::runtime_error); } -TEST(PartitionedVector, ThrowsOnSizesMismatch) +TEST_F(PartitionedVectorTest, ThrowsOnSizesMismatchesDataSize) { - std::vector<i32> data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector<size_t> sizes = {2, 3, 3}; - std::vector<u32> addresses = {0x0, 0x100, 0x200}; - - EXPECT_THROW(PartitionedVector vec(std::move(data), sizes, addresses), std::runtime_error); + EXPECT_THROW(PartitionedVector vec(std::move(fData), fSizesInvalid, fAddresses), std::runtime_error); } -TEST(PartitionedVector, ThrowsOnOutOfBounds) +TEST_F(PartitionedVectorTest, ThrowsOnOutOfBounds) { - std::vector<i32> data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector<size_t> sizes = {2, 4, 3}; - std::vector<u32> addresses = {0x0, 0x100, 0x200}; - - PartitionedVector vec(std::move(data), sizes, addresses); + PartitionedVector vec(std::move(fData), fSizes, fAddresses); EXPECT_THROW(vec[3], std::out_of_range); EXPECT_THROW(vec.Partition(3), std::out_of_range); @@ -95,4 +101,10 @@ TEST(PartitionedVector, ThrowsOnOutOfBounds) EXPECT_THROW(vec.Size(3), std::out_of_range); } -TEST(PartitionedVector, IsCopyConstructable) {} +TEST_F(PartitionedVectorTest, IsCopyConstructable) +{ + PartitionedVector vec(std::move(fData), fSizes, fAddresses); + + PartitionedVector vecCopy(vec); + Ensure(vecCopy); +} diff --git a/core/data/sts/CbmStsDigi.h b/core/data/sts/CbmStsDigi.h index caf5e8289a597ba059119a8cbef9ee2b9adc52da..08e39172edb528458facda87af26e664f1a9bf46 100644 --- a/core/data/sts/CbmStsDigi.h +++ b/core/data/sts/CbmStsDigi.h @@ -74,6 +74,13 @@ public: XPU_D int32_t GetAddress() const { return UnpackAddress(); } + XPU_D int32_t GetAddressPacked() const + { + int32_t highestBitAddr = fTime >> kNumTimestampBits; + int32_t packedAddress = (highestBitAddr << kNumLowerAddrBits) | int32_t(fAddress); + return packedAddress; + } + /** @brief Get the desired name of the branch for this obj in the cbm output tree (static) ** @return "StsDigi" **/ @@ -195,8 +202,7 @@ private: XPU_D 
int32_t UnpackAddress() const { - int32_t highestBitAddr = fTime >> kNumTimestampBits; - int32_t packedAddress = (highestBitAddr << kNumLowerAddrBits) | int32_t(fAddress); + int32_t packedAddress = GetAddressPacked(); return CbmStsAddress::UnpackDigiAddress(packedAddress); } diff --git a/reco/app/cbmreco/main.cxx b/reco/app/cbmreco/main.cxx index f0f69c7e07056caf09ce4a505b17deca980feabe..4e9fb1e9564472369e0395d836f75f56a87c82de 100644 --- a/reco/app/cbmreco/main.cxx +++ b/reco/app/cbmreco/main.cxx @@ -11,6 +11,7 @@ #include <xpu/host.h> #include "BuildInfo.h" +#include "Exceptions.h" #include "Options.h" #include "Reco.h" #include "RecoResultsInputArchive.h" @@ -143,12 +144,19 @@ int main(int argc, char** argv) tsIdx++; continue; } - auto result = reco.Run(*ts); - if (archive) { - auto storable = makeStorableRecoResults(*ts, result); - archive->put(storable); + try { + RecoResults result = reco.Run(*ts); + if (archive) { + auto storable = makeStorableRecoResults(*ts, result); + archive->put(storable); + } + } + catch (const ProcessingError& e) { + // TODO: Add flag if we want to abort on exception or continue with next timeslice + L_(error) << "Caught ProcessingError while processing timeslice " << tsIdx << ": " << e.what(); } + tsIdx++; if (num_ts > 0 && tsIdx >= num_ts) break; diff --git a/reco/detectors/sts/CbmRecoSts.cxx b/reco/detectors/sts/CbmRecoSts.cxx index 39d76808a29fcbaddf2fbc974aa1c4d9b26146b5..b0765929160bf145c17ba67f4f7bb5f68b527899 100644 --- a/reco/detectors/sts/CbmRecoSts.cxx +++ b/reco/detectors/sts/CbmRecoSts.cxx @@ -182,7 +182,11 @@ UInt_t CbmRecoSts::CreateModules() .stepSize = float(landauStepSize), }, }; - if (fUseGpuReco) fGpuReco.SetParameters(pars); + cbm::algo::sts::HitfinderChainPars chainPars { + .setup = pars, + .memory = {}, + }; + if (fUseGpuReco) fGpuReco.SetParameters(chainPars); return fModules.size(); }