Compare revisions

79b61e1d · 79b61e1d · 79b61e1d · 79b61e1d · 79b61e1d · 79b61e1d
--- a/algo/base/PartitionedVector.h
+++ b/algo/base/PartitionedVector.h
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#ifndef CBM_ALGO_BASE_PARTITIONED_VECTOR_H
+#define CBM_ALGO_BASE_PARTITIONED_VECTOR_H
+
+#include "Definitions.h"
+#include "util/PODAllocator.h"
+
+#include <boost/serialization/access.hpp>
+#include <boost/serialization/vector.hpp>
+
+#include <gsl/span>
+#include <vector>
+
+namespace cbm::algo
+{
+  template<typename T>
+  class PartitionedSpan;
+
+  /**
+   * @brief A vector that is partitioned into multiple subvectors.
+   *
+   * @tparam T Type of the elements
+   * @tparam Allocator Allocator for the underlying container
+   *
+   * @note The underlying container is contiguous in memory.
+   */
+  template<typename T, class Allocator = std::allocator<T>>
+  class PartitionedVector {
+
+   public:
+    using Container_t = std::vector<T, Allocator>;  ///< Underlying container type
+
+    /**
+     * @brief Default constructor. Creates an empty vector.
+     *
+     * The offsets are initialized to {0}, i.e. zero partitions and zero elements.
+     */
+    PartitionedVector() : fData(), fOffsets({0}), fAdresses() { EnsureDimensions(); }
+
+    /**
+     * @brief Constructor. Creates a vector with n partitions.
+     *
+     * @param data Underlying data. Assumes that the data is already partitioned and takes ownership of it.
+     * @param sizes Sizes of each partition
+     * @param addresses Hardware addresses of each partition
+     *
+     * @note Requires sizes.size() == addresses.size()
+     * @throws std::runtime_error if the number of sizes and addresses differ, or if the
+     *         sum of all sizes is not equal to data.size()
+     */
+    PartitionedVector(Container_t&& data, gsl::span<const size_t> sizes, gsl::span<const u32> addresses)
+      : fData(std::move(data))
+      , fOffsets()
+      , fAdresses(addresses.begin(), addresses.end())
+    {
+      ComputeOffsets(sizes);
+      EnsureDimensions();
+    }
+
+    /**
+     * @brief Copy constructor. Copy the data from other vector.
+     *
+     * @note The other vector may use a different allocator; elements, offsets and addresses are copied.
+     */
+    template<typename OtherAllocator>
+    PartitionedVector(const PartitionedVector<T, OtherAllocator>& other)
+      : fData(other.Data().begin(), other.Data().end())
+      , fOffsets(other.Offsets())
+      , fAdresses(other.Addresses())
+    {
+      // TODO: this check is overkill? We already know that the dimensions are correct,
+      // since they were already checked in the other vector
+      EnsureDimensions();
+    }
+
+    /**
+     * @brief Construct from a PartitionedSpan. Copies data, offsets and addresses out of the span.
+     *
+     * @throws std::runtime_error if offsets, addresses and data of the span are inconsistent
+     */
+    template<typename U>
+    PartitionedVector(PartitionedSpan<U> other)
+      : fData(other.Data().begin(), other.Data().end())
+      , fOffsets(other.Offsets().begin(), other.Offsets().end())
+      , fAdresses(other.Addresses().begin(), other.Addresses().end())
+    {
+      EnsureDimensions();
+    }
+
+    /**
+     * @brief Access data at partition i.
+     * @throws std::out_of_range if i >= NPartitions()
+     */
+    gsl::span<T> operator[](size_t i)
+    {
+      EnsureBounds(i);
+      return UnsafePartitionSpan(i);
+    }
+
+    /**
+     * @brief Access data at partition i.
+     * @throws std::out_of_range if i >= NPartitions()
+     */
+    gsl::span<const T> operator[](size_t i) const
+    {
+      EnsureBounds(i);
+      return UnsafePartitionSpan(i);
+    }
+
+    /**
+     * @brief Get the hardware address of partition i.
+     * @throws std::out_of_range if i >= NPartitions()
+     */
+    u32 Address(size_t i) const
+    {
+      EnsureBounds(i);
+      return fAdresses[i];
+    }
+
+    /**
+     * @brief Get a pair of the data and the hardware address of partition i.
+     * @throws std::out_of_range if i >= NPartitions()
+     */
+    std::pair<gsl::span<T>, u32> Partition(size_t i)
+    {
+      EnsureBounds(i);
+      return std::pair<gsl::span<T>, u32>(UnsafePartitionSpan(i), fAdresses[i]);
+    }
+
+    /**
+     * @brief Get a pair of the data and the hardware address of partition i.
+     * @throws std::out_of_range if i >= NPartitions()
+     */
+    std::pair<gsl::span<const T>, u32> Partition(size_t i) const
+    {
+      EnsureBounds(i);
+      return std::pair<gsl::span<const T>, u32>(UnsafePartitionSpan(i), fAdresses[i]);
+    }
+
+    /**
+     * @brief Get the number of partitions.
+     */
+    size_t NPartitions() const { return fAdresses.size(); }
+
+    /**
+     * @brief Get the size of partition i.
+     * @throws std::out_of_range if i >= NPartitions()
+     */
+    size_t Size(size_t i) const
+    {
+      EnsureBounds(i);
+      return UnsafeSize(i);
+    }
+
+    /**
+     * @brief Get the total number of elements in the container across all partitions.
+     */
+    size_t NElements() const { return fData.size(); }
+
+    /**
+     * @brief Return total size in bytes of the underlying data.
+     */
+    size_t SizeBytes() const { return fData.size() * sizeof(T); }
+
+    /**
+     * @brief Get the underlying data.
+     */
+    gsl::span<T> Data() { return fData; }
+
+    /**
+     * @brief Get the underlying data.
+     */
+    gsl::span<const T> Data() const { return fData; }
+
+    /**
+     * @brief Get the addresses.
+     */
+    const std::vector<u32>& Addresses() const { return fAdresses; }
+
+    /**
+     * @brief Get the underlying offsets.
+     *
+     * @note Contains NPartitions() + 1 entries: partition i covers [Offsets()[i], Offsets()[i + 1]).
+     */
+    const std::vector<size_t>& Offsets() const { return fOffsets; }
+
+   private:
+    Container_t fData;             ///< Data
+    std::vector<size_t> fOffsets;  ///< Offsets of the partitions in fData
+    std::vector<u32> fAdresses;    ///< Hardware addresses of the partitions
+
+    /**
+     * @brief Verify the class invariants: one more offset than addresses, first offset is zero,
+     * last offset equals the number of stored elements.
+     * @throws std::runtime_error if any invariant is violated
+     */
+    void EnsureDimensions() const
+    {
+      // Note: if fOffsets is empty, size() - 1 wraps around and this first check throws
+      // before front() / back() are evaluated.
+      if (fOffsets.size() - 1 != fAdresses.size()) {
+        throw std::runtime_error("PartitionedVector: fOffsets.size() != fAdresses.size()");
+      }
+      if (fOffsets.front() != 0) {
+        throw std::runtime_error("PartitionedVector: fOffsets.front() != 0");
+      }
+      if (fOffsets.back() != fData.size()) {
+        throw std::runtime_error("PartitionedVector: fOffsets.back() != fData.size()");
+      }
+    }
+
+    /**
+     * @brief Check that i is a valid partition index.
+     * @throws std::out_of_range otherwise
+     */
+    void EnsureBounds(size_t i) const
+    {
+      if (i >= fAdresses.size()) throw std::out_of_range("PartitionedVector: index out of bounds");
+    }
+
+    /**
+     * @brief Fill fOffsets with the running sum of the partition sizes, starting at zero.
+     * @note Appends to fOffsets; expects fOffsets to be empty when called.
+     */
+    void ComputeOffsets(gsl::span<const size_t> sizes)
+    {
+      fOffsets.reserve(sizes.size() + 1);
+      fOffsets.push_back(0);
+      for (auto n : sizes) {
+        fOffsets.push_back(fOffsets.back() + n);
+      }
+    }
+
+    /// Size of partition i. No bounds check is performed.
+    size_t UnsafeSize(size_t i) const { return fOffsets[i + 1] - fOffsets[i]; }
+
+    /// Span over the elements of partition i. No bounds check is performed.
+    gsl::span<T> UnsafePartitionSpan(size_t i) { return gsl::span<T>(fData.data() + fOffsets[i], UnsafeSize(i)); }
+
+    /// Read-only span over the elements of partition i. No bounds check is performed.
+    gsl::span<const T> UnsafePartitionSpan(size_t i) const
+    {
+      return gsl::span<const T>(fData.data() + fOffsets[i], UnsafeSize(i));
+    }
+
+   private:  // serialization
+    friend class boost::serialization::access;
+
+    /**
+     * @brief Boost serialization hook: (de)serializes data, offsets and addresses in that order.
+     * @note The invariants are not re-checked here after loading.
+     */
+    template<class Archive>
+    void serialize(Archive& ar, unsigned int /*version*/)
+    {
+      ar& fData;
+      ar& fOffsets;
+      ar& fAdresses;
+    }
+  };
+
+  /// PartitionedVector whose underlying container uses PODAllocator<T> (see util/PODAllocator.h).
+  template<typename T>
+  using PartitionedPODVector = PartitionedVector<T, PODAllocator<T>>;
+
+}  // namespace cbm::algo
+
+#endif
--- a/algo/base/RecoParams.cxx
+++ b/algo/base/RecoParams.cxx
+/* Copyright (C) 2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#include "RecoParams.h"
+
+CBM_YAML_INSTANTIATE(cbm::algo::RecoParams);
--- a/algo/base/RecoParams.h
+++ b/algo/base/RecoParams.h
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#ifndef CBM_ALGO_BASE_RECOPARAMS_H
+#define CBM_ALGO_BASE_RECOPARAMS_H
+
+#include "Definitions.h"
+#include "util/EnumDict.h"
+#include "yaml/Property.h"
+#include "yaml/Yaml.h"
+
+#include <xpu/defines.h>
+
+namespace cbm::algo
+{
+
+  /**
+   * @brief RecoParams contains all parameters to configure reconstruction
+   *
+   * The struct and its nested structs expose their members to the YAML layer via
+   * CBM_YAML_PROPERTIES; the strings passed there are the YAML keys and their descriptions.
+   */
+  struct RecoParams {
+    /// Sorting algorithm selection
+    enum class SortMode : u8
+    {
+      BlockSort        = 0,
+      CUBSegmentedSort = 1,
+    };
+    /// Buffer allocation strategy
+    enum class AllocationMode : u8
+    {
+      Auto,     //< Static on GPU, dynamic on CPU
+      Static,   //< Allocate all buffers beforehand
+      Dynamic,  //< Allocate buffers per timeslice
+    };
+
+    /// Settings for the STS reconstruction
+    struct STS {
+      SortMode digiSortMode;
+      SortMode clusterSortMode;
+
+      u8 findClustersMultiKernels;
+
+      f32 timeCutDigiAbs;
+      f32 timeCutDigiSig;
+      f32 timeCutClusterAbs;
+      f32 timeCutClusterSig;
+
+      bool doChargeCorrelation;
+      f32 chargeCorrelationDelta;
+
+      /// Memory limits and scaling factors used to size the STS buffers
+      struct Memory {
+        AllocationMode allocationMode;
+        u64 maxNDigisPerTS;
+        u64 maxNDigisPerModule;
+        f64 clustersPerDigi;
+        f64 hitsPerCluster;
+
+        // The products below are computed in floating point and truncated towards zero
+        // when converted back to u64; the casts make that conversion explicit.
+
+        /// Upper bound on the number of clusters for nDigis digis (nDigis * clustersPerDigi)
+        u64 NClustersUpperBound(u64 nDigis) const { return static_cast<u64>(nDigis * clustersPerDigi); }
+        /// Upper bound on the number of hits for nDigis digis
+        u64 NHitsUpperBound(u64 nDigis) const
+        {
+          return static_cast<u64>(NClustersUpperBound(nDigis) * hitsPerCluster);
+        }
+
+        /// Upper bound on the number of clusters in a single module
+        u64 MaxNClustersPerModule() const { return NClustersUpperBound(maxNDigisPerModule); }
+        /// Upper bound on the number of hits in a single module
+        u64 MaxNHitsPerModule() const { return static_cast<u64>(MaxNClustersPerModule() * hitsPerCluster); }
+
+        bool IsDynamic() const { return allocationMode == RecoParams::AllocationMode::Dynamic; }
+        bool IsStatic() const { return allocationMode == RecoParams::AllocationMode::Static; }
+        bool IsAuto() const { return allocationMode == RecoParams::AllocationMode::Auto; }
+
+        CBM_YAML_PROPERTIES(
+          yaml::Property(&Memory::allocationMode, "allocationMode", "Allocation mode (Auto, Static, Dynamic)"),
+          yaml::Property(&Memory::maxNDigisPerTS, "maxNDigisPerTS", "Maximal number of digis per time slice"),
+          yaml::Property(&Memory::maxNDigisPerModule, "maxNDigisPerModule", "Maximal number of digis per module"),
+          yaml::Property(&Memory::clustersPerDigi, "clustersPerDigi", "Number of clusters per digi in a time slice"),
+          yaml::Property(&Memory::hitsPerCluster, "hitsPerCluster", "Number of hits per cluster in a time slice"));
+      } memory;
+
+      CBM_YAML_PROPERTIES(
+        yaml::Property(&STS::digiSortMode, "digiSortMode",
+                         "Digi sort mode (0 = block sort, 1 = cub segmented sort)"),
+        yaml::Property(&STS::clusterSortMode, "clusterSortMode", "Cluster sort mode"),
+
+        yaml::Property(&STS::findClustersMultiKernels, "findClustersMultiKernels",
+                         "Split cluster finding into multiple kernels"),
+
+        yaml::Property(&STS::timeCutDigiAbs, "timeCutDigiAbs",
+                         "Time delta for neighboring digis to be considered for the same cluster. [ns]"),
+        yaml::Property(
+          &STS::timeCutDigiSig, "timeCutDigiSig",
+          "Used if timeCutDigiAbs is negative. Time delta must be < 'value * sqrt2 * timeResolution'. [ns]"),
+        yaml::Property(&STS::timeCutClusterAbs, "timeCutClusterAbs",
+                         "Maximal time difference between two clusters in a hit [ns]."
+                         " Setting to a positive value will override timeCutClusterSig."),
+        yaml::Property(
+          &STS::timeCutClusterSig, "timeCutClusterSig",
+          "Time cut for clusters."
+          " Two clusters are considered if their time difference is below 'value * sqrt(terr1**2 + terr2**2)'"),
+
+        yaml::Property(&STS::doChargeCorrelation, "doChargeCorrelation",
+                         "Enable charge correlation between front+back clusters during hit finding"),
+        yaml::Property(&STS::chargeCorrelationDelta, "chargeCorrelationDelta",
+                         "Delta in total charge between front and back clusters to be considered for hit finding"),
+        yaml::Property(&STS::memory, "memory", "Memory limits for STS reco"));
+    };
+
+    STS sts;
+
+    CBM_YAML_PROPERTIES(yaml::Property(&RecoParams::sts, "sts", "STS reco settings"));
+  };
+
+}  // namespace cbm::algo
+
+CBM_YAML_EXTERN_DECL(cbm::algo::RecoParams);
+
+// String <-> enum mapping for RecoParams::SortMode (see CBM_ENUM_DICT in util/EnumDict.h)
+CBM_ENUM_DICT(cbm::algo::RecoParams::SortMode,
+  {"BlockSort", RecoParams::SortMode::BlockSort},
+  {"CUBSegmentedSort", RecoParams::SortMode::CUBSegmentedSort}
+);
+
+// String <-> enum mapping for RecoParams::AllocationMode
+CBM_ENUM_DICT(cbm::algo::RecoParams::AllocationMode,
+  {"Auto", RecoParams::AllocationMode::Auto},
+  {"Static", RecoParams::AllocationMode::Static},
+  {"Dynamic", RecoParams::AllocationMode::Dynamic}
+);
+
+#endif  // CBM_ALGO_BASE_RECOPARAMS_H
--- a/algo/base/SubChain.h
+++ b/algo/base/SubChain.h
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#ifndef CBM_ALGO_BASE_SUBCHAIN_H
+#define CBM_ALGO_BASE_SUBCHAIN_H
+
+#include "ChainContext.h"
+
+#include <gsl/pointers>
+
+namespace cbm::algo
+{
+  /**
+   * @brief Gives access to the shared ChainContext (options, reco parameters, monitor).
+   *
+   * The context is not owned; it must be set via SetContext() before any of the accessors
+   * that dereference it (Opts, Params, HasMonitor, GetMonitor) are used.
+   *
+   * @note The accessors pass the context through gsl::make_not_null, which enforces the
+   * non-null precondition. NOTE(review): with the default GSL configuration a violation
+   * terminates the program rather than throwing - confirm against the build settings.
+   */
+  class SubChain {
+
+   public:
+    /**
+     * @brief Get the currently set context (may be nullptr if none was set).
+     */
+    const ChainContext* GetContext() const { return fContext; }
+
+    /**
+     * @brief Set the context. The pointer is stored as-is; ownership is not taken.
+     */
+    void SetContext(const ChainContext* ctx) { fContext = ctx; }
+
+    /// Options stored in the context
+    const Options& Opts() const { return gsl::make_not_null(fContext)->opts; }
+    /// Reconstruction parameters stored in the context
+    const RecoParams& Params() const { return gsl::make_not_null(fContext)->recoParams; }
+
+    /// True if the context holds a monitor
+    bool HasMonitor() const { return gsl::make_not_null(fContext)->monitor != nullptr; }
+
+    /**
+     * @brief Get the monitor of the context.
+     * @throws std::runtime_error if the context has no monitor
+     */
+    Monitor& GetMonitor() const
+    {
+      // Need Get-prefix to avoid conflict with Monitor-class name
+      if (!HasMonitor()) throw std::runtime_error("No monitor available");
+      return *gsl::make_not_null(fContext)->monitor;
+    }
+
+
+   private:
+    const ChainContext* fContext = nullptr;  ///< Non-owning pointer to the shared context
+  };
+}  // namespace cbm::algo
+
+#endif
--- a/algo/base/System.cxx
+++ b/algo/base/System.cxx
+/* Copyright (C) 2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+
+#include "System.h"
+
+#include <cstdio>
+
+#ifdef __linux__
+#include <sys/resource.h>
+#include <unistd.h>
+#endif
+
+
+// Returns the current resident set size of the process in bytes, or 0 if it cannot be
+// determined (non-Linux platform, /proc not readable, unexpected file content, or
+// unknown page size).
+size_t cbm::algo::GetCurrentRSS()
+{
+  // Implementation adapted from https://stackoverflow.com/a/14927379
+#ifndef __linux__
+  return 0;
+#else
+  FILE* fp = fopen("/proc/self/statm", "r");
+  if (fp == nullptr) {
+    return size_t(0L); /* Can't open? */
+  }
+
+  // Skip the first whitespace-delimited field and read the second one, which per
+  // proc(5) is the number of resident pages.
+  unsigned long rssPages = 0L;
+  const int nRead        = fscanf(fp, "%*s%lu", &rssPages);
+  fclose(fp);
+  if (nRead != 1) {
+    return size_t(0L); /* Can't read? */
+  }
+
+  // sysconf() reports errors as -1; casting that to size_t would yield a huge bogus value.
+  const long pageSize = sysconf(_SC_PAGESIZE);
+  if (pageSize <= 0) {
+    return size_t(0L); /* Page size unknown */
+  }
+  return size_t(rssPages) * size_t(pageSize);
+#endif
+}
+
+// Returns the peak resident set size of the process in bytes, or 0 if it cannot be
+// determined (non-Linux platform or getrusage() failure).
+size_t cbm::algo::GetPeakRSS()
+{
+  // Implementation adapted from https://stackoverflow.com/a/14927379
+#ifndef __linux__
+  return 0;
+#else
+  struct rusage usage = {};
+  if (getrusage(RUSAGE_SELF, &usage) != 0) {
+    return 0;  // Don't read the struct if the call failed
+  }
+
+  // ru_maxrss is reported in kilobytes on Linux (see getrusage(2)).
+  // Multiply in size_t so the conversion to bytes is not done in signed long.
+  return size_t(usage.ru_maxrss) * size_t(1024);
+#endif
+}
--- a/algo/base/System.h
+++ b/algo/base/System.h
+/* Copyright (C) 2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+
+#pragma once
+
+#include <cstddef>
+
+/**
+ * @file System.h
+ * @brief System functions
+**/
+
+namespace cbm::algo
+{
+
+  /**
+   * @brief Get the current resident set size (physical memory usage) of the process
+   * @return The current resident set size in bytes
+   * @note Returns zero if the value cannot be determined
+  **/
+  size_t GetCurrentRSS();
+
+  /**
+   * @brief Get the peak resident set size (physical memory usage) of the process
+   * @return The peak resident set size in bytes
+   * @note Returns zero if the value cannot be determined
+  **/
+  size_t GetPeakRSS();
+
+}  // namespace cbm::algo
--- a/algo/base/compat/Algorithm.cxx
+++ b/algo/base/compat/Algorithm.cxx
+/* Copyright (C) 2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+
+#include "Algorithm.h"
+
+
+// Accessor for the process-wide thread pool used by the poolstl-based algorithms.
+task_thread_pool::task_thread_pool& cbm::algo::GetGlobalSTLThreadPool()
+{
+  // Function-local static: constructed on first call, the same instance is returned afterwards.
+  static task_thread_pool::task_thread_pool sGlobalPool;
+  return sGlobalPool;
+}
--- a/algo/base/compat/Algorithm.h
+++ b/algo/base/compat/Algorithm.h
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#ifndef CBM_ALGO_BASE_COMPAT_ALGORITHMS_H
+#define CBM_ALGO_BASE_COMPAT_ALGORITHMS_H
+
+/**
+ * @file Algorithm.h
+ * @brief This file contains compatibility wrappers for parallel stl algorithms.
+ *
+ * The parallel algorithms are only available if the compiler supports C++17. Some older
+ * compilers don't ship with the parallel algorithms, so this wrapper falls back to
+ * sequential algorithms in that case.
+ * Also gcc requires the TBB library to be installed to use the parallel algorithms.
+ * If TBB is not available, we also fall back to sequential algorithms.
+**/
+
+#include "BuildInfo.h"
+
+#include <algorithm>
+#include <poolstl/algorithm>
+#include <poolstl/execution>
+
+#ifdef HAVE_PARALLEL_ALGORITHM
+#include <execution>
+#endif
+
+namespace cbm::algo
+{
+
+  /**
+   * @brief Get the global thread pool for parallel stl algorithms
+   * @details This function returns a reference to the global thread pool used by the parallel stl algorithms.
+   * At the beginning it's initialized with the number of available threads. Otherwise this function should only be
+   * used in conjunction with the parallel stl algorithms via poolstl.
+  **/
+  task_thread_pool::task_thread_pool& GetGlobalSTLThreadPool();
+
+  /**
+   * @brief Wrapper for std::sort
+   *
+   * Sorts [first, last) with the comparator comp.
+   * If HAVE_PARALLEL_STL_POOLSTL is defined, the sort runs in parallel via poolstl on the
+   * global thread pool (see GetGlobalSTLThreadPool). Otherwise the sequential std::sort is used.
+   * The libTBB-backed std::execution::par_unseq variant is currently compiled out (see below).
+  */
+  template<typename It, typename Compare>
+  void Sort(It first, It last, Compare comp)
+  {
+// Disable parallel sorting for the moment
+// The underlying implementation in libTBB has a massive memory leak:
+// https://community.intel.com/t5/Intel-oneAPI-Threading-Building/std-sort-std-execution-par-unseq-has-a-memory-leak-on-Linux/m-p/1580910
+//
+// Update 2024-05-02: Add poolstl as a replacement for libTBB
+#if 0
+// #ifdef HAVE_PARALLEL_STL_LIBTBB
+    std::sort(std::execution::par_unseq, first, last, comp);
+#elif defined(HAVE_PARALLEL_STL_POOLSTL)
+    std::sort(poolstl::par.on(GetGlobalSTLThreadPool()), first, last, comp);
+#else
+    std::sort(first, last, comp);
+#endif
+  }
+}  // namespace cbm::algo
+
+
+#endif
--- a/algo/base/compat/Filesystem.h
+++ b/algo/base/compat/Filesystem.h
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#ifndef CBM_ALGO_BASE_FILESYSTEM_H
+#define CBM_ALGO_BASE_FILESYSTEM_H
+
+#include <boost/filesystem.hpp>
+
+namespace cbm::algo
+{
+
+  // Use boost::filesystem by default instead of std::filesystem for
+  // compatibility with older compilers and ROOT versions
+  namespace fs = boost::filesystem;
+
+}  // namespace cbm::algo
+
+#endif  // CBM_ALGO_BASE_FILESYSTEM_H
--- a/algo/base/compat/OpenMP.h
+++ b/algo/base/compat/OpenMP.h
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#ifndef CBM_ALGO_BASE_COMPAT_OPENMP_H
+#define CBM_ALGO_BASE_COMPAT_OPENMP_H
+
+#include "BuildInfo.h"
+
+#ifdef HAVE_OMP
+#include <omp.h>
+#endif
+
+// Stringifies its arguments and hands them to the _Pragma operator, which allows
+// emitting a pragma from inside another macro.
+#define CBM_PRAGMA(...) _Pragma(#__VA_ARGS__)
+
+// OpenMP parallel for
+// If OpenMP is not available, this macro expands to nothing
+//
+// Hiding the pragma in a macro isn't technically necessary, as the compiler will ignore it if OpenMP is not available.
+// But it slightly increases readability as it's indented to the same level as the code it applies to.
+//
+// Accepts the same arguments as the OpenMP parallel for pragma.
+#ifdef HAVE_OMP
+#define CBM_PARALLEL_FOR(...) CBM_PRAGMA(omp parallel for __VA_ARGS__)
+#else
+#define CBM_PARALLEL_FOR(...)
+#endif
+
+// OpenMP parallel
+// Expands to nothing if OpenMP is not available
+#ifdef HAVE_OMP
+#define CBM_PARALLEL(...) CBM_PRAGMA(omp parallel __VA_ARGS__)
+#else
+#define CBM_PARALLEL(...)
+#endif
+
+// generic omp pragma for other commands
+// Expands to nothing if OpenMP is not available
+#ifdef HAVE_OMP
+#define CBM_OMP(...) CBM_PRAGMA(omp __VA_ARGS__)
+#else
+#define CBM_OMP(...)
+#endif
+
+namespace cbm::algo::openmp
+{
+
+  // Wrappers around the OpenMP runtime queries.
+  // With OpenMP they forward to the omp_* functions; without it they report a
+  // single thread (index 0) and ignore requests to change the thread count.
+#ifdef HAVE_OMP
+  inline int GetMaxThreads() { return omp_get_max_threads(); }
+  inline int GetThreadNum() { return omp_get_thread_num(); }
+  inline int GetNumThreads() { return omp_get_num_threads(); }
+  inline void SetNumThreads(int n) { omp_set_num_threads(n); }
+#else
+  inline int GetMaxThreads() { return 1; }
+  inline int GetThreadNum() { return 0; }
+  inline int GetNumThreads() { return 1; }
+  inline void SetNumThreads(int) {}
+#endif
+
+}  // namespace cbm::algo::openmp
+
+#endif  // CBM_ALGO_BASE_COMPAT_OPENMP_H
--- a/algo/base/compat/RTypes.h
+++ b/algo/base/compat/RTypes.h
+/* Copyright (C) 2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#pragma once
+
+/**
+ * @file RTypes.h
+ * @brief Compatibility header for basic ROOT macros.
+**/
+
+// Use the ROOT header when it is available; otherwise define the bit macros locally.
+#if __has_include(<Rtypes.h>)
+#include <Rtypes.h>
+#else
+// BIT(n): value with only bit n set
+#define BIT(n) (1ULL << (n))
+// SETBIT(n, i): set bit i in n (modifies n)
+#define SETBIT(n, i) ((n) |= BIT(i))
+// CLRBIT(n, i): clear bit i in n (modifies n)
+#define CLRBIT(n, i) ((n) &= ~BIT(i))
+// TESTBIT(n, i): true if bit i is set in n
+#define TESTBIT(n, i) ((bool) (((n) &BIT(i)) != 0))
+#endif
--- a/algo/base/gpu/DeviceImage.cxx
+++ b/algo/base/gpu/DeviceImage.cxx
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#include "DeviceImage.h"
+
+XPU_IMAGE(cbm::algo::GPUReco);
--- a/algo/base/gpu/DeviceImage.h
+++ b/algo/base/gpu/DeviceImage.h
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#ifndef CBM_ALGO_BASE_GPU_DEVICEIMAGE_H
+#define CBM_ALGO_BASE_GPU_DEVICEIMAGE_H
+#include <xpu/device.h>
+
+namespace cbm::algo
+{
+  /// Empty tag type derived from xpu::device_image; passed to XPU_IMAGE in DeviceImage.cxx.
+  struct GPUReco : xpu::device_image {
+  };
+}  // namespace cbm::algo
+
+#endif
--- a/algo/base/gpu/PaddedValue.h
+++ b/algo/base/gpu/PaddedValue.h
+/* Copyright (C) 2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+
+#pragma once
+
+#include <cstddef>
+
+#include <xpu/defines.h>
+
+/**
+ * @file PaddedValue.h
+ * @brief This file contains the definition of the PaddedValue class.
+ */
+
+namespace cbm::algo
+{
+
+  /**
+   * @brief A class that represents a value with padding to a certain size.
+   * @tparam T The type of the value.
+   * @tparam N Number of bytes the value should be padded to. Must be a multiple of
+   *           alignof(T) and not smaller than sizeof(T).
+   *
+   * @note This class is useful for aligning values to a certain size, e.g. to ensure that atomic counters are spread across different cache lines. (Prevent false sharing)
+   *
+   * The wrapper converts implicitly to T&, T* and from T, so it can be used in place of
+   * the wrapped value. Copy and move only transfer the value, never the padding bytes.
+   */
+  template<typename T, size_t N>
+  class PaddedValue {
+    static_assert(N % alignof(T) == 0, "N must be a multiple of alignof(T)");
+    // Without this check 'N - sizeof(T)' below wraps around for too small N and the
+    // compiler reports an obscure array-size error instead.
+    static_assert(N >= sizeof(T), "N must not be smaller than sizeof(T)");
+
+   public:
+    XPU_D PaddedValue() = default;
+    /// Construct from a value (implicit on purpose, so a T can be assigned / passed directly).
+    XPU_D PaddedValue(const T& value) : fValue(value) {}
+
+    XPU_D PaddedValue(const PaddedValue& other) : fValue(other.fValue) {}
+    XPU_D PaddedValue& operator=(const PaddedValue& other)
+    {
+      fValue = other.fValue;
+      return *this;
+    }
+
+    XPU_D PaddedValue(PaddedValue&& other) : fValue(std::move(other.fValue)) {}
+    XPU_D PaddedValue& operator=(PaddedValue&& other)
+    {
+      fValue = std::move(other.fValue);
+      return *this;
+    }
+
+    /// Assign a new value. Returns a reference to the stored value (not to the wrapper).
+    XPU_D T& operator=(const T& value)
+    {
+      fValue = value;
+      return fValue;
+    }
+
+    /// Explicit access to the stored value
+    XPU_D T& Get() { return fValue; }
+    XPU_D const T& Get() const { return fValue; }
+
+    /// Address-of yields a pointer to the stored value, not to the wrapper
+    XPU_D T* operator&() { return &fValue; }
+    XPU_D const T* operator&() const { return &fValue; }
+
+    /// Dereference yields the stored value
+    XPU_D T& operator*() { return fValue; }
+    XPU_D const T& operator*() const { return fValue; }
+
+    /// Implicit conversion to a reference to the stored value
+    XPU_D operator T&() { return fValue; }
+    XPU_D operator const T&() const { return fValue; }
+
+    /// Implicit conversion to a pointer to the stored value
+    XPU_D operator T*() { return &fValue; }
+    XPU_D operator const T*() const { return &fValue; }
+
+    /// Member access on the stored value
+    XPU_D T* operator->() { return &fValue; }
+    XPU_D const T* operator->() const { return &fValue; }
+
+   private:
+    T fValue;                               ///< The wrapped value
+    unsigned char fPadding[N - sizeof(T)];  ///< Unused bytes filling the object up to N bytes
+  };
+
+  /// Cache line size in bytes assumed for padding
+  inline constexpr size_t SizeOfCacheLine = 64;
+
+  /// Value of type T padded to SizeOfCacheLine bytes
+  template<typename T>
+  using PaddedToCacheLine = PaddedValue<T, SizeOfCacheLine>;
+
+}  // namespace cbm::algo
--- a/algo/base/gpu/Params.cxx
+++ b/algo/base/gpu/Params.cxx
+/* Copyright (C) 2022 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer]*/
+#include "Params.h"
+
+XPU_EXPORT(cbm::algo::Params);
--- a/algo/base/gpu/Params.h
+++ b/algo/base/gpu/Params.h
+/* Copyright (C) 2022 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#ifndef CBM_ALGO_GPU_CONFIG_H
+#define CBM_ALGO_GPU_CONFIG_H
+
+#include "DeviceImage.h"
+#include "RecoParams.h"
+
+#include <xpu/device.h>
+
+namespace cbm::algo
+{
+  /// xpu constant of type RecoParams tied to the GPUReco device image; exported via XPU_EXPORT in Params.cxx.
+  struct Params : xpu::constant<GPUReco, RecoParams> {
+  };
+}  // namespace cbm::algo
+
+#endif
--- a/algo/base/util/EnumDict.cxx
+++ b/algo/base/util/EnumDict.cxx
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#include "EnumDict.h"
+
+#include <sstream>
+
+// Always throws std::invalid_argument. The message names the string that could not be
+// parsed, followed by the comma-separated list of valid entries.
+void cbm::algo::detail::RaiseUnknownEntry(std::string_view str, const std::vector<std::string_view>& validEntries)
+{
+  std::ostringstream message;
+  message << "Could not parse '" << str << "'. Valid entries are: ";
+
+  // Emit the separator in front of every entry except the first one
+  const char* separator = "";
+  for (const auto& entry : validEntries) {
+    message << separator << entry;
+    separator = ", ";
+  }
+
+  throw std::invalid_argument(message.str());
+}
--- a/algo/base/util/EnumDict.h
+++ b/algo/base/util/EnumDict.h
+/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+#ifndef CBM_ALGO_BASE_UTIL_SERIALIZABLEENUM_H
+#define CBM_ALGO_BASE_UTIL_SERIALIZABLEENUM_H
+
+#include <boost/algorithm/string/predicate.hpp>
+
+#include <algorithm>
+#include <iosfwd>
+#include <optional>
+#include <stdexcept>
+#include <string_view>
+#include <vector>
+
+#include <fmt/format.h>
+#include <xpu/defines.h>
+
+namespace cbm::algo
+{
+  namespace detail
+  {
+    /// List of (name, value) pairs describing an enum
+    template<typename T>
+    using EnumDict_t = std::vector<std::pair<std::string_view, T>>;
+
+    /// Dictionary of enum T. Specialized per enum by the CBM_ENUM_DICT macro.
+    template<typename T>
+    inline const EnumDict_t<T> EnumDict;
+
+    /// Trait: true if a dictionary was registered for T. Specialized to true_type by CBM_ENUM_DICT.
+    template<typename T>
+    struct EnumHasDict : std::false_type {
+    };
+
+    /// Shorthand for EnumHasDict<T>::value
+    template<typename T>
+    inline constexpr bool EnumHasDict_v = EnumHasDict<T>::value;
+
+    /// Collect the names of all dictionary entries of enum T, in dictionary order
+    template<typename T, typename = std::enable_if_t<detail::EnumHasDict_v<T>>>
+    std::vector<std::string_view> ValidEntries()
+    {
+      std::vector<std::string_view> entries;
+      for (const auto& pair : EnumDict<T>) {
+        entries.push_back(pair.first);
+      }
+      return entries;
+    }
+
+    /// Report that str does not match any of validEntries (defined in EnumDict.cxx)
+    void RaiseUnknownEntry(std::string_view str, const std::vector<std::string_view>& validEntries);
+  }  // namespace detail
+
+  /**
+   * @brief Look up the enum value registered under the given name.
+   *
+   * @param str Name to look up
+   * @param caseSensitive If false (default), names are compared ignoring case
+   * @return Value of the first dictionary entry whose name matches, std::nullopt if none matches
+   */
+  template<typename T, typename = std::enable_if_t<detail::EnumHasDict_v<T>>>
+  std::optional<T> FromString(std::string_view str, bool caseSensitive = false)
+  {
+    for (const auto& [name, value] : detail::EnumDict<T>) {
+      const bool matches = caseSensitive ? (name == str) : boost::iequals(name, str);
+      if (matches) return value;
+    }
+    return std::nullopt;
+  }
+
+  /**
+   * @brief Get the name registered for an enum value.
+   *
+   * @return Name of the first dictionary entry holding the value t
+   * @throws std::runtime_error if the dictionary has no entry for t
+   */
+  template<typename T, typename = std::enable_if_t<detail::EnumHasDict_v<T>>>
+  std::string_view ToString(T t)
+  {
+    for (const auto& [name, value] : detail::EnumDict<T>) {
+      if (value == t) return name;
+    }
+    throw std::runtime_error(fmt::format("Entry {} for enum missing!", static_cast<int>(t)));
+  }
+}  // namespace cbm::algo
+
+#if XPU_IS_CPU
+/**
+   * @brief Convert enums to strings and back.
+   *
+   * @param type The enum type.
+   *
+   * Example:
+   * @code{.cpp}
+   * enum class Detector {
+   *  STS,
+   *  TOF,
+   * };
+   *
+   * CBM_ENUM_DICT(Detector,
+   *  {"sts", Detector::STS},
+   *  {"tof", Detector::TOF}
+   * );
+   *
+   * // Use it like this:
+   * L_(info) << ToString(Detector::STS); // Prints "sts"
+   *
+   * std::optional<Detector> d = FromString<Detector>("tof"); // *d == Detector::TOF
+   * std::optional<Detector> d2 = FromString<Detector>("invalid"); // d2 == std::nullopt
+   * @endcode
+   */
+#define CBM_ENUM_DICT(type, ...)                                                                                       \
+  template<>                                                                                                           \
+  inline const cbm::algo::detail::EnumDict_t<type> cbm::algo::detail::EnumDict<type> = {__VA_ARGS__};                  \
+  template<>                                                                                                           \
+  struct cbm::algo::detail::EnumHasDict<type> : std::true_type {                                                       \
+  }
+#else  // XPU_IS_CPU
+// Disable macro in GPU code, causes some issues with nvcc
+#define CBM_ENUM_DICT(type, ...)
+#endif  // XPU_IS_CPU
+
+// Stream operators for enums
+// Placed in namespace std to be found by ADL e.g. for std::ostream_iterator
+// Only enabled for enums that have a dictionary registered via CBM_ENUM_DICT.
+namespace std
+{
+  /// Write the dictionary name of t to the stream (see cbm::algo::ToString)
+  template<typename T, typename = std::enable_if_t<cbm::algo::detail::EnumHasDict_v<T>>>
+  std::ostream& operator<<(std::ostream& os, T t)
+  {
+    os << cbm::algo::ToString(t);
+    return os;
+  }
+
+  /// Read one whitespace-delimited token and convert it to T (case-insensitive lookup,
+  /// the default of cbm::algo::FromString). Unknown tokens are reported via RaiseUnknownEntry.
+  template<typename T, typename = std::enable_if_t<cbm::algo::detail::EnumHasDict_v<T>>>
+  std::istream& operator>>(std::istream& is, T& t)
+  {
+    std::string str;
+    is >> str;
+    auto maybet = cbm::algo::FromString<T>(str);
+
+    if (!maybet) {
+      // RaiseUnknownEntry throws (see EnumDict.cxx), so the dereference below is not reached
+      cbm::algo::detail::RaiseUnknownEntry(str, cbm::algo::detail::ValidEntries<T>());
+    }
+    t = *maybet;
+
+    return is;
+  }
+}  // namespace std
+
+#endif  //CBM_ALGO_BASE_UTIL_SERIALIZABLEENUM_H
--- a/algo/base/util/MemoryLogger.cxx
+++ b/algo/base/util/MemoryLogger.cxx
+/* Copyright (C) 2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+
+#include "MemoryLogger.h"
+
+#include "AlgoFairloggerCompat.h"
+#include "System.h"
+
+using namespace cbm::algo;
+
+// Convert a byte count to megabytes (1 MB = 1024 * 1024 bytes) using the division of type T.
+template<typename T>
+T MemoryLogger::BytesToMB(T bytes) const
+{
+  constexpr int kBytesPerMB = 1024 * 1024;
+  return bytes / kBytesPerMB;
+}
+
+// Write current RSS, the change since the previous call and the peak RSS to the debug log,
+// then remember the current RSS for the next call.
+void MemoryLogger::Log()
+{
+  const size_t currentRSS = GetCurrentRSS();
+  const size_t peakRSS    = GetPeakRSS();
+
+  // Compute the signed difference explicitly instead of relying on unsigned wrap-around
+  const ptrdiff_t deltaRSS = static_cast<ptrdiff_t>(currentRSS) - static_cast<ptrdiff_t>(mLastRSS);
+
+  // GetCurrentRSS() returns zero if the value cannot be determined; avoid dividing by
+  // zero (which would print nan / inf) and report no change in that case.
+  const float deltaPercent = (currentRSS > 0) ? 100.0f * deltaRSS / currentRSS : 0.0f;
+
+  L_(debug) << "Current memory usage: " << BytesToMB(currentRSS) << "MB (delta  " << BytesToMB(deltaRSS) << "MB / "
+            << deltaPercent << "%)"
+            << ", peak: " << BytesToMB(peakRSS) << "MB";
+
+  mLastRSS = currentRSS;
+}
--- a/algo/base/util/MemoryLogger.h
+++ b/algo/base/util/MemoryLogger.h
+/* Copyright (C) 2024 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Felix Weiglhofer [committer] */
+
+#pragma once
+
+#include <cstddef>
+
+/**
+ * @file MemoryLogger.h
+ * @brief Memory logging
+**/
+
+namespace cbm::algo
+{
+
+  /**
+   * @brief Reports the memory usage of the process to the log
+   *
+   * Each call to Log() also stores the measured value, so the next call can report the difference.
+  **/
+  class MemoryLogger {
+
+   public:
+    /**
+     * @brief Create a logger without a previous measurement
+    **/
+    MemoryLogger() = default;
+
+    /**
+     * @brief Nothing to release
+    **/
+    ~MemoryLogger() = default;
+
+    /**
+     * @brief Write the current memory usage to the log
+    **/
+    void Log();
+
+   private:
+    /**
+     * @brief Convert bytes to MB
+     * @note Templated so that different integer types can be passed
+    **/
+    template<typename T>
+    T BytesToMB(T bytes) const;
+
+    size_t mLastRSS = 0;  ///< Value stored by the previous Log() call (zero before the first call)
+  };
+
+}  // namespace cbm::algo
No results found