diff --git a/algo/CMakeLists.txt b/algo/CMakeLists.txt index b81382879e529fdbb0e967435abfae3db1429ec0..f7e03a8eede7ebc43055a16f1219ca2a113096fc 100644 --- a/algo/CMakeLists.txt +++ b/algo/CMakeLists.txt @@ -203,10 +203,12 @@ install( FILES base/ChainContext.h base/Definitions.h + base/HistogramSender.h base/Options.h base/RecoParams.h base/SubChain.h base/PartitionedVector.h + base/PartitionedSpan.h global/Reco.h global/RecoResults.h global/RecoResultsInputArchive.h diff --git a/algo/base/Exceptions.h b/algo/base/Exceptions.h new file mode 100644 index 0000000000000000000000000000000000000000..002795e1ab4b834934caef81fa15adf53cbdfb43 --- /dev/null +++ b/algo/base/Exceptions.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ +#ifndef CBM_ALGO_BASE_EXCEPTIONS_H +#define CBM_ALGO_BASE_EXCEPTIONS_H + +#include <exception> + +#include <fmt/format.h> + +namespace cbm::algo +{ + /** + * @brief Base class for exceptions + */ + class Exception : public std::runtime_error { + public: + template<typename... Args> + Exception(const char* fmt, Args&&... args) : std::runtime_error(fmt::format(fmt, std::forward<Args>(args)...)) + { + } + }; + + /** + * @brief Indicates an unrecoverable error. Should tear down the process. + */ + class FatalError : public Exception { + using Exception::Exception; + }; + + /** + * Indicates an error during timeslice processing. Timeslice will be discarded. + * Processing can continue with new timeslice. 
+ */ + class ProcessingError : public Exception { + using Exception::Exception; + }; + +} // namespace cbm::algo + +#endif diff --git a/algo/base/HistogramSender.h b/algo/base/HistogramSender.h new file mode 100644 index 0000000000000000000000000000000000000000..e4c13d192f110e58e69f3ed9a36cd0d696885928 --- /dev/null +++ b/algo/base/HistogramSender.h @@ -0,0 +1,64 @@ +/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ +#ifndef CBM_ALGO_BASE_HISTOGRAM_SENDER_H +#define CBM_ALGO_BASE_HISTOGRAM_SENDER_H + +#include <boost/archive/binary_oarchive.hpp> +#include <boost/iostreams/device/array.hpp> +#include <boost/iostreams/device/back_inserter.hpp> +#include <boost/iostreams/stream.hpp> +#include <boost/serialization/utility.hpp> +#include <boost/serialization/vector.hpp> + +#include <string> +#include <string_view> +#include <zmq.hpp> + +namespace cbm::algo +{ + + class HistogramSender { + public: + HistogramSender(std::string_view address) + : fHistComChan(address) + , fZmqContext(1) + , fZmqSocket(fZmqContext, zmq::socket_type::push) + { + fZmqSocket.connect(fHistComChan); // This side "connects" to socket => Other side should have "bind"!!!! + } + + /** @brief Serialize object and send it to the histogram server + ** @param obj: object to be serialized in the message, e.g. config pairs of strings or Histo1D + ** @param flags: or'ed values from zmq::send_flags, typ. 
zmq::send_flags::sndmore to indicate multi-parts message + **/ + template<typename Object> + void PrepareAndSendMsg(const Object& obj, zmq::send_flags flags) + { + /// Needed resources (serialized string, boost inserter, boost stream, boost binary output archive) + namespace b_io = boost::iostreams; + namespace b_ar = boost::archive; + + std::string serial_str; + b_io::back_insert_device<std::string> inserter(serial_str); + b_io::stream<b_io::back_insert_device<std::string>> bstream(inserter); + b_ar::binary_oarchive oa(bstream); + + serial_str.clear(); + oa << obj; + bstream.flush(); + + zmq::message_t msg(serial_str.size()); + std::copy_n(static_cast<const char*>(serial_str.data()), msg.size(), static_cast<char*>(msg.data())); + fZmqSocket.send(msg, flags); + } + + private: + std::string fHistComChan = "tcp://127.0.0.1:56800"; + zmq::context_t fZmqContext; ///< ZMQ context FIXME: should be only one context per binary! + zmq::socket_t fZmqSocket; ///< ZMQ socket to histogram server + }; + +} // namespace cbm::algo + +#endif diff --git a/algo/base/Options.cxx b/algo/base/Options.cxx index e743e92ab61d7f72c64cfc5ac5fa1ec6237a9ae6..3f3ff92adf2049bd476e74a77e0306c5070e5420 100644 --- a/algo/base/Options.cxx +++ b/algo/base/Options.cxx @@ -69,6 +69,7 @@ Options::Options(int argc, char** argv) "set log level (debug, info, warning, error, fatal)") ("monitor,m", po::value(&fMonitorUri)->value_name("<uri>")->implicit_value("file:cout"), "URI specifying monitor output (e.g. file:/tmp/monitor.txt, influx1:login:8086:cbmreco_status). Prints to cout when no argument is given. 
Monitor is disabled when flag is not set.") + ("histogram", po::value(&fHistogramUri)->value_name("<uri>"), "URI to specify histogram server") ("log-file,L", po::value(&fLogFile)->value_name("<file>"), "write log messages to file") ("output-types,O", po::value(&fOutputTypes)->multitoken()->default_value({RecoData::Hit})->value_name("<types>"), diff --git a/algo/base/Options.h b/algo/base/Options.h index 064cc6bd3949246a140bbd3957ec4f51ef3a9dc3..d1a83d93d6e72708207d57aa3161dc53dcba206b 100644 --- a/algo/base/Options.h +++ b/algo/base/Options.h @@ -28,6 +28,7 @@ namespace cbm::algo fs::path LogFile() const { return fLogFile; } const std::string& Device() const { return fDevice; } const std::string& MonitorUri() const { return fMonitorUri; } + const std::string& HistogramUri() const { return fHistogramUri; } bool CollectKernelTimes() const { return fCollectKernelTimes; } int NumTimeslices() const { return fNumTimeslices; } int SkipTimeslices() const { return fSkipTimeslices; } @@ -64,6 +65,7 @@ namespace cbm::algo std::string fLogFile; std::string fDevice; std::string fMonitorUri; + std::string fHistogramUri; bool fDumpArchive = false; bool fCollectKernelTimes = false; int fNumTimeslices = -1; diff --git a/algo/base/PartitionedSpan.h b/algo/base/PartitionedSpan.h new file mode 100644 index 0000000000000000000000000000000000000000..988cc6f2daba2dedbb37730f7951cffdd23af62f --- /dev/null +++ b/algo/base/PartitionedSpan.h @@ -0,0 +1,184 @@ +/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ +#ifndef CBM_ALGO_BASE_PARTITIONED_SPAN_H +#define CBM_ALGO_BASE_PARTITIONED_SPAN_H + +#include <array> +#include <gsl/span> +#include <stdexcept> +#include <vector> + +#include "Definitions.h" + +namespace cbm::algo +{ + + template<typename T, typename Allocator> + class PartitionedVector; + + namespace detail + { + template<typename U, typename T> + using 
EnableOnConst = std::enable_if_t<std::is_const_v<T> && std::is_same_v<U, std::remove_cv_t<T>>>; + + template<typename U, typename T> + using EnableOnNonConst = std::enable_if_t<!std::is_const_v<T> && std::is_same_v<U, std::remove_cv_t<T>>>; + } // namespace detail + + template<typename T> + class PartitionedSpan { + + public: + PartitionedSpan() : fData(), fOffsets(NullOffset), fAdresses() { EnsureDimensions(); } + + // Intellisense and clang workaround, fails on template deduction with stl containers for some reason + // #if defined(__INTELLISENSE__) || defined(__clang__) + template<typename Allocator> + PartitionedSpan(std::vector<T, Allocator>& container, gsl::span<const size_t> offsets, + gsl::span<const u32> addresses) + : fData(container) + , fOffsets(offsets) + , fAdresses(addresses) + { + EnsureDimensions(); + } + + // FIXME disable if T is non-const via SFINAE, otherwise get misleading compiler errors + template<typename Allocator> + PartitionedSpan(const std::vector<T, Allocator>& container, gsl::span<const size_t> offsets, + gsl::span<const u32> addresses) + : fData(container) + , fOffsets(offsets) + , fAdresses(addresses) + { + EnsureDimensions(); + } + + template<size_t N> + PartitionedSpan(std::array<T, N>& container, gsl::span<const size_t> offsets, gsl::span<const u32> addresses) + : fData(container) + , fOffsets(offsets) + , fAdresses(addresses) + { + EnsureDimensions(); + } + + // FIXME disable if T is non-const via SFINAE + template<size_t N> + PartitionedSpan(const std::array<T, N>& container, gsl::span<const size_t> offsets, gsl::span<const u32> addresses) + : fData(container) + , fOffsets(offsets) + , fAdresses(addresses) + { + EnsureDimensions(); + } + // #endif + + PartitionedSpan(gsl::span<T> data, gsl::span<const size_t> offsets, gsl::span<const u32> addresses) + : fData(data) + , fOffsets(offsets) + , fAdresses(addresses) + { + EnsureDimensions(); + } + + template<typename U, typename Allocator, typename = detail::EnableOnConst<U, T>> + 
PartitionedSpan(const PartitionedVector<U, Allocator>& container) + : fData(container.Data()) + , fOffsets(container.Offsets()) + , fAdresses(container.Addresses()) + { + EnsureDimensions(); + } + + template<typename U, typename Allocator, typename = detail::EnableOnNonConst<U, T>> + PartitionedSpan(PartitionedVector<U, Allocator>& container) + : fData(container.Data()) + , fOffsets(container.Offsets()) + , fAdresses(container.Addresses()) + { + EnsureDimensions(); + } + + gsl::span<T> operator[](size_t i) const + { + EnsureBounds(i); + return UnsafePartitionSpan(i); + } + + u32 Address(size_t i) const + { + EnsureBounds(i); + return fAdresses[i]; + } + + std::pair<gsl::span<T>, u32> Partition(size_t i) const + { + EnsureBounds(i); + return std::pair<gsl::span<T>, u32>(UnsafePartitionSpan(i), fAdresses[i]); + } + + size_t NPartitions() const { return fAdresses.size(); } + + size_t Size(size_t i) const + { + EnsureBounds(i); + return UnsafeSize(i); + } + + size_t NElements() const { return fData.size(); } + + gsl::span<T> Data() const { return fData; } + + gsl::span<const u32> Addresses() const { return fAdresses; } + + gsl::span<const size_t> Offsets() const { return fOffsets; } + + private: + // Required for default constructor, don't use std::array to avoid additional dependency + static constexpr size_t NullOffset[1] = {0}; + + gsl::span<T> fData; + gsl::span<const size_t> fOffsets; + gsl::span<const u32> fAdresses; + + // FIXME code duplication with PartitionedVector + + void EnsureDimensions() const + { + if (fOffsets.size() - 1 != fAdresses.size()) { + throw std::runtime_error("PartitionedSpan: fOffsets.size() != fAdresses.size()"); + } + if (fOffsets.front() != 0) throw std::runtime_error("PartitionedSpan: fOffsets.front() != 0"); + if (fOffsets.back() != fData.size()) { + throw std::runtime_error("PartitionedSpan: fOffsets.back() != fData.size()"); + } + } + + void EnsureBounds(size_t i) const + { + if (i >= fAdresses.size()) throw 
std::out_of_range("PartitionedSpan: index out of bounds"); + } + + size_t UnsafeSize(size_t i) const { return fOffsets[i + 1] - fOffsets[i]; } + + gsl::span<T> UnsafePartitionSpan(size_t i) const { return fData.subspan(fOffsets[i], UnsafeSize(i)); } + }; + + // template auto deduction + template<typename T, template<typename> class Container> + PartitionedSpan(Container<T>&, gsl::span<const size_t>, gsl::span<const u32>) -> PartitionedSpan<T>; + + template<typename T, template<typename> class Container> + PartitionedSpan(const Container<T>&, gsl::span<const size_t>, gsl::span<const u32>) -> PartitionedSpan<const T>; + + template<typename T, typename Allocator> + PartitionedSpan(PartitionedVector<T, Allocator>&) -> PartitionedSpan<T>; + + template<typename T, typename Allocator> + PartitionedSpan(const PartitionedVector<T, Allocator>&) -> PartitionedSpan<const T>; + +} // namespace cbm::algo + +#endif diff --git a/algo/base/PartitionedVector.h b/algo/base/PartitionedVector.h index 008b65d738887f207381e23362d0bcf65876c4c7..71d812eea81555122783f2ecb54386ee21a96b1f 100644 --- a/algo/base/PartitionedVector.h +++ b/algo/base/PartitionedVector.h @@ -15,6 +15,8 @@ namespace cbm::algo { + template<typename T> + class PartitionedSpan; /** * @brief A vector that is partitioned into multiple subvectors. @@ -33,11 +35,7 @@ namespace cbm::algo /** * @brief Default constructor. Creates an empty vector. */ - PartitionedVector() - { - fOffsets.push_back(0); - EnsureDimensions(); - } + PartitionedVector() : fData(), fOffsets({0}), fAdresses() { EnsureDimensions(); } /** * @brief Constructor. Creates a vector with n partitions. 
@@ -71,6 +69,15 @@ namespace cbm::algo EnsureDimensions(); } + template<typename U> + PartitionedVector(PartitionedSpan<U> other) + : fData(other.Data().begin(), other.Data().end()) + , fOffsets(other.Offsets().begin(), other.Offsets().end()) + , fAdresses(other.Addresses().begin(), other.Addresses().end()) + { + EnsureDimensions(); + } + /** * @brief Access data at partition i. */ @@ -119,7 +126,7 @@ namespace cbm::algo /** * @brief Get the number of partitions. */ - size_t NPartitions() const { return fOffsets.size() - 1; } + size_t NPartitions() const { return fAdresses.size(); } /** * @brief Get the size of partition i. @@ -138,7 +145,12 @@ namespace cbm::algo /** * @brief Get the underlying data. */ - const Container_t& Data() const { return fData; } + gsl::span<T> Data() { return fData; } + + /** + * @brief Get the underlying data. + */ + gsl::span<const T> Data() const { return fData; } /** * @brief Get the addresses. @@ -160,6 +172,7 @@ namespace cbm::algo if (fOffsets.size() - 1 != fAdresses.size()) { throw std::runtime_error("PartitionedVector: fOffsets.size() != fAdresses.size()"); } + if (fOffsets.front() != 0) { throw std::runtime_error("PartitionedVector: fOffsets.front() != 0"); } if (fOffsets.back() != fData.size()) { throw std::runtime_error("PartitionedVector: fOffsets.back() != fData.size()"); } @@ -167,7 +180,7 @@ namespace cbm::algo void EnsureBounds(size_t i) const { - if (i >= fOffsets.size() - 1) throw std::out_of_range("PartitionedVector: index out of bounds"); + if (i >= fAdresses.size()) throw std::out_of_range("PartitionedVector: index out of bounds"); } void ComputeOffsets(gsl::span<const size_t> sizes) diff --git a/algo/base/RecoParams.h b/algo/base/RecoParams.h index 45c845a50e55b97792bb0a4be07ba92e0c451dc1..e459950ee67125b544dfa7781eebb19488f25bda 100644 --- a/algo/base/RecoParams.h +++ b/algo/base/RecoParams.h @@ -27,6 +27,12 @@ namespace cbm::algo CPU, XPU, }; + enum class AllocationMode : u8 + { + Auto, //< Static on GPU, dynamic 
on CPU + Static, //< Allocate all buffers beforehand + Dynamic, //< Allocate buffers per timeslice + }; struct STS { SortMode digiSortMode; @@ -40,33 +46,30 @@ namespace cbm::algo f32 timeCutClusterAbs; f32 timeCutClusterSig; - struct MemoryLimits { - u64 maxDigisPerTS; - u64 maxDigisPerMS; - u64 maxDigisPerModule; - f64 clustersPerDigiTS; - f64 clustersPerDigiModule; - f64 hitsPerClusterTS; - f64 hitsPerClusterModule; + struct Memory { + AllocationMode allocationMode; + u64 maxNDigisPerTS; + u64 maxNDigisPerModule; + f64 clustersPerDigi; + f64 hitsPerCluster; + + u64 NClustersUpperBound(u64 nDigis) const { return nDigis * clustersPerDigi; } + u64 NHitsUpperBound(u64 nDigis) const { return NClustersUpperBound(nDigis) * hitsPerCluster; } - XPU_D u64 maxClustersPerTS() const { return maxDigisPerTS * clustersPerDigiTS; } - XPU_D u64 maxClustersPerModule() const { return maxDigisPerModule * clustersPerDigiModule; } - XPU_D u64 maxHitsPerClusterTS() const { return maxClustersPerTS() * hitsPerClusterTS; } - XPU_D u64 maxHitsPerClusterModule() const { return maxClustersPerModule() * hitsPerClusterModule; } + u64 MaxNClustersPerModule() const { return NClustersUpperBound(maxNDigisPerModule); } + u64 MaxNHitsPerModule() const { return MaxNClustersPerModule() * hitsPerCluster; } + + bool IsDynamic() const { return allocationMode == RecoParams::AllocationMode::Dynamic; } + bool IsStatic() const { return allocationMode == RecoParams::AllocationMode::Static; } + bool IsAuto() const { return allocationMode == RecoParams::AllocationMode::Auto; } static constexpr auto Properties = std::make_tuple( - config::Property(&MemoryLimits::maxDigisPerTS, "maxDigisPerTS", "Maximal number of digis per time slice"), - config::Property(&MemoryLimits::maxDigisPerMS, "maxDigisPerMS", "Maximal number of digis per micro slice"), - config::Property(&MemoryLimits::maxDigisPerModule, "maxDigisPerModule", "Maximal number of digis per module"), - config::Property(&MemoryLimits::clustersPerDigiTS, 
"clustersPerDigiTS", - "Number of clusters per digi in a time slice"), - config::Property(&MemoryLimits::clustersPerDigiModule, "clustersPerDigiModule", - "Number of clusters per digi in a module"), - config::Property(&MemoryLimits::hitsPerClusterTS, "hitsPerClusterTS", - "Number of hits per cluster in a time slice"), - config::Property(&MemoryLimits::hitsPerClusterModule, "hitsPerClusterModule", - "Number of hits per cluster in a module")); - } memoryLimits; + config::Property(&Memory::allocationMode, "allocationMode", "Allocation mode (Auto, Static, Dynamic)"), + config::Property(&Memory::maxNDigisPerTS, "maxNDigisPerTS", "Maximal number of digis per time slice"), + config::Property(&Memory::maxNDigisPerModule, "maxNDigisPerModule", "Maximal number of digis per module"), + config::Property(&Memory::clustersPerDigi, "clustersPerDigi", "Number of clusters per digi in a time slice"), + config::Property(&Memory::hitsPerCluster, "hitsPerCluster", "Number of hits per cluster in a time slice")); + } memory; static constexpr auto Properties = std::make_tuple( config::Property(&STS::digiSortMode, "digiSortMode", @@ -89,7 +92,7 @@ namespace cbm::algo &STS::timeCutClusterSig, "timeCutClusterSig", "Time cut for clusters." 
" Two clusters are considered it their time difference is below 'value * sqrt(terr1**2 + terr2*+2)'"), - config::Property(&STS::memoryLimits, "memoryLimits", "Memory limits for STS reco")); + config::Property(&STS::memory, "memory", "Memory limits for STS reco")); }; STS sts; @@ -109,4 +112,10 @@ CBM_ENUM_DICT(cbm::algo::RecoParams::UnpackMode, {"XPU", RecoParams::UnpackMode::XPU} ); +CBM_ENUM_DICT(cbm::algo::RecoParams::AllocationMode, + {"Auto", RecoParams::AllocationMode::Auto}, + {"Static", RecoParams::AllocationMode::Static}, + {"Dynamic", RecoParams::AllocationMode::Dynamic} +); + #endif // CBM_ALGO_BASE_RECOPARAMS_H diff --git a/algo/base/compat/OpenMP.h b/algo/base/compat/OpenMP.h index 16991f1bedc53ba27eb2a0cf737fdfda8653dfdf..84d57489debaee5f18e43976d85f8628a99dd8db 100644 --- a/algo/base/compat/OpenMP.h +++ b/algo/base/compat/OpenMP.h @@ -25,6 +25,20 @@ #define CBM_PARALLEL_FOR(...) #endif +// OpenMP parallel +#ifdef HAVE_OMP +#define CBM_PARALLEL(...) CBM_PRAGMA(omp parallel __VA_ARGS__) +#else +#define CBM_PARALLEL(...) +#endif + +// generic omp pragma for other commands +#ifdef HAVE_OMP +#define CBM_OMP(...) CBM_PRAGMA(omp __VA_ARGS__) +#else +#define CBM_OMP(...) 
+#endif + namespace cbm::algo::openmp { diff --git a/algo/detectors/sts/Hitfinder.h b/algo/detectors/sts/Hitfinder.h index 7bd5ad70f44993a4d171b06b805788ea6de0509d..f4f0f79c9613eeb3e26f6876c4b834b566aac1b6 100644 --- a/algo/detectors/sts/Hitfinder.h +++ b/algo/detectors/sts/Hitfinder.h @@ -30,9 +30,9 @@ namespace cbm::algo kFindClusterBlockSize = 1024, kFindHitsBlockSize = 512, #else // HIP, values ignored on CPU - kSortDigisBlockSize = 256, + kSortDigisBlockSize = 1024, kSortDigisItemsPerThread = 6, - kSortClustersBlockSize = 256, + kSortClustersBlockSize = 1024, kSortClustersItemsPerThread = 6, kFindClusterBlockSize = 1024, kFindHitsBlockSize = 1024, @@ -279,6 +279,12 @@ namespace cbm::algo::sts // output // Max number of Hits that can be stored in each module + size_t hitsAllocatedPerModule; + + // Max number of hits that should be stored in each module + // Guaranteed to be <= hitsAllocatedPerModule, + // calculated from to the number of digis + // This is a seperate value to terminate faster if we encounter monster events size_t maxHitsPerModule; // Hits sorted by module. Stored in buckets of size maxHitsPerModule. @@ -291,6 +297,10 @@ namespace cbm::algo::sts // FIXME: Should be size_t! xpu::buffer<int> nHitsPerModule; + // Flat array of hits. 
size = nHitsTotal + size_t hitsFlatCapacity; + xpu::buffer<sts::Hit> hitsFlat; + public: XPU_D void SortDigisInSpaceAndTime(SortDigis::context&) const; XPU_D void FindClustersSingleStep(FindClusters::context&) const; diff --git a/algo/detectors/sts/HitfinderChain.cxx b/algo/detectors/sts/HitfinderChain.cxx index 0291b93622f9cf2f18b062fc5da2805e53311176..4d0fb95b6a7f67dad79f4efe231f7c7ed081cf15 100644 --- a/algo/detectors/sts/HitfinderChain.cxx +++ b/algo/detectors/sts/HitfinderChain.cxx @@ -7,15 +7,33 @@ #include <log.hpp> #include <numeric> +#include "Exceptions.h" #include "PODVector.h" #include "compat/OpenMP.h" using namespace cbm::algo; -void sts::HitfinderChain::SetParameters(const sts::HitfinderPars& parameters) +void sts::HitfinderChain::SetParameters(const HitfinderChainPars& parameters) { fPars.emplace(parameters); AllocateStatic(); + auto& memoryPars = fPars->memory; + + if (memoryPars.IsAuto()) { + if (xpu::device().active().backend() == xpu::cpu) { + memoryPars.allocationMode = RecoParams::AllocationMode::Dynamic; + } + else { + memoryPars.allocationMode = RecoParams::AllocationMode::Static; + } + } + + if (!memoryPars.IsDynamic() && !memoryPars.IsStatic()) { + throw std::runtime_error( + fmt::format("STS Hitfinder Chain: Unknown allocation mode: {}", ToString(memoryPars.allocationMode))); + } + + if (memoryPars.IsStatic()) { AllocateDynamic(memoryPars.maxNDigisPerModule, memoryPars.maxNDigisPerTS); } } void sts::HitfinderChain::Finalize() @@ -32,30 +50,48 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS xpu::push_timer("STS Hitfinder"); - size_t nModules = fPars->modules.size(); + size_t nModules = fPars->setup.modules.size(); size_t nModuleSides = nModules * 2; size_t nDigisTotal = digis.size(); xpu::t_add_bytes(nDigisTotal * sizeof(CbmStsDigi)); // Getting the digis on the GPU requires 3 steps // 1. 
Sort digis into buckets by module - size_t maxNDigisPerModule; - DigiMap digiMap = SortDigisIntoModules(digis, maxNDigisPerModule); + DigiMap digiMap = CountDigisPerModules(digis); // 2. Once we know number of digis per module, we can allocate // the dynamic buffers on the gpu, as the buffer sizes depend on that value - AllocateDynamic(maxNDigisPerModule, nDigisTotal); + if (fPars->memory.IsDynamic()) AllocateDynamic(digiMap.maxNDigisPerModule, nDigisTotal); + else { + if (digiMap.maxNDigisPerModule > fPars->memory.maxNDigisPerModule) { + throw std::runtime_error( + fmt::format("STS Hitfinder Chain: Too many digis per module for static allocation: {} > {}", + digiMap.maxNDigisPerModule, fPars->memory.maxNDigisPerModule)); + } + if (nDigisTotal > fPars->memory.maxNDigisPerTS) { + throw std::runtime_error( + fmt::format("STS Hitfinder Chain: Too many digis per timeslice for static allocation: {} > {}", nDigisTotal, + fPars->memory.maxNDigisPerTS)); + } + } // 3. Copy digis into flat array with offsets per module - FlattenDigis(digiMap); + FlattenDigis(digis, digiMap); if (Opts().LogLevel() == trace) EnsureDigiOffsets(digiMap); xpu::queue queue; + // Set constants + auto& hfc = fHitfinder; + hfc.maxHitsPerModule = fPars->memory.NHitsUpperBound(digiMap.maxNDigisPerModule); + if (hfc.maxHitsPerModule > hfc.hitsAllocatedPerModule) { + L_(error) << fmt::format("STS Hitfinder Chain: Too many hits per module for static allocation: {} > {}", + hfc.maxHitsPerModule, hfc.hitsAllocatedPerModule); + hfc.maxHitsPerModule = hfc.hitsAllocatedPerModule; + } + // Clear buffers // Not all buffers have to initialized, but useful for debugging - L_(debug) << "STS Hitfinder Chain: Clearing buffers..."; - auto& hfc = fHitfinder; // xpu::memset(hfc.digisPerModule, 0); // xpu::memset(hfc.digisPerModuleTmp, 0); // xpu::memset(hfc.digisSortedPerModule, 0); @@ -74,7 +110,12 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS L_(debug) << "STS Hitfinder Chain: 
Copy digis buffers..."; xpu::set<TheHitfinder>(fHitfinder); - queue.copy(hfc.digisPerModule, xpu::h2d); + + // Only copy the actual number of digis, not the whole allocated buffer + // TODO add support in xpu, for buffer copies with offset + size + const CbmStsDigi* digisH = xpu::h_view(hfc.digisPerModule).data(); + CbmStsDigi* digisD = hfc.digisPerModule.get(); + if (digisH != digisD) queue.copy(digisH, digisD, sizeof(CbmStsDigi) * nDigisTotal); queue.copy(hfc.digiOffsetPerModule, xpu::h2d); L_(debug) << "STS Hitfinder Chain: Sort Digis..."; @@ -115,7 +156,7 @@ sts::HitfinderChain::Result sts::HitfinderChain::operator()(gsl::span<const CbmS std::vector<ClusterIdx> clusterIdxPerModule; clusterIdxPerModule.resize(props.size()); std::vector<int> nClustersPerModule; - nClustersPerModule.resize(fPars->modules.size() * 2); + nClustersPerModule.resize(fPars->setup.modules.size() * 2); queue.copy(hfc.clusterIdxPerModule.get(), clusterIdxPerModule.data(), props.size_bytes()); queue.copy(hfc.nClustersPerModule.get(), nClustersPerModule.data(), nClustersPerModule.size() * sizeof(int)); @@ -217,8 +258,8 @@ void sts::HitfinderChain::AllocateStatic() EnsureParameters(); // Shorthands for common constants - const int nChannels = fPars->nChannels / 2; // Only count channels on one side of the module - const int nModules = fPars->modules.size(); + const int nChannels = fPars->setup.nChannels / 2; // Only count channels on one side of the module + const int nModules = fPars->setup.modules.size(); const int nModuleSides = 2 * nModules; // Number of module front + backsides xpu::queue q; @@ -226,24 +267,24 @@ void sts::HitfinderChain::AllocateStatic() // Set GPU constants fHitfinder.nModules = nModules; fHitfinder.nChannels = nChannels; - fHitfinder.asic = fPars->asic; + fHitfinder.asic = fPars->setup.asic; // Copy landau table - size_t nLandauTableEntries = fPars->landauTable.values.size(); + size_t nLandauTableEntries = fPars->setup.landauTable.values.size(); 
fHitfinder.landauTableSize = nLandauTableEntries; - fHitfinder.landauStepSize = fPars->landauTable.stepSize; + fHitfinder.landauStepSize = fPars->setup.landauTable.stepSize; fHitfinder.landauTable.reset(nLandauTableEntries, xpu::buf_io); - q.copy(fPars->landauTable.values.data(), fHitfinder.landauTable.get(), nLandauTableEntries * sizeof(float)); + q.copy(fPars->setup.landauTable.values.data(), fHitfinder.landauTable.get(), nLandauTableEntries * sizeof(float)); // Copy transformation matrix fHitfinder.localToGlobalRotationByModule.reset(9 * nModules, xpu::buf_io); fHitfinder.localToGlobalTranslationByModule.reset(3 * nModules, xpu::buf_io); // - Copy matrix into flat array - xpu::h_view hRot {fHitfinder.localToGlobalRotationByModule}; - xpu::h_view hTrans {fHitfinder.localToGlobalTranslationByModule}; + xpu::h_view hRot(fHitfinder.localToGlobalRotationByModule); + xpu::h_view hTrans(fHitfinder.localToGlobalTranslationByModule); for (int m = 0; m < nModules; m++) { - const auto& module = fPars->modules.at(m); + const auto& module = fPars->setup.modules.at(m); std::copy_n(module.localToGlobal.rotation.data(), 9, &hRot[m * 9]); std::copy_n(module.localToGlobal.translation.data(), 3, &hTrans[m * 3]); } @@ -254,9 +295,9 @@ void sts::HitfinderChain::AllocateStatic() // Copy Sensor Parameteres fHitfinder.sensorPars.reset(nModules, xpu::buf_io); - xpu::h_view hSensorPars {fHitfinder.sensorPars}; + xpu::h_view hSensorPars(fHitfinder.sensorPars); for (int m = 0; m < nModules; m++) { - const auto& module = fPars->modules.at(m); + const auto& module = fPars->setup.modules.at(m); auto& gpuPars = hSensorPars[m]; gpuPars.dY = module.dY; gpuPars.pitch = module.pitch; @@ -281,19 +322,21 @@ void sts::HitfinderChain::AllocateDynamic(size_t maxNDigisPerModule, size_t nDig << nDigisTotal << " digis in total"; EnsureParameters(); - xpu::scoped_timer t_ {"Allocate"}; + xpu::scoped_timer t_("Allocate"); // TODO: some of these buffers have a constant size and can be allocated statically. 
// Just the data they contain is static. // Shorthands for common constants - const int nChannels = fPars->nChannels / 2; // Only count channels on one side of the module - const int nModules = fPars->modules.size(); + const int nChannels = fPars->setup.nChannels / 2; // Only count channels on one side of the module + const int nModules = fPars->setup.modules.size(); const int nModuleSides = 2 * nModules; // Number of module front + backsides + const size_t maxNClustersPerModule = fPars->memory.MaxNClustersPerModule(); + const size_t maxNHitsPerModule = fPars->memory.MaxNHitsPerModule(); - const size_t maxNClustersPerModule = maxNDigisPerModule * Params().sts.memoryLimits.clustersPerDigiModule; - const size_t maxNHitsPerModule = maxNClustersPerModule * Params().sts.memoryLimits.hitsPerClusterModule; + // TODO: this number should be much lower, estimate from max digis per TS + const size_t maxHitsTotal = maxNHitsPerModule * nModules; // Allocate Digi Buffers fHitfinder.digiOffsetPerModule.reset(nModuleSides + 1, xpu::buf_io); @@ -315,95 +358,155 @@ void sts::HitfinderChain::AllocateDynamic(size_t maxNDigisPerModule, size_t nDig fHitfinder.nClustersPerModule.reset(nModuleSides, xpu::buf_io); // Allocate Hit Buffers - fHitfinder.maxHitsPerModule = maxNHitsPerModule; - fHitfinder.hitsPerModule.reset(maxNHitsPerModule * nModules, xpu::buf_io); + fHitfinder.hitsAllocatedPerModule = maxNHitsPerModule; + fHitfinder.hitsPerModule.reset(maxNHitsPerModule * nModules, xpu::buf_device); fHitfinder.nHitsPerModule.reset(nModules, xpu::buf_io); + + fHitfinder.hitsFlatCapacity = maxHitsTotal; + fHitfinder.hitsFlat.reset(maxHitsTotal, xpu::buf_host); } -sts::HitfinderChain::DigiMap sts::HitfinderChain::SortDigisIntoModules(gsl::span<const CbmStsDigi> digis, - size_t& maxNDigisPerModule) +sts::HitfinderChain::DigiMap sts::HitfinderChain::CountDigisPerModules(gsl::span<const CbmStsDigi> digis) { L_(debug) << "STS Hitfinder Chain: Sorting " << digis.size() << " digis into modules"; - 
xpu::scoped_timer t_ {"Sort Digis"}; + xpu::scoped_timer t_("Count Digis By Module"); + xpu::t_add_bytes(digis.size_bytes()); + int nModules = fPars->setup.modules.size(); + int nModuleSides = nModules * 2; DigiMap digiMap; + digiMap.nDigisPerModule.resize(nModuleSides); - int nChannelsPerSide = fPars->nChannels / 2; + // Create map from module address to index + // This could be done just once in the beginning, + // but overhead is negligible compared to the digi copying + digiMap.addrToIndex.resize(1 << 17, InvalidModule); + for (size_t m = 0; m < nModules; m++) { + const auto& module = fPars->setup.modules[m]; + i32 paddr = CbmStsAddress::PackDigiAddress(module.address); + digiMap.addrToIndex[paddr] = u16(m); + } - auto isPulser = [&](const CbmStsDigi& digi) { - return std::find_if(fPars->modules.begin(), fPars->modules.end(), - [&](const auto& mod) { return mod.address == digi.GetAddress(); }) - == fPars->modules.end(); - }; + int nChannelsPerSide = fPars->setup.nChannels / 2; - size_t nPulsers = 0; + // TODO: try to parallelise + // Count digis per module in parallel + // Then calculate offset per module + // Write digis directly to flat array in parallel + // Use atomic add to increment offset per module + digiMap.nDigisPerThread.resize(openmp::GetMaxThreads()); + for (auto& v : digiMap.nDigisPerThread) + v.resize(nModuleSides, 0); - for (const auto& digi : digis) { + CBM_PARALLEL() + { + int threadId = openmp::GetThreadNum(); // Move out of the loop, seems to create small overhead + auto& nDigis = digiMap.nDigisPerThread.at(threadId); - if (isPulser(digi)) { - nPulsers++; - continue; - } + CBM_OMP(for schedule(static)) + for (size_t i = 0; i < digis.size(); i++) { + const auto& digi = digis[i]; + u16 moduleIndex = digiMap.ModuleIndex(digi); - int moduleAddr = CbmStsAddress::GetMotherAddress(digi.GetAddress(), kStsModule); - bool isFront = digi.GetChannel() < fPars->nChannels / 2; - if (isFront) digiMap.front[moduleAddr].emplace_back(digi); - else { - 
CbmStsDigi tmpdigi = digi; - tmpdigi.SetChannel(tmpdigi.GetChannel() - nChannelsPerSide); - digiMap.back[moduleAddr].emplace_back(tmpdigi); + if (moduleIndex == InvalidModule) continue; + + bool isFront = digi.GetChannel() < nChannelsPerSide; + nDigis[moduleIndex + (isFront ? 0 : nModules)]++; } } - if (nPulsers > 0) L_(warning) << "STS Hitfinder: Discarded " << nPulsers << " pulser digis"; - // Print digi counts per module - for (const auto& mod : fPars->modules) { - i32 moduleAddr = mod.address; - L_(debug) << "Module " << moduleAddr << " has " << digiMap.front[moduleAddr].size() << " front digis and " - << digiMap.back[moduleAddr].size() << " back digis"; + // Sum up digis per module + for (auto& v : digiMap.nDigisPerThread) { + for (size_t m = 0; m < nModuleSides; m++) { + digiMap.nDigisPerModule[m] += v[m]; + } } - maxNDigisPerModule = 0; - for (const auto& mod : digiMap.front) { - maxNDigisPerModule = std::max(maxNDigisPerModule, mod.second.size()); + // if (nPulsers > 0) L_(warning) << "STS Hitfinder: Discarded " << nPulsers << " pulser digis"; + + // Print digi counts per module + for (const auto& mod : fPars->setup.modules) { + // FIXME should use module index here + i32 moduleAddr = CbmStsAddress::PackDigiAddress(mod.address); + u16 moduleIndex = digiMap.addrToIndex[moduleAddr]; + L_(debug) << "Module " << moduleAddr << " has " << digiMap.nDigisPerModule[moduleIndex] << " front digis and " + << digiMap.nDigisPerModule[moduleIndex + nModules] << " back digis"; } - for (const auto& mod : digiMap.back) { - maxNDigisPerModule = std::max(maxNDigisPerModule, mod.second.size()); + digiMap.maxNDigisPerModule = 0; + for (const auto& nDigis : digiMap.nDigisPerModule) { + digiMap.maxNDigisPerModule = std::max(digiMap.maxNDigisPerModule, nDigis); } return digiMap; } -void sts::HitfinderChain::FlattenDigis(DigiMap& digiMap) +void sts::HitfinderChain::FlattenDigis(gsl::span<const CbmStsDigi> digis, DigiMap& digiMap) { L_(debug) << "STS Hitfinder Chain: Flattening 
digis"; - xpu::scoped_timer t_ {"Flatten Digis"}; - FlattenDigisSide(digiMap.front, true); - FlattenDigisSide(digiMap.back, false); -} + xpu::scoped_timer t_("Flatten Digis"); + xpu::t_add_bytes(digis.size_bytes()); -void sts::HitfinderChain::FlattenDigisSide(DigiMapSide& digis, bool isFront) -{ - EnsureParameters(); + const auto& modules = fPars->setup.modules; - int nModules = fPars->modules.size(); - xpu::h_view pMdigiOffset {fHitfinder.digiOffsetPerModule}; - xpu::h_view pDigisFlat {fHitfinder.digisPerModule}; + int nModules = modules.size(); + int nModuleSides = nModules * 2; + int nChannelsPerSide = fPars->setup.nChannels / 2; + xpu::h_view pDigisFlat(fHitfinder.digisPerModule); // Final input copied the GPU - int sideOffset = (isFront ? 0 : nModules); + xpu::h_view pMdigiOffset(fHitfinder.digiOffsetPerModule); + pMdigiOffset[0] = 0; + for (int m = 1; m < nModuleSides + 1; m++) { + pMdigiOffset[m] = pMdigiOffset[m - 1] + digiMap.nDigisPerModule.at(m - 1); + } - if (isFront) pMdigiOffset[0] = 0; - for (int m = 0; m < nModules; m++) { - size_t moduleOffset = pMdigiOffset[sideOffset + m]; - const auto& moduleDigis = digis[fPars->modules[m].address]; - std::copy(moduleDigis.begin(), moduleDigis.end(), &pDigisFlat[moduleOffset]); - pMdigiOffset[sideOffset + m + 1] = moduleOffset + moduleDigis.size(); + std::vector<std::vector<size_t>> digiOffsetsPerThread(openmp::GetMaxThreads()); + for (auto& v : digiOffsetsPerThread) + v.resize(nModuleSides); + for (size_t i = 1; i < digiOffsetsPerThread.size(); i++) { + for (size_t m = 0; m < nModuleSides; m++) { + for (size_t j = 0; j < i; j++) { + digiOffsetsPerThread[i][m] += digiMap.nDigisPerThread[j][m]; + } + } + } + + CBM_PARALLEL() + { + int threadId = openmp::GetThreadNum(); // Move out of the loop, seems to create small overhead + auto& digiOffsets = digiOffsetsPerThread.at(threadId); + + CBM_OMP(for schedule(static)) + for (size_t i = 0; i < digis.size(); i++) { + const auto& digi = digis[i]; + + u16 moduleIndex = 
digiMap.ModuleIndex(digi); + + if (moduleIndex == InvalidModule) continue; + + bool isFront = digi.GetChannel() < nChannelsPerSide; + moduleIndex += isFront ? 0 : nModules; + + size_t moduleOffset = pMdigiOffset[moduleIndex]; + size_t digiOffset = digiOffsets[moduleIndex]++; + + pDigisFlat[moduleOffset + digiOffset] = digi; + } + } + + // Channels in Digis are counted 0 to 2047 by default. + // Channel 0 -> 1023 is the front side of the module + // Channel 1024 -> 2047 is the back side of the module + // Cluster finder only operates on one side, so we have to shift the channel numbers + CBM_PARALLEL_FOR(schedule(static)) + for (size_t i = pMdigiOffset[nModules]; i < pMdigiOffset[nModuleSides]; i++) { + auto& digi = pDigisFlat[i]; + digi.SetChannel(digi.GetChannel() - nChannelsPerSide); } } -PartitionedPODVector<sts::Hit> sts::HitfinderChain::FlattenHits(xpu::queue queue) +PartitionedSpan<sts::Hit> sts::HitfinderChain::FlattenHits(xpu::queue queue) { auto& hfc = fHitfinder; xpu::h_view nHits(hfc.nHitsPerModule); @@ -411,8 +514,15 @@ PartitionedPODVector<sts::Hit> sts::HitfinderChain::FlattenHits(xpu::queue queue size_t nHitsTotal = 0; for (size_t m = 0; m < hfc.nModules; m++) nHitsTotal += GetNHits(nHits, m); - L_(info) << "STS Hitfinder Chain: Flattening " << nHitsTotal << " hits"; - PODVector<sts::Hit> hits(nHitsTotal); + L_(debug) << "STS Hitfinder Chain: Flattening " << nHitsTotal << " hits"; + + if (nHitsTotal > hfc.hitsFlatCapacity) + throw ProcessingError("STS Hitfinder Chain: Not enough memory for hits: {} > {}", nHitsTotal, hfc.hitsFlatCapacity); + + // hitBuffer is potentially bigger than nHitsTotal + // So construct a span to the memory region we actually use + xpu::h_view hitBuffer(hfc.hitsFlat); + gsl::span hits(hitBuffer.data(), nHitsTotal); // Transfer Hits / Cluster back to host // Hits and Clusters are stored in buckets. Buckets aren't necessarily full. 
@@ -434,7 +544,7 @@ PartitionedPODVector<sts::Hit> sts::HitfinderChain::FlattenHits(xpu::queue queue for (size_t i = 0; i < m; i++) { offset += GetNHits(nHits, i); } - std::copy_n(hfc.hitsPerModule.get() + hfc.maxHitsPerModule * m, GetNHits(nHits, m), hits.begin() + offset); + std::copy_n(hfc.hitsPerModule.get() + hfc.hitsAllocatedPerModule * m, GetNHits(nHits, m), hits.begin() + offset); } } else { // GPU @@ -448,20 +558,25 @@ PartitionedPODVector<sts::Hit> sts::HitfinderChain::FlattenHits(xpu::queue queue size_t nHitsCopied = 0; for (size_t m = 0; m < hfc.nModules; m++) { size_t numHitsInModule = GetNHits(nHits, m); - queue.copy(hfc.hitsPerModule.get() + hfc.maxHitsPerModule * m, hits.data() + nHitsCopied, + queue.copy(hfc.hitsPerModule.get() + hfc.hitsAllocatedPerModule * m, hits.data() + nHitsCopied, numHitsInModule * sizeof(sts::Hit)); nHitsCopied += numHitsInModule; } } - std::vector<size_t> nHitsPerModule; + + // Could do this more efficiently, cache addresses once in the beginning, ... 
+ // doesn't really matter overhead wise + fHitOffsets.clear(); + fHitOffsets.push_back(0); for (size_t m = 0; m < hfc.nModules; m++) - nHitsPerModule.push_back(GetNHits(nHits, m)); - std::vector<u32> addresses; - for (auto& m : fPars->modules) - addresses.push_back(m.address); - PartitionedPODVector<sts::Hit> partitionedHits(std::move(hits), nHitsPerModule, addresses); + fHitOffsets.push_back(fHitOffsets.back() + GetNHits(nHits, m)); + + fAddresses = {}; + for (auto& m : fPars->setup.modules) + fAddresses.push_back(m.address); + PartitionedSpan partitionedHits(hits, fHitOffsets, fAddresses); return partitionedHits; } @@ -470,89 +585,20 @@ size_t sts::HitfinderChain::GetNHits(xpu::h_view<int> nHitsPerModule, int module return std::min<size_t>(nHitsPerModule[module], fHitfinder.maxHitsPerModule); } -void sts::HitfinderChain::AppendClustersModule(int module, bool isFront, std::vector<sts::Cluster>& clusters) -{ - EnsureParameters(); - - auto& hfc = fHitfinder; - int moduleSide = (isFront ? 
module : 2 * module); - xpu::h_view hNClusters {hfc.nClustersPerModule}; - xpu::h_view hClusters {hfc.clusterDataPerModule}; - - int nClusters = hNClusters[moduleSide]; - auto* gpuClusters = &hClusters[moduleSide * hfc.maxClustersPerModule]; - - clusters.reserve(clusters.size() + nClusters); - - for (int i = 0; i < nClusters; i++) { - clusters.emplace_back(gpuClusters[i]); - - // auto& clusterGpu = gpuClusters[i]; - - // auto& clusterOut = clusters.emplace_back(); - - // clusterOut.SetAddress(fPars->modules[module].address); - // clusterOut.SetProperties(clusterGpu.fCharge, clusterGpu.fPosition, clusterGpu.fPositionError, clusterGpu.fTime, - // clusterGpu.fTimeError); - // clusterOut.SetSize(clusterGpu.fSize); - } -} - -void sts::HitfinderChain::AppendHitsModule(int module, std::vector<sts::Hit>& hits) -{ - EnsureParameters(); - - xpu::h_view hHits {fHitfinder.hitsPerModule}; - xpu::h_view hNHits {fHitfinder.nHitsPerModule}; - - auto* gpuHits = &hHits[module * fHitfinder.maxHitsPerModule]; - int nHitsGpu = hNHits[module]; - - hits.reserve(hits.size() + nHitsGpu); - - for (int i = 0; i < nHitsGpu; i++) { - hits.emplace_back(gpuHits[i]); - // auto& hitGpu = gpuHits[i]; - - // hits.emplace_back(fPars->modules[module].address, - // TVector3 {hitGpu.fX, hitGpu.fY, hitGpu.fZ}, - // TVector3 {hitGpu.fDx, hitGpu.fDy, hitGpu.fDz}, - // hitGpu.fDxy, - // 0, - // 0, - // double(hitGpu.fTime), - // hitGpu.fTimeError, - // hitGpu.fDu, - // hitGpu.fDv); - } -} - void sts::HitfinderChain::EnsureDigiOffsets(DigiMap& digi) { - xpu::h_view digiOffset {fHitfinder.digiOffsetPerModule}; + xpu::h_view digiOffset(fHitfinder.digiOffsetPerModule); - size_t nModules = fPars->modules.size(); + size_t nModules = fPars->setup.modules.size(); size_t offset = 0; // Front - for (size_t m = 0; m < nModules; m++) { - const auto& moduleDigis = digi.front[fPars->modules[m].address]; + for (size_t m = 0; m < nModules * 2; m++) { if (digiOffset[m] != offset) { L_(fatal) << "Module " << m << ": Digi 
offset mismatch: " << digiOffset[m] << " != " << offset; std::abort(); } - offset += moduleDigis.size(); - } - - // Back - for (size_t m = 0; m < nModules; m++) { - const auto& moduleDigis = digi.back[fPars->modules[m].address]; - - if (digiOffset[nModules + m] != offset) { - L_(fatal) << "Module " << nModules + m << ": Digi offset mismatch: " << digiOffset[nModules + m] - << " != " << offset; - std::abort(); - } - offset += moduleDigis.size(); + size_t nDigis = digi.nDigisPerModule[m]; + offset += nDigis; } if (offset != digiOffset[2 * nModules]) { @@ -563,12 +609,12 @@ void sts::HitfinderChain::EnsureDigiOffsets(DigiMap& digi) void sts::HitfinderChain::EnsureDigisSorted() { - xpu::h_view digiOffset {fHitfinder.digiOffsetPerModule}; - xpu::h_view digis {fHitfinder.digisPerModule}; + xpu::h_view digiOffset(fHitfinder.digiOffsetPerModule); + xpu::h_view digis(fHitfinder.digisPerModule); bool isSorted = true; - for (size_t m = 0; m < fPars->modules.size(); m++) { + for (size_t m = 0; m < fPars->setup.modules.size(); m++) { int nDigis = digiOffset[m + 1] - digiOffset[m]; if (nDigis == 0) continue; @@ -599,10 +645,10 @@ void sts::HitfinderChain::EnsureDigisSorted() void sts::HitfinderChain::EnsureChannelOffsets(gsl::span<u32> channelOffsetsByModule) { - xpu::h_view digisPerModule {fHitfinder.digisPerModule}; - xpu::h_view digiOffsetPerModule {fHitfinder.digiOffsetPerModule}; - for (size_t m = 0; m < fPars->modules.size() * 2; m++) { - int nChannels = fPars->nChannels / 2; // Consider module sides + xpu::h_view digisPerModule(fHitfinder.digisPerModule); + xpu::h_view digiOffsetPerModule(fHitfinder.digiOffsetPerModule); + for (size_t m = 0; m < fPars->setup.modules.size() * 2; m++) { + int nChannels = fPars->setup.nChannels / 2; // Consider module sides std::vector<u32> expectedChannelOffsets(nChannels, 0); @@ -644,7 +690,7 @@ void sts::HitfinderChain::EnsureChannelOffsets(gsl::span<u32> channelOffsetsByMo void 
sts::HitfinderChain::EnsureClustersSane(gsl::span<ClusterIdx> hClusterIdx, gsl::span<int> hNClusters) { - for (size_t m = 0; m < 2 * fPars->modules.size(); m++) { + for (size_t m = 0; m < 2 * fPars->setup.modules.size(); m++) { int nClusters = hNClusters[m]; L_(trace) << "Module " << m << " has " << nClusters << " clusters of " << fHitfinder.maxClustersPerModule; diff --git a/algo/detectors/sts/HitfinderChain.h b/algo/detectors/sts/HitfinderChain.h index da0b8c8470b6184a8cac7cdbab260e50b4e207f5..ddcf955a6543d78c8ecc744052a6ae3ff8c418ef 100644 --- a/algo/detectors/sts/HitfinderChain.h +++ b/algo/detectors/sts/HitfinderChain.h @@ -14,7 +14,7 @@ #include <optional> #include <vector> -#include "PartitionedVector.h" +#include "PartitionedSpan.h" #include "SubChain.h" #include "gpu/xpu_legacy.h" #include "sts/Hitfinder.h" @@ -24,12 +24,9 @@ namespace cbm::algo::sts { - struct HitfinderTimes { - double timeIO = 0; - double timeSortDigi = 0; - double timeCluster = 0; - double timeSortCluster = 0; - double timeHits = 0; + struct HitfinderChainPars { + HitfinderPars setup; // TODO: HitfinderPars should renamed to HitfinderSetup + RecoParams::STS::Memory memory; }; /** @@ -42,12 +39,12 @@ namespace cbm::algo::sts public: struct Result { - PartitionedPODVector<sts::Hit> hits; + PartitionedSpan<sts::Hit> hits; sts::HitfinderMonitor monitor; }; - void SetParameters(const sts::HitfinderPars& parameters); - const sts::HitfinderPars& GetParameters() const { return *fPars; } + void SetParameters(const HitfinderChainPars& parameters); + const HitfinderChainPars& GetParameters() const { return *fPars; } /** * Teardown chain. 
@@ -57,11 +54,22 @@ namespace cbm::algo::sts Result operator()(gsl::span<const CbmStsDigi>); private: - // Shorthands to map module-address onto digis in that module - using DigiMapSide = std::map<int, std::vector<CbmStsDigi>>; + static constexpr u16 InvalidModule = 0xFFFF; + struct DigiMap { - DigiMapSide front; - DigiMapSide back; + //< Map module address to index in digis array. Only works for 17 bit packed addresses. + std::vector<u16> addrToIndex; + // Map modules to number of Digis, 2 * NModules entries, first half is front, second half is back + std::vector<size_t> nDigisPerModule; + std::vector<std::vector<size_t>> nDigisPerThread; //< Number of digis per module per thread [thread][module] + size_t maxNDigisPerModule = 0; //< Upper bound on number of digis per module + + u16 ModuleIndex(const CbmStsDigi& digi) const + { + i32 moduleAddr = digi.GetAddressPacked(); + u16 moduleIndex = addrToIndex[moduleAddr]; + return moduleIndex; + } }; /** @@ -82,20 +90,19 @@ namespace cbm::algo::sts void AllocateDynamic(size_t, size_t); /** - * Sort Digis into buckets by module. - */ - DigiMap SortDigisIntoModules(gsl::span<const CbmStsDigi> digis, size_t& maxNDigisPerModule); + * Count Digis per module. + */ + DigiMap CountDigisPerModules(gsl::span<const CbmStsDigi> digis); /** - * Copy Digis into flat array that can be copied to the GPU. - */ - void FlattenDigis(DigiMap& digiMap); - void FlattenDigisSide(DigiMapSide& digis, bool isFront); + * Copy Digis into flat array that can be copied to the GPU. + */ + void FlattenDigis(gsl::span<const CbmStsDigi> digis, DigiMap& digiMap); /** * Copy Hits back to host into a flat array. - */ - PartitionedPODVector<sts::Hit> FlattenHits(xpu::queue); + */ + PartitionedSpan<sts::Hit> FlattenHits(xpu::queue); /** * @brief Returns the number of hits of a module. 
@@ -104,12 +111,6 @@ namespace cbm::algo::sts */ size_t GetNHits(xpu::h_view<int> nHitsPerModule, int module); - /** - * Transfer Hits / Clusters back to host and convert to common CBM types. - */ - void AppendClustersModule(int module, bool isFront, std::vector<sts::Cluster>&); - void AppendHitsModule(int module, std::vector<sts::Hit>&); - // Debug functions, ensure reco produces sane results void EnsureDigiOffsets(DigiMap&); void EnsureDigisSorted(); @@ -117,9 +118,13 @@ namespace cbm::algo::sts void EnsureClustersSane(gsl::span<ClusterIdx>, gsl::span<int>); void EnsureClustersSorted(); - std::optional<const sts::HitfinderPars> fPars; + std::optional<sts::HitfinderChainPars> fPars; Hitfinder fHitfinder; + + // Output buffer, used by the returned PartitionedSpan + std::vector<u32> fAddresses; + std::vector<size_t> fHitOffsets; }; } // namespace cbm::algo::sts diff --git a/algo/evbuild/EventbuildChain.cxx b/algo/evbuild/EventbuildChain.cxx index fe8cc5b599b3a1690226b0e10cbf5195015ca7da..6d2af6325aff0c8e8939e992e2aecf8a7f0bc44f 100644 --- a/algo/evbuild/EventbuildChain.cxx +++ b/algo/evbuild/EventbuildChain.cxx @@ -6,13 +6,6 @@ #include "CbmDigiTimeslice.h" -#include <boost/archive/binary_oarchive.hpp> -#include <boost/iostreams/device/array.hpp> -#include <boost/iostreams/device/back_inserter.hpp> -#include <boost/iostreams/stream.hpp> -#include <boost/serialization/utility.hpp> -#include <boost/serialization/vector.hpp> - #include <sstream> #include <string> @@ -23,63 +16,50 @@ using namespace cbm::algo; using namespace cbm::algo::evbuild; -namespace b_io = boost::iostreams; -namespace b_ar = boost::archive; // ----- Constructor ------------------------------------------------------ -EventbuildChain::EventbuildChain(const Config& config) +EventbuildChain::EventbuildChain(const Config& config, std::shared_ptr<HistogramSender> sender) : fTriggerDet(config.fTrigger.Detector()) , fTrigger(config.fTrigger) , fBuilder(config.fBuilder) , fSelector(config.fSelector) , 
fQa(DigiEventQaConfig(config.fBuilder, 10., 100)) + , fSender(sender) { Status(); - if ("" != fHistComChan) { - fZmqContext = new zmq::context_t(1); - fZmqSocket = new zmq::socket_t(*fZmqContext, ZMQ_PUSH); - fZmqSocket->connect(fHistComChan); // This side "connects" to socket => Other side should have "bind"!!!! - + if (fSender) { /// FIXME: based on JdC question, decide whether config re-emitted on each iteration instead of only at startup? /// => Header for multi-part message with Configuration + data /// => Format: std::pair< Nb histogram configs, Nb canvas configs > std::vector<std::pair<std::string, std::string>> histsCfg = fQa.GetConfig().GetHistosConfigs(); std::vector<std::pair<std::string, std::string>> canvsCfg = fQa.GetConfig().GetCanvasConfigs(); - PrepareAndSendMsg(std::pair<uint32_t, uint32_t>(histsCfg.size(), canvsCfg.size()), zmq::send_flags::sndmore); + fSender->PrepareAndSendMsg(std::pair<uint32_t, uint32_t>(histsCfg.size(), canvsCfg.size()), + zmq::send_flags::sndmore); /// => Histograms configuration = destination folder in http browser, mandatory but can be empty (= root folder) /// => 1 ZMQ message per histogram (= 1 part) /// => If no (new) histograms declared (e.g. new canvas declaration), has to be en empty message + `0` in the header for (const auto& cfg : histsCfg) { - PrepareAndSendMsg(cfg, zmq::send_flags::sndmore); + fSender->PrepareAndSendMsg(cfg, zmq::send_flags::sndmore); } /// => Canvas configuration /// => 1 ZMQ message per canvas (= 1 part) /// => If no (new) canvas declared (e.g. 
only histos declaration), has to be en empty message + `0` in the header for (const auto& cfg : canvsCfg) { - PrepareAndSendMsg(cfg, zmq::send_flags::sndmore); + fSender->PrepareAndSendMsg(cfg, zmq::send_flags::sndmore); } /// => (empty) Histograms serialization and emission to close multi-part message - PrepareAndSendMsg(std::vector<Histo1D> {}, zmq::send_flags::none); + fSender->PrepareAndSendMsg(std::vector<Histo1D> {}, zmq::send_flags::none); } } // ---------------------------------------------------------------------------- -// ----- Constructor ------------------------------------------------------ +// ----- Destructor ------------------------------------------------------ EventbuildChain::~EventbuildChain() { - if ("" != fHistComChan) { - if (fZmqSocket) { - fZmqSocket->close(); - delete fZmqSocket; - } - if (fZmqContext) { // - delete fZmqContext; - } - } } // ---------------------------------------------------------------------------- @@ -97,11 +77,11 @@ EventbuildChain::ResultType EventbuildChain::Run(const DigiData& timeslice) auto [events, evbuildMon] = fBuilder(timeslice, triggers, fSelector); /// => Histograms serialization and emission - if ("" != fHistComChan) { + if (fSender) { // --- Run event QA DigiEventQaData qaData = fQa(events); - PrepareAndSendMsg(qaData.fVectHistos, zmq::send_flags::none); + fSender->PrepareAndSendMsg(qaData.fVectHistos, zmq::send_flags::none); L_(info) << "Published histograms, nb: " << qaData.fVectHistos.size(); } @@ -202,24 +182,3 @@ std::vector<double> EventbuildChain::GetDigiTimes(const DigiData& timeslice, ECb return result; } // ---------------------------------------------------------------------------- - - -// ----- Send a message to the histogram server ---------------------------- -template<class Object> -void EventbuildChain::PrepareAndSendMsg(const Object& obj, zmq::send_flags flags) -{ - /// Needed ressources (serializd string, boost inserter, boost stream, boost binary output archive) - std::string 
serial_str; - b_io::back_insert_device<std::string> inserter(serial_str); - b_io::stream<b_io::back_insert_device<std::string>> bstream(inserter); - b_ar::binary_oarchive oa(bstream); - - serial_str.clear(); - oa << obj; - bstream.flush(); - - zmq::message_t msg(serial_str.size()); - std::copy_n(static_cast<const char*>(serial_str.data()), msg.size(), static_cast<char*>(msg.data())); - fZmqSocket->send(msg, flags); -} -// ---------------------------------------------------------------------------- diff --git a/algo/evbuild/EventbuildChain.h b/algo/evbuild/EventbuildChain.h index 4c50976cbf8eee6cea7f2aa1f78822e30baede53..6b4676383168f46f53474b9ed53be54c0c7511d0 100644 --- a/algo/evbuild/EventbuildChain.h +++ b/algo/evbuild/EventbuildChain.h @@ -9,11 +9,12 @@ #include "TimeClusterTrigger.h" -#include <zmq.hpp> +#include <memory> #include "DigiEventQa.h" #include "DigiEventSelector.h" #include "EventBuilder.h" +#include "HistogramSender.h" #include "SubChain.h" namespace cbm::algo @@ -47,7 +48,7 @@ namespace cbm::algo::evbuild using ResultType = std::pair<std::vector<DigiEvent>, EventbuildChainMonitorData>; /** @brief Constructor **/ - EventbuildChain(const Config& config); + EventbuildChain(const Config& config, std::shared_ptr<HistogramSender> sender = nullptr); /** @brief Destructor **/ ~EventbuildChain(); @@ -58,20 +59,13 @@ namespace cbm::algo::evbuild /** @brief Status info to logger **/ void Status() const; - private: // members ECbmModuleId fTriggerDet = ECbmModuleId::kNotExist; ///< Trigger detector TimeClusterTrigger fTrigger; ///< Trigger algorithm EventBuilder fBuilder; ///< Event builder algorithm DigiEventSelector fSelector; ///< Event selector algorithm DigiEventQa fQa; ///< Event QA algorithm - - /// FIXME: decide if the address and port of the histogram server should come from command line or YAML - //std::string fHistComChan = ""; - std::string fHistComChan = "tcp://127.0.0.1:56800"; - zmq::context_t* fZmqContext; ///< ZMQ context FIXME: should be 
only one context per binary! - zmq::socket_t* fZmqSocket; ///< ZMQ socket to histogram server - + std::shared_ptr<HistogramSender> fSender; ///< Histogram sender private: // methods /** @brief Extract digi times from CbmDigiTimeslice @@ -80,12 +74,6 @@ namespace cbm::algo::evbuild **/ std::vector<double> GetDigiTimes(const DigiData& timeslice, ECbmModuleId system); - /** @brief Serialize object and send it to the histogram server - ** @param obj: object to be serialized in the message, e.g. config pairs of strings or Histo1D - ** @param flags: or'ed values from zmq::send_flags, typ. zmq::send_flags::sndmore to indicate multi-parts message - **/ - template<class Object> - void PrepareAndSendMsg(const Object& obj, zmq::send_flags flags); }; } // namespace cbm::algo::evbuild diff --git a/algo/global/Reco.cxx b/algo/global/Reco.cxx index d3f03cfa46ef1a9e73b5145074e5867c314e2234..c6ab97da629207f37c61e9369783b32c068de35c 100644 --- a/algo/global/Reco.cxx +++ b/algo/global/Reco.cxx @@ -47,6 +47,11 @@ void Reco::Init(const Options& opts) fStsHitFinder.SetContext(&fContext); fTracking.SetContext(&fContext); + if (Opts().HistogramUri() != "") { + fSender = std::make_shared<HistogramSender>(Opts().HistogramUri()); + // fContext.sender = fSender; + } + xpu::device_prop props {xpu::device::active()}; L_(info) << "Running CBM Reco on Device '" << props.name() << "' (Using " << openmp::GetMaxThreads() << " OpenMP threads)"; @@ -67,14 +72,17 @@ void Reco::Init(const Options& opts) // --- Event building fs::path configFile = opts.ParamsDir() / "EventbuildConfig.yaml"; evbuild::Config config(YAML::LoadFile(configFile.string())); - fEventBuild = std::make_unique<evbuild::EventbuildChain>(config); + fEventBuild = std::make_unique<evbuild::EventbuildChain>(config, fSender); // STS Hitfinder fs::path stsHitfinderParamsPath = opts.ParamsDir() / "StsHitfinder.yaml"; yaml = YAML::LoadFile(stsHitfinderParamsPath.string()); - sts::HitfinderPars hitFinderConfig = 
config::Read<sts::HitfinderPars>(yaml); - hitFinderConfig.landauTable = sts::LandauTable::FromFile(opts.ParamsDir() / "LandauWidthTable.txt"); - fStsHitFinder.SetParameters(hitFinderConfig); + sts::HitfinderPars hitFinderSetup = config::Read<sts::HitfinderPars>(yaml); + hitFinderSetup.landauTable = sts::LandauTable::FromFile(opts.ParamsDir() / "LandauWidthTable.txt"); + sts::HitfinderChainPars hitFinderPars; + hitFinderPars.setup = std::move(hitFinderSetup); + hitFinderPars.memory = Params().sts.memory; + fStsHitFinder.SetParameters(hitFinderPars); // Tracking fTracking.Init(); @@ -89,70 +97,69 @@ void Reco::Init(const Options& opts) RecoResults Reco::Run(const fles::Timeslice& ts) { - if (!fInitialized) { throw std::runtime_error("Chain not initialized"); } + if (!fInitialized) throw std::runtime_error("Chain not initialized"); for (uint64_t comp = 0; comp < ts.num_components(); comp++) { xpu::t_add_bytes(ts.size_component(comp)); } - xpu::push_timer(fmt::format("TS {}", ts.index())); - - L_(info) << ">>> Processing TS " << ts.index(); - xpu::set<cbm::algo::Params>(Params()); - - Unpack::resultType unpackResult; - UnpackMonitorData unpackMonitor; - - if (Opts().HasStep(Step::Unpack)) { - switch (Params().sts.unpackMode) { - case RecoParams::UnpackMode::XPU: - // digis = fUnpackXpu.Exec(ts); - throw std::runtime_error("XPU unpacker currently not implemented"); - break; - default: - case RecoParams::UnpackMode::CPU: - unpackResult = fUnpack.Run(ts); - unpackMonitor = unpackResult.second; - break; + RecoResults results; + xpu::timings tsTimes; + { + xpu::scoped_timer t_(fmt::format("TS {}", ts.index()), &tsTimes); + + L_(info) << ">>> Processing TS " << ts.index(); + xpu::set<cbm::algo::Params>(Params()); + + Unpack::resultType unpackResult; + UnpackMonitorData unpackMonitor; + + if (Opts().HasStep(Step::Unpack)) { + switch (Params().sts.unpackMode) { + case RecoParams::UnpackMode::XPU: + // digis = fUnpackXpu.Exec(ts); + throw std::runtime_error("XPU unpacker 
currently not implemented"); + break; + default: + case RecoParams::UnpackMode::CPU: + unpackResult = fUnpack.Run(ts); + unpackMonitor = unpackResult.second; + break; + } } - } - - // --- Digi event building - std::vector<DigiEvent> events; - evbuild::EventbuildChainMonitorData evbuildMonitor; - if (Opts().HasStep(Step::DigiTrigger)) { - auto [ev, mon] = fEventBuild->Run(unpackResult.first); - events = std::move(ev); - evbuildMonitor = mon; - } - - sts::HitfinderMonitor stsHitfinderMonitor; - PartitionedPODVector<sts::Hit> stsHits; - if (Opts().HasStep(Step::LocalReco) && Opts().HasDetector(fles::Subsystem::STS)) { - auto stsResults = fStsHitFinder(unpackResult.first.fSts); - stsHits = std::move(stsResults.hits); - stsHitfinderMonitor = stsResults.monitor; - } + // --- Digi event building + std::vector<DigiEvent> events; + evbuild::EventbuildChainMonitorData evbuildMonitor; + if (Opts().HasStep(Step::DigiTrigger)) { + auto [ev, mon] = fEventBuild->Run(unpackResult.first); + events = std::move(ev); + evbuildMonitor = mon; + } - xpu::timings ts_times = xpu::pop_timer(); + sts::HitfinderMonitor stsHitfinderMonitor; + PartitionedSpan<sts::Hit> stsHits; + if (Opts().HasStep(Step::LocalReco) && Opts().HasDetector(fles::Subsystem::STS)) { + auto stsResults = fStsHitFinder(unpackResult.first.fSts); + stsHits = std::move(stsResults.hits); + stsHitfinderMonitor = stsResults.monitor; + } - QueueUnpackerMetrics(ts, unpackMonitor, unpackResult.first); - QueueStsRecoMetrics(stsHitfinderMonitor); - QueueEvbuildMetrics(evbuildMonitor); + // --- Tracking + if (Opts().HasStep(Step::Tracking)) { + TrackingChain::Return_t trackingOutput = fTracking.Run(results); + if (Opts().HasOutput(RecoData::Track)) { results.tracks = std::move(trackingOutput.first); } + QueueTrackingMetrics(trackingOutput.second); + } - PrintTimings(ts_times); + QueueUnpackerMetrics(ts, unpackMonitor, unpackResult.first); + QueueStsRecoMetrics(stsHitfinderMonitor); + QueueEvbuildMetrics(evbuildMonitor); - 
RecoResults results; - if (Opts().HasOutput(RecoData::DigiEvent)) results.events = std::move(events); - if (Opts().HasOutput(RecoData::Hit)) results.stsHits = std::move(stsHits); - - // --- Tracking - if (Opts().HasStep(Step::Tracking)) { - TrackingChain::Return_t trackingOutput = fTracking.Run(results); - if (Opts().HasOutput(RecoData::Track)) { results.tracks = std::move(trackingOutput.first); } - QueueTrackingMetrics(trackingOutput.second); + if (Opts().HasOutput(RecoData::DigiEvent)) results.events = std::move(events); + if (Opts().HasOutput(RecoData::Hit)) results.stsHits = std::move(stsHits); } + PrintTimings(tsTimes); return results; } diff --git a/algo/global/Reco.h b/algo/global/Reco.h index 83efd74ace93014a6dafd71fd3bff78b1dbbaf91..7932985b6bb93d755d58416755a708690fbe74ba 100644 --- a/algo/global/Reco.h +++ b/algo/global/Reco.h @@ -7,6 +7,7 @@ #include <xpu/host.h> #include "EventbuildChain.h" +#include "HistogramSender.h" #include "SubChain.h" #include "UnpackChain.h" #include "ca/TrackingChain.h" @@ -41,6 +42,7 @@ namespace cbm::algo bool fInitialized = false; ChainContext fContext; xpu::timings fTimesliceTimesAcc; + std::shared_ptr<HistogramSender> fSender; size_t prevTsId = 0; diff --git a/algo/global/RecoResults.h b/algo/global/RecoResults.h index 0a2a3bbced9381de36b268bb371904910af2ca7f..a8ed4e5d741d67623e8c3d7ee82429729d319291 100644 --- a/algo/global/RecoResults.h +++ b/algo/global/RecoResults.h @@ -12,7 +12,7 @@ #include <vector> #include "DigiData.h" -#include "base/PartitionedVector.h" +#include "PartitionedSpan.h" #include "ca/core/utils/CaVector.h" #include "data/sts/Hit.h" @@ -22,7 +22,7 @@ namespace cbm::algo /// \brief Structure to keep reconstructed results: digi-events, hits and tracks struct RecoResults { std::vector<DigiEvent> events; - PartitionedPODVector<sts::Hit> stsHits; + PartitionedSpan<sts::Hit> stsHits; ca::Vector<ca::Track> tracks; }; } // namespace cbm::algo diff --git a/algo/global/StorableRecoResults.h 
b/algo/global/StorableRecoResults.h index c29d434c8dbffd5309395fd01927f5a1edc51b8b..bc1f089d88ef670b33cf87b9070d351d54d2c274 100644 --- a/algo/global/StorableRecoResults.h +++ b/algo/global/StorableRecoResults.h @@ -11,6 +11,8 @@ #include <cstdint> +#include "PartitionedVector.h" + namespace cbm::algo { diff --git a/algo/params/RecoParams.yaml b/algo/params/RecoParams.yaml index 0e2f4456779e021b01accb18d66e58d3dca74ca2..9fc82381041d55089cc6f5e6e9379b8bf871cbfa 100644 --- a/algo/params/RecoParams.yaml +++ b/algo/params/RecoParams.yaml @@ -9,14 +9,10 @@ sts: timeCutClusterAbs: -1 timeCutClusterSig: 4 - memoryLimits: - maxDigisPerTS: 80000000 - # TODO: Find sensible value! - maxDigisPerMS: 100000 - # TODO: Find sensible value - maxDigisPerModule: 4000000 - clustersPerDigiTS: 1.0 - clustersPerDigiModule: 1.0 - hitsPerClusterTS: 1.0 - hitsPerClusterModule: 0.85 + memory: + allocationMode: Auto + maxNDigisPerTS: 30000000 + maxNDigisPerModule: 3000000 + clustersPerDigi: 1.0 + hitsPerCluster: 0.85 ... 
diff --git a/algo/test/CMakeLists.txt b/algo/test/CMakeLists.txt index 6eeb86a8a18d468faf7907d6f337fefc1c6f45f9..dcd036d4c90c71418e2a1fcda1aa19cf18173c71 100644 --- a/algo/test/CMakeLists.txt +++ b/algo/test/CMakeLists.txt @@ -57,3 +57,7 @@ Set(PartitionedVectorSources ) CreateGTestExeAndAddTest(_GTestPartitionedVector "${INCLUDE_DIRECTORIES}" "${LINK_DIRECTORIES}" "${PartitionedVectorSources}" "${PUB_DEPS}" "${PVT_DEPS}" "${INT_DEPS}" "") + +Set(PartitionedSpanSources _GTestPartitionedSpan.cxx) +CreateGTestExeAndAddTest(_GTestPartitionedSpan "${INCLUDE_DIRECTORIES}" "${LINK_DIRECTORIES}" + "${PartitionedSpanSources}" "${PUB_DEPS}" "${PVT_DEPS}" "${INT_DEPS}" "") diff --git a/algo/test/_GTestPartitionedSpan.cxx b/algo/test/_GTestPartitionedSpan.cxx new file mode 100644 index 0000000000000000000000000000000000000000..7afc70f50d6eb6a1728e606dab79701f62c0e032 --- /dev/null +++ b/algo/test/_GTestPartitionedSpan.cxx @@ -0,0 +1,122 @@ +/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ +#include <gtest/gtest.h> + +#include "PartitionedSpan.h" +#include "PartitionedVector.h" + +using namespace cbm::algo; + +template<typename T> +void EXPECT_CONTAINER_EQ(gsl::span<T> a, std::vector<T> b) +{ + EXPECT_EQ(a.size(), b.size()); + for (size_t i = 0; i < a.size(); ++i) { + EXPECT_EQ(a[i], b[i]); + } +} + +class PartitionedSpanTest : public ::testing::Test { +protected: + std::vector<i32> fData; + std::vector<size_t> fSizes; + std::vector<size_t> fOffsets; + std::vector<size_t> fOffsetsInvalid; + std::vector<u32> fAddresses; + std::vector<u32> fAddressesInvalid; + + void SetUp() override + { + fData = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + fSizes = {2, 4, 3}; + fOffsets = {0, 2, 6, 9}; + fOffsetsInvalid = {0, 2, 6}; + fAddresses = {0x0, 0x100, 0x200}; + fAddressesInvalid = {0x0, 0x100}; + } + + void Ensure(const PartitionedSpan<i32>& vec) + { + EXPECT_EQ(vec.NElements(), 
9); + EXPECT_EQ(vec.NPartitions(), 3); + + EXPECT_EQ(vec.Size(0), 2); + + EXPECT_EQ(vec.Size(0), 2); + EXPECT_EQ(vec[0].size(), 2); + EXPECT_EQ(vec.Address(0), 0x0); + EXPECT_CONTAINER_EQ(vec[0], {1, 2}); + + auto part = vec.Partition(0); + EXPECT_EQ(part.first.size(), vec.Size(0)); + EXPECT_EQ(part.second, vec.Address(0)); + + EXPECT_EQ(vec.Size(1), 4); + EXPECT_EQ(vec[1].size(), 4); + EXPECT_EQ(vec.Address(1), 0x100); + EXPECT_CONTAINER_EQ(vec[1], {3, 4, 5, 6}); + + part = vec.Partition(1); + EXPECT_EQ(part.first.size(), vec.Size(1)); + EXPECT_EQ(part.second, vec.Address(1)); + + EXPECT_EQ(vec.Size(2), 3); + EXPECT_EQ(vec[2].size(), 3); + EXPECT_EQ(vec.Address(2), 0x200); + EXPECT_CONTAINER_EQ(vec[2], {7, 8, 9}); + + part = vec.Partition(2); + EXPECT_EQ(part.first.size(), vec.Size(2)); + EXPECT_EQ(part.second, vec.Address(2)); + } +}; + +TEST_F(PartitionedSpanTest, IsDefaultConstructable) +{ + PartitionedSpan<i32> vec; + EXPECT_EQ(vec.NElements(), 0); + EXPECT_EQ(vec.NPartitions(), 0); +} + +TEST_F(PartitionedSpanTest, CanCreateWithPartitions) +{ + PartitionedSpan vec(fData, fOffsets, fAddresses); + Ensure(vec); +} + +TEST_F(PartitionedSpanTest, IsConstructableFromPartitionedVector) +{ + PartitionedVector vec(std::move(fData), fSizes, fAddresses); + + PartitionedSpan span(vec); + Ensure(span); +} + +TEST_F(PartitionedSpanTest, IsCopyConstructable) +{ + PartitionedSpan vec(fData, fOffsets, fAddresses); + + PartitionedSpan vecCopy(vec); + Ensure(vecCopy); +} + +TEST_F(PartitionedSpanTest, ThrowsOnNumAddressesMismatchesNumPartions) +{ + EXPECT_THROW(PartitionedSpan vec(fData, fOffsets, fAddressesInvalid), std::runtime_error); +} + +TEST_F(PartitionedSpanTest, ThrowsOnOffsetsMismatchesDataSize) +{ + EXPECT_THROW(PartitionedSpan vec(fData, fOffsetsInvalid, fAddresses), std::runtime_error); +} + +TEST_F(PartitionedSpanTest, ThrowsOnOutOfBounds) +{ + PartitionedSpan vec(fData, fOffsets, fAddresses); + + EXPECT_THROW(vec[3], std::out_of_range); + 
EXPECT_THROW(vec.Partition(3), std::out_of_range); + EXPECT_THROW(vec.Address(3), std::out_of_range); + EXPECT_THROW(vec.Size(3), std::out_of_range); +} diff --git a/algo/test/_GTestPartitionedVector.cxx b/algo/test/_GTestPartitionedVector.cxx index 41dca1e213cada52c16459df9c274b539caecc2e..0d9be0cf31c99f8001a1c3c4eabb0819224c6ec3 100644 --- a/algo/test/_GTestPartitionedVector.cxx +++ b/algo/test/_GTestPartitionedVector.cxx @@ -9,7 +9,7 @@ using namespace cbm::algo; template<typename T> -void EXPECT_CONTAINER_EQ(gsl::span<T> a, std::vector<T> b) +void EXPECT_CONTAINER_EQ(gsl::span<const T> a, std::vector<T> b) { EXPECT_EQ(a.size(), b.size()); for (size_t i = 0; i < a.size(); ++i) { @@ -17,77 +17,83 @@ void EXPECT_CONTAINER_EQ(gsl::span<T> a, std::vector<T> b) } } -TEST(PartitionedVector, CanCreateEmpty) +class PartitionedVectorTest : public ::testing::Test { +protected: + std::vector<i32> fData; + std::vector<size_t> fSizes; + std::vector<size_t> fSizesInvalid; + std::vector<u32> fAddresses; + std::vector<u32> fAddressesInvalid; + + void SetUp() override + { + fData = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + fSizes = {2, 4, 3}; + fSizesInvalid = {2, 4, 4}; + fAddresses = {0x0, 0x100, 0x200}; + fAddressesInvalid = {0x0, 0x100}; + } + + void Ensure(const PartitionedVector<i32>& vec) + { + EXPECT_EQ(vec.NElements(), 9); + EXPECT_EQ(vec.NPartitions(), 3); + + EXPECT_EQ(vec.Size(0), 2); + EXPECT_EQ(vec[0].size(), 2); + EXPECT_EQ(vec.Address(0), 0x0); + EXPECT_CONTAINER_EQ(vec[0], {1, 2}); + + auto part = vec.Partition(0); + EXPECT_EQ(part.first.size(), vec.Size(0)); + EXPECT_EQ(part.second, vec.Address(0)); + + EXPECT_EQ(vec.Size(1), 4); + EXPECT_EQ(vec[1].size(), 4); + EXPECT_EQ(vec.Address(1), 0x100); + EXPECT_CONTAINER_EQ(vec[1], {3, 4, 5, 6}); + + part = vec.Partition(1); + EXPECT_EQ(part.first.size(), vec.Size(1)); + EXPECT_EQ(part.second, vec.Address(1)); + + EXPECT_EQ(vec.Size(2), 3); + EXPECT_EQ(vec[2].size(), 3); + EXPECT_EQ(vec.Address(2), 0x200); + 
EXPECT_CONTAINER_EQ(vec[2], {7, 8, 9}); + + part = vec.Partition(2); + EXPECT_EQ(part.first.size(), vec.Size(2)); + EXPECT_EQ(part.second, vec.Address(2)); + } +}; + +TEST_F(PartitionedVectorTest, IsDefaultConstructable) { PartitionedVector<i32> vec; EXPECT_EQ(vec.NElements(), 0); EXPECT_EQ(vec.NPartitions(), 0); } -TEST(PartitionedVector, CanCreateWithPartitions) +TEST_F(PartitionedVectorTest, CanCreateWithPartitions) { - std::vector<i32> data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector<size_t> sizes = {2, 4, 3}; - std::vector<u32> addresses = {0x0, 0x100, 0x200}; - - PartitionedVector vec(std::move(data), sizes, addresses); - - EXPECT_EQ(vec.NElements(), 9); - EXPECT_EQ(vec.NPartitions(), 3); - - EXPECT_EQ(vec.Size(0), 2); - EXPECT_EQ(vec[0].size(), 2); - EXPECT_EQ(vec.Address(0), 0x0); - EXPECT_CONTAINER_EQ(vec[0], {1, 2}); - - auto part = vec.Partition(0); - EXPECT_EQ(part.first.size(), vec.Size(0)); - EXPECT_EQ(part.second, vec.Address(0)); - - EXPECT_EQ(vec.Size(1), 4); - EXPECT_EQ(vec[1].size(), 4); - EXPECT_EQ(vec.Address(1), 0x100); - EXPECT_CONTAINER_EQ(vec[1], {3, 4, 5, 6}); - - part = vec.Partition(1); - EXPECT_EQ(part.first.size(), vec.Size(1)); - EXPECT_EQ(part.second, vec.Address(1)); - - EXPECT_EQ(vec.Size(2), 3); - EXPECT_EQ(vec[2].size(), 3); - EXPECT_EQ(vec.Address(2), 0x200); - EXPECT_CONTAINER_EQ(vec[2], {7, 8, 9}); - - part = vec.Partition(2); - EXPECT_EQ(part.first.size(), vec.Size(2)); - EXPECT_EQ(part.second, vec.Address(2)); + PartitionedVector vec(std::move(fData), fSizes, fAddresses); + Ensure(vec); } -TEST(PartitionedVector, ThrowsOnAddressSizesMismatch) +TEST_F(PartitionedVectorTest, ThrowsOnNumAddressesMismatchesNumPartitions) { - std::vector<i32> data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector<size_t> sizes = {2, 4, 3}; - std::vector<u32> addresses = {0x0, 0x100}; - - EXPECT_THROW(PartitionedVector vec(std::move(data), sizes, addresses), std::runtime_error); + EXPECT_THROW(PartitionedVector vec(std::move(fData), fSizes, 
fAddressesInvalid), std::runtime_error); } -TEST(PartitionedVector, ThrowsOnSizesMismatch) +TEST_F(PartitionedVectorTest, ThrowsOnSizesMismatchesDataSize) { - std::vector<i32> data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector<size_t> sizes = {2, 3, 3}; - std::vector<u32> addresses = {0x0, 0x100, 0x200}; - - EXPECT_THROW(PartitionedVector vec(std::move(data), sizes, addresses), std::runtime_error); + EXPECT_THROW(PartitionedVector vec(std::move(fData), fSizesInvalid, fAddresses), std::runtime_error); } -TEST(PartitionedVector, ThrowsOnOutOfBounds) +TEST_F(PartitionedVectorTest, ThrowsOnOutOfBounds) { - std::vector<i32> data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - std::vector<size_t> sizes = {2, 4, 3}; - std::vector<u32> addresses = {0x0, 0x100, 0x200}; - - PartitionedVector vec(std::move(data), sizes, addresses); + PartitionedVector vec(std::move(fData), fSizes, fAddresses); EXPECT_THROW(vec[3], std::out_of_range); EXPECT_THROW(vec.Partition(3), std::out_of_range); @@ -95,4 +101,10 @@ TEST(PartitionedVector, ThrowsOnOutOfBounds) EXPECT_THROW(vec.Size(3), std::out_of_range); } -TEST(PartitionedVector, IsCopyConstructable) {} +TEST_F(PartitionedVectorTest, IsCopyConstructable) +{ + PartitionedVector vec(std::move(fData), fSizes, fAddresses); + + PartitionedVector vecCopy(vec); + Ensure(vecCopy); +} diff --git a/core/data/sts/CbmStsDigi.h b/core/data/sts/CbmStsDigi.h index caf5e8289a597ba059119a8cbef9ee2b9adc52da..08e39172edb528458facda87af26e664f1a9bf46 100644 --- a/core/data/sts/CbmStsDigi.h +++ b/core/data/sts/CbmStsDigi.h @@ -74,6 +74,13 @@ public: XPU_D int32_t GetAddress() const { return UnpackAddress(); } + XPU_D int32_t GetAddressPacked() const + { + int32_t highestBitAddr = fTime >> kNumTimestampBits; + int32_t packedAddress = (highestBitAddr << kNumLowerAddrBits) | int32_t(fAddress); + return packedAddress; + } + /** @brief Get the desired name of the branch for this obj in the cbm output tree (static) ** @return "StsDigi" **/ @@ -195,8 +202,7 @@ private: XPU_D 
int32_t UnpackAddress() const { - int32_t highestBitAddr = fTime >> kNumTimestampBits; - int32_t packedAddress = (highestBitAddr << kNumLowerAddrBits) | int32_t(fAddress); + int32_t packedAddress = GetAddressPacked(); return CbmStsAddress::UnpackDigiAddress(packedAddress); } diff --git a/reco/app/cbmreco/main.cxx b/reco/app/cbmreco/main.cxx index f0f69c7e07056caf09ce4a505b17deca980feabe..4e9fb1e9564472369e0395d836f75f56a87c82de 100644 --- a/reco/app/cbmreco/main.cxx +++ b/reco/app/cbmreco/main.cxx @@ -11,6 +11,7 @@ #include <xpu/host.h> #include "BuildInfo.h" +#include "Exceptions.h" #include "Options.h" #include "Reco.h" #include "RecoResultsInputArchive.h" @@ -143,12 +144,19 @@ int main(int argc, char** argv) tsIdx++; continue; } - auto result = reco.Run(*ts); - if (archive) { - auto storable = makeStorableRecoResults(*ts, result); - archive->put(storable); + try { + RecoResults result = reco.Run(*ts); + if (archive) { + auto storable = makeStorableRecoResults(*ts, result); + archive->put(storable); + } + } + catch (const ProcessingError& e) { + // TODO: Add flag if we want to abort on exception or continue with next timeslice + L_(error) << "Caught ProcessingError while processing timeslice " << tsIdx << ": " << e.what(); } + tsIdx++; if (num_ts > 0 && tsIdx >= num_ts) break; diff --git a/reco/detectors/sts/CbmRecoSts.cxx b/reco/detectors/sts/CbmRecoSts.cxx index 39d76808a29fcbaddf2fbc974aa1c4d9b26146b5..b0765929160bf145c17ba67f4f7bb5f68b527899 100644 --- a/reco/detectors/sts/CbmRecoSts.cxx +++ b/reco/detectors/sts/CbmRecoSts.cxx @@ -182,7 +182,11 @@ UInt_t CbmRecoSts::CreateModules() .stepSize = float(landauStepSize), }, }; - if (fUseGpuReco) fGpuReco.SetParameters(pars); + cbm::algo::sts::HitfinderChainPars chainPars { + .setup = pars, + .memory = {}, + }; + if (fUseGpuReco) fGpuReco.SetParameters(chainPars); return fModules.size(); }