diff --git a/algo/CMakeLists.txt b/algo/CMakeLists.txt
index f4d65d357bbe06dc709c443248334153fda68b6c..34b6976ae8335544695d5665fcd58b913ba980cc 100644
--- a/algo/CMakeLists.txt
+++ b/algo/CMakeLists.txt
@@ -1,8 +1,13 @@
 add_subdirectory(data)
 add_subdirectory(test)
 
+set(DEVICE_SRCS  # Sources containing xpu device code; also handed to xpu_attach() further down
+  detectors/sts/UnpackStsXpu.cxx
+)
+
 # Create a library libCbmAlgo
 set(SRCS
+  ${DEVICE_SRCS}
   evbuild/EventBuilder.cxx
   trigger/TimeClusterTrigger.cxx
   evselector/DigiEventSelector.cxx
@@ -33,8 +38,9 @@ target_include_directories(Algo
          ${CMAKE_CURRENT_SOURCE_DIR}/detectors/trd
  )
 
-target_link_libraries(Algo PUBLIC OnlineData ROOT::GenVector INTERFACE FairLogger::FairLogger external::fles_ipc)
+target_link_libraries(Algo PUBLIC OnlineData ROOT::GenVector xpu INTERFACE FairLogger::FairLogger external::fles_ipc)  # xpu is PUBLIC: Algo's own sources include xpu headers, not only its consumers
 target_compile_definitions(Algo PUBLIC NO_ROOT)
+xpu_attach(Algo ${DEVICE_SRCS})
 
 install(TARGETS Algo DESTINATION lib)
 
diff --git a/algo/detectors/sts/StsReadoutConfig.cxx b/algo/detectors/sts/StsReadoutConfig.cxx
index a1ec9b3f64f671ea741121543fc275d421174198..809a20fa45f36868d6553da16a70a7a55f415b49 100644
--- a/algo/detectors/sts/StsReadoutConfig.cxx
+++ b/algo/detectors/sts/StsReadoutConfig.cxx
@@ -48,6 +48,18 @@ namespace cbm::algo
   // ------------------------------------------------------------------------------------
 
 
+  // ---   Total number of elinks for STS   ---------------------------------------------
+  size_t StsReadoutConfig::GetNumElinks()
+  {
+    // Add up the elink counts of all entries stored in the readout map
+    size_t numElinks = 0;
+    for (auto it = fReadoutMap.begin(); it != fReadoutMap.end(); ++it)
+      numElinks += it->second.size();
+    return numElinks;
+  }
+  // ------------------------------------------------------------------------------------
+
+
   // ---  Initialise the mapping structure   --------------------------------------------
   void StsReadoutConfig::Init()
   {
diff --git a/algo/detectors/sts/StsReadoutConfig.h b/algo/detectors/sts/StsReadoutConfig.h
index 03302aec8c5148e2af0dd58002c20f60c0de87d1..76dd79efb5d2c9a1fb9bc1593cef86ccf230de96 100644
--- a/algo/detectors/sts/StsReadoutConfig.h
+++ b/algo/detectors/sts/StsReadoutConfig.h
@@ -50,6 +50,12 @@ namespace cbm::algo
     size_t GetNumElinks(uint16_t equipmentId);
 
 
+    /** @brief Total number of elinks for STS, summed over all entries of the readout map
+     ** @return Number of elinks
+     **/
+    size_t GetNumElinks();
+
+
     /** @brief API: Mapping from component and elink to address and ASIC number
      ** @param equipId     Equipment identifier (component)
      ** @param elink       Elink number within component
diff --git a/algo/detectors/sts/UnpackStsXpu.cxx b/algo/detectors/sts/UnpackStsXpu.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..b07d6e188174b88a43afcf29e8af3c1bbc6ae8be
--- /dev/null
+++ b/algo/detectors/sts/UnpackStsXpu.cxx
@@ -0,0 +1,274 @@
+/* Copyright (C) 2023 Facility for Antiproton and Ion Research in Europe, Darmstadt
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Dominik Smith [committer] */
+
+#include "UnpackStsXpu.h"
+
+#include <cassert>
+#include <utility>
+#include <vector>
+
+#include <cmath>
+
+#include "StsXyterMessage.h"
+
+using std::unique_ptr;
+using std::vector;
+
+XPU_IMAGE(cbm::algo::UnpackStsXpu::StsXpuUnpack);  // Call exactly once per library
+
+XPU_BLOCK_SIZE_1D(cbm::algo::UnpackStsXpu::Unpack, 32);  // presumably 32 threads per block for the Unpack kernel -- confirm against the xpu documentation
+
+namespace cbm::algo
+{
+
+  // ----   Algorithm execution   ---------------------------------------------
+  UnpackStsXpu::resultType UnpackStsXpu::operator()(const fles::Timeslice* ts, StsReadoutConfig& config)
+  {
+    // --- Output data: unpacked digis (first) and host-side monitoring counters (second).
+    // --- Monitoring data produced inside the kernel is currently not propagated back.
+    resultType result = {};
+
+    // ---  Init local storage vectors
+    std::vector<stsxyter::Message> messages;  //storage of all messages
+    std::vector<uint64_t> messCount;          //storage of number of messages per MS
+    std::vector<uint64_t> messOffset;         //storage of MS offset in message buffer
+    std::vector<uint64_t> msIdx;              //storage of MS idx / start time
+    std::vector<uint32_t> compIdx;            //storage of comp idx for MS
+
+    auto equipIdsSts         = config.GetEquipmentIds();
+    const size_t numStsComps = equipIdsSts.size();
+
+    // --- Loop over components in unpacker config
+    for (size_t comp = 0; comp < numStsComps; comp++) {
+      auto equip = equipIdsSts[comp];
+      // --- Loop over components in timeslice
+      for (uint64_t tsComp = 0; tsComp < ts->num_components(); tsComp++) {
+
+        // --- Skip if TS component is not from STS (equipment IDs are non-unique across systems)
+        auto systemId = static_cast<fles::SubsystemIdentifier>(ts->descriptor(tsComp, 0).sys_id);
+        if (systemId != fles::SubsystemIdentifier::STS) continue;
+
+        if (equip == ts->descriptor(tsComp, 0).eq_id) {
+          const uint64_t numMsInComp = ts->num_microslices(tsComp);
+          for (uint64_t mslice = 0; mslice < numMsInComp; mslice++) {
+            const auto msDescr = ts->descriptor(tsComp, mslice);
+            if (msDescr.size % sizeof(stsxyter::Message) != 0) {
+              result.second.fNumErrInvalidMsSize++;
+              continue;
+            }
+            const uint32_t numMessages = msDescr.size / sizeof(stsxyter::Message);
+            if (numMessages < 2) {
+              result.second.fNumErrInvalidMsSize++;
+              continue;
+            }
+            msIdx.push_back(msDescr.idx);
+            compIdx.push_back(static_cast<uint32_t>(comp));
+            messCount.push_back(numMessages);
+            messOffset.push_back(messages.size());
+            const auto msContent = ts->content(tsComp, mslice);
+            auto mess            = reinterpret_cast<const stsxyter::Message*>(msContent);
+            messages.insert(messages.end(), mess, mess + numMessages);
+          }
+        }
+      }
+    }
+    // --- Total number of microslices
+    const uint64_t numMs = messCount.size();
+
+    // --- Nothing to unpack: return before allocating zero-sized buffers and launching an empty kernel
+    if (numMs == 0) return result;
+
+    // --- Store SMX messages to be unpacked (TS content)
+    xpu::d_buffer<stsxyter::Message> tsContent {messages.size()};
+    xpu::copy(tsContent.d(), messages.data(), messages.size());
+
+    // --- Store auxiliary host-device buffers
+    xpu::hd_buffer<uint64_t> msMessCount {numMs};  //modified by kernel, stores numDigis after execution
+    xpu::copy(msMessCount.d(), messCount.data(), messCount.size());
+
+    xpu::hd_buffer<uint64_t> msMessOffset {numMs};  //unchanged but needed on device and host
+    std::copy(messOffset.begin(), messOffset.end(), msMessOffset.h());
+    xpu::copy(msMessOffset, xpu::host_to_device);
+
+    // --- Store auxiliary device buffers
+    xpu::d_buffer<uint64_t> msStartTime {numMs};
+    xpu::d_buffer<uint32_t> msCompIdx {numMs};
+    xpu::copy(msStartTime.d(), msIdx.data(), msIdx.size());
+    xpu::copy(msCompIdx.d(), compIdx.data(), compIdx.size());
+
+    // --- Create output buffer with maximum possible size
+    xpu::hd_buffer<CbmStsDigi> digisOut {messages.size()};
+
+    // --- Current Timeslice start time in epoch units. Note that it is always a multiple of epochs
+    // --- and the epoch is a multiple of ns.
+    const uint64_t epochLengthInNs = fkEpochLength * fkClockCycleNom / fkClockCycleDen;
+    const uint64_t currentTsTime   = ts->start_time() / epochLengthInNs;
+
+    // --- Do unpacking for each microslice
+    xpu::run_kernel<Unpack>(xpu::grid::n_threads(numMs), fParams.d(), fElinkParams.d(), tsContent.d(), msMessCount.d(),
+                            msMessOffset.d(), msStartTime.d(), msCompIdx.d(), digisOut.d(), currentTsTime, numMs);
+
+    // --- Copy results back to host (only two buffers are modified on device)
+    xpu::copy(msMessCount, xpu::device_to_host);
+    xpu::copy(digisOut, xpu::device_to_host);
+
+    // --- Store digis  TO DO: make Kernel for this, needs a way to sum arrays in XPU first
+    for (uint64_t i = 0; i < numMs; i++) {
+      const CbmStsDigi* msDigis = digisOut.h() + msMessOffset.h()[i];  // digis of MS i start at its message offset
+      result.first.insert(result.first.end(), msDigis, msDigis + msMessCount.h()[i]);
+    }
+
+    return result;
+  }
+
+  XPU_KERNEL(UnpackStsXpu::Unpack, xpu::no_smem, UnpackStsXpuPar* params, UnpackStsXpuElinkPar* elinkParams,
+             stsxyter::Message* content, uint64_t* msMessCount, uint64_t* msMessOffset, uint64_t* msStartTime,
+             uint32_t* msCompIdx, CbmStsDigi* digisOut, const uint64_t currentTsTime, int NElems)
+  {
+    int id = xpu::block_idx::x() * xpu::block_dim::x() + xpu::thread_idx::x();
+    if (id >= NElems) return;  // exit if out of bounds
+
+    UnpackStsXpuMonitorData monitor;  //Monitor data, currently not stored. TO DO: Implement!
+
+    // --- Get message count and offset for this MS. The count slot is also the output slot (number of
+    // --- digis), so it is reset here: every early exit below then reports zero digis to the host.
+    const uint32_t numMessages = msMessCount[id];
+    const uint64_t messOffset  = msMessOffset[id];  // kept at 64 bit so large buffer offsets are not truncated
+    msMessCount[id]            = 0;
+    if (numMessages < 2) return;  // exit if too few messages
+
+    // --- Get starting position of this MS in message buffer
+    stsxyter::Message* message = &content[messOffset];
+
+    // --- Get starting position of this MS in digi buffer
+    CbmStsDigi* digis = &digisOut[messOffset];
+
+    // --- Get component index and unpack parameters of this MS
+    const uint32_t comp              = msCompIdx[id];
+    const UnpackStsXpuPar& unpackPar = params[comp];
+
+    // --- Get starting position of elink parameters of this MS
+    UnpackStsXpuElinkPar* elinkPar = &elinkParams[unpackPar.fElinkOffset];
+
+    // --- Init counter for produced digis
+    uint64_t numDigis = 0;
+
+    // --- The first message in the MS is expected to be of type EPOCH and can be ignored.
+    if (message[0].GetMessType() != stsxyter::MessType::Epoch) {
+      monitor.fNumErrInvalidFirstMessage++;
+      return;
+    }
+
+    // --- The second message must be of type ts_msb.
+    if (message[1].GetMessType() != stsxyter::MessType::TsMsb) {
+      monitor.fNumErrInvalidFirstMessage++;
+      return;
+    }
+
+    // --- Current TS_MSB epoch cycle
+    uint64_t currentCycle = msStartTime[id] / fkCycleLength;
+
+    // --- Process first message (ts_msb)
+    uint32_t currentEpoch     = 0;  ///< Current epoch number within epoch cycle
+    uint64_t currentEpochTime = 0;  ///< Current epoch time relative to timeslice in clock cycles
+    ProcessTsmsbMessage(message[1], currentEpoch, currentEpochTime, currentCycle, currentTsTime);
+
+    // --- Message loop
+    for (uint32_t messageNr = 2; messageNr < numMessages; messageNr++) {
+      // --- Action depending on message type
+      switch (message[messageNr].GetMessType()) {
+        case stsxyter::MessType::Hit: {
+          ProcessHitMessage(message[messageNr], digis, numDigis, unpackPar, elinkPar, monitor, currentEpochTime);
+          break;
+        }
+        case stsxyter::MessType::TsMsb: {
+          ProcessTsmsbMessage(message[messageNr], currentEpoch, currentEpochTime, currentCycle, currentTsTime);
+          break;
+        }
+        default: {
+          monitor.fNumNonHitOrTsbMessage++;
+          break;
+        }
+      }
+    }
+    // --- Store number of digis in buffer
+    msMessCount[id] = numDigis;
+  }
+
+
+  // -----   Process hit message   --------------------------------------------
+  XPU_D inline void UnpackStsXpu::ProcessHitMessage(const stsxyter::Message& message, CbmStsDigi* digis,
+                                                    uint64_t& numDigis, const UnpackStsXpuPar& unpackPar,
+                                                    UnpackStsXpuElinkPar* elinkParams, UnpackStsXpuMonitorData& monitor,
+                                                    const uint64_t currentEpochTime)
+  {
+    // Converts one hit message into a digi stored at digis[numDigis]; a message whose elink
+    // index is not covered by the component parameters is counted in the monitor and dropped.
+    const uint16_t linkIdx = message.GetLinkIndexHitBinning();
+    if (!(linkIdx < unpackPar.fNumElinks)) {
+      monitor.fNumErrElinkOutOfRange++;
+      return;
+    }
+    const UnpackStsXpuElinkPar& link = elinkParams[linkIdx];
+
+    // Hardware-to-software address: ASICs in the lower half of the module count channels
+    // upwards (front side, n side), the remaining ones downwards (back side, p side).
+    const uint32_t address      = link.fAddress;
+    const uint32_t asic         = link.fAsicNr;
+    const uint32_t asicsPerSide = unpackPar.fNumAsicsPerModule / 2;
+    const uint32_t chansPerAsic = unpackPar.fNumChansPerAsic;
+    uint32_t moduleChannel      = 0;
+    if (asic >= asicsPerSide) {  // back side (p side)
+      const uint32_t chansPerModule = unpackPar.fNumAsicsPerModule * chansPerAsic;
+      moduleChannel = chansPerModule - message.GetHitChannel() - chansPerAsic * (asic - asicsPerSide) - 1;
+    }
+    else {  // front side (n side)
+      moduleChannel = message.GetHitChannel() + chansPerAsic * asic;
+    }
+
+    // Hit time within the timeslice, first in clock cycles ...
+    const uint64_t timeInClocks = message.GetHitTimeBinning() + currentEpochTime;
+
+    // ... then converted to ns, rounded to the nearest full ns ...
+    uint64_t timeInNs = (timeInClocks * fkClockCycleNom + fkClockCycleDen / 2) / fkClockCycleDen;
+
+    // ... and finally corrected for the ASIC-wise time offset.
+    timeInNs -= link.fTimeOffset;
+    // --- TODO: Add walk correction (depends on ADC)
+
+    // Charge from the ADC value, then append the digi and advance the counter
+    const double adcCharge = link.fAdcOffset + (message.GetHitAdc() - 1) * link.fAdcGain;
+    digis[numDigis++]      = CbmStsDigi(address, moduleChannel, timeInNs, adcCharge);
+  }
+  // --------------------------------------------------------------------------
+
+
+  // -----   Process an epoch (TS_MSB) message   ------------------------------
+  XPU_D inline void UnpackStsXpu::ProcessTsmsbMessage(const stsxyter::Message& message, uint32_t& currentEpoch,
+                                                      uint64_t& currentEpochTime, uint64_t& currentCycle,
+                                                      const uint64_t currentTsTime)
+  {
+    // Time is encoded hierarchically as epoch cycle - epoch - message time. Cycles are counted
+    // from the start of Unix time and consist of a fixed number of epochs; the epoch number is
+    // counted within the cycle, and the time in a hit message is given in readout clock cycles
+    // relative to the current epoch. A ts_msb message starts a new epoch and carries its number
+    // within the current cycle. Since a cycle wrap resets the epoch number to zero, a wrap shows
+    // up as an epoch number smaller than the previous one.
+    const auto newEpoch = message.GetTsMsbValBinning();
+
+    // --- Cycle wrap
+    if (currentEpoch > newEpoch) ++currentCycle;
+
+    // --- Update current epoch counter
+    currentEpoch = newEpoch;
+
+    // --- Calculate epoch time in clock cycles relative to timeslice start time: the difference
+    // --- to the timeslice start is taken in epochs and then scaled by the epoch length.
+    const uint64_t epochsSinceTsStart = currentCycle * fkEpochsPerCycle + newEpoch - currentTsTime;
+    currentEpochTime                  = epochsSinceTsStart * fkEpochLength;
+  }
+  // --------------------------------------------------------------------------
+
+
+} /* namespace cbm::algo */
diff --git a/algo/detectors/sts/UnpackStsXpu.h b/algo/detectors/sts/UnpackStsXpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..649aa14761e11dbad4b69adf03e46274b712bfbd
--- /dev/null
+++ b/algo/detectors/sts/UnpackStsXpu.h
@@ -0,0 +1,165 @@
+/* Copyright (C) 2023 Facility for Antiproton and Ion Research in Europe, Darmstadt
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Dominik Smith [committer] */
+
+#ifndef CBM_ALGO_UNPACKSTSXPU_H
+#define CBM_ALGO_UNPACKSTSXPU_H 1
+
+
+#include "CbmStsDigi.h"
+
+#include "MicrosliceDescriptor.hpp"
+#include "Timeslice.hpp"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include <xpu/device.h>
+#include <xpu/host.h>
+
+#include "StsReadoutConfig.h"
+#include "StsXyterMessage.h"
+
+
+namespace cbm::algo
+{
+
+  /** @struct UnpackStsXpuElinkPar
+   ** @author Volker Friese <v.friese@gsi.de>
+   ** @since 25 November 2021
+   ** @brief STS Unpacking parameters for one eLink / ASIC
+   **/
+  struct UnpackStsXpuElinkPar {
+    int32_t fAddress     = 0;   ///< CbmStsAddress for the connected module
+    uint32_t fAsicNr     = 0;   ///< Number of connected ASIC within the module
+    uint64_t fTimeOffset = 0;   ///< Time calibration parameter, subtracted from the hit time in ns
+    double fAdcOffset    = 0.;  ///< Charge calibration parameter: charge assigned to ADC value 1
+    double fAdcGain      = 0.;  ///< Charge calibration parameter: charge per ADC unit
+  };
+
+
+  /** @struct UnpackStsXpuPar
+   ** @author Volker Friese <v.friese@gsi.de>
+   ** @since 25 November 2021
+   ** @brief Parameters required for the STS unpacking (specific to one component)
+   **/
+  struct UnpackStsXpuPar {
+    uint32_t fNumChansPerAsic   = 0;  ///< Number of channels per ASIC
+    uint32_t fNumAsicsPerModule = 0;  ///< Number of ASICS per module
+    uint32_t fNumElinks         = 0;  ///< Number of elinks for this component
+    uint32_t fElinkOffset       = 0;  ///< Index of this component's first elink in the elink parameter buffer
+  };
+
+
+  /** @struct UnpackStsXpuMonitorData
+   ** @author Volker Friese <v.friese@gsi.de>
+   ** @since 2 December 2021
+   ** @brief Monitoring data for STS unpacking
+   **/
+  struct UnpackStsXpuMonitorData {
+    uint32_t fNumNonHitOrTsbMessage     = 0;  ///< Message is neither of type hit nor TS_MSB
+    uint32_t fNumErrElinkOutOfRange     = 0;  ///< Elink not contained in parameters
+    uint32_t fNumErrInvalidFirstMessage = 0;  ///< First message is not EPOCH or second is not TS_MSB
+    uint32_t fNumErrInvalidMsSize       = 0;  ///< Microslice size is not multiple of message size, or below 2 messages
+    uint32_t fNumErrTimestampOverflow   = 0;  ///< Overflow in 64 bit time stamp
+    /** @brief True if any counter is non-zero (tested per counter, so a wrapping sum cannot hide errors) **/
+    bool HasErrors() const
+    {
+      return fNumNonHitOrTsbMessage > 0 || fNumErrElinkOutOfRange > 0 || fNumErrInvalidFirstMessage > 0
+             || fNumErrInvalidMsSize > 0 || fNumErrTimestampOverflow > 0;
+    }
+  };
+
+
+  /** @class UnpackStsXpu
+   ** @author Pierre-Alain Loizeau <p.-a.loizeau@gsi.de>
+   ** @author Volker Friese <v.friese@gsi.de>
+   ** @since 25 November 2021
+   ** @brief Unpack algorithm for STS, implemented as an xpu kernel processing one microslice per thread
+   **/
+  class UnpackStsXpu {
+
+  public:
+    typedef std::pair<std::vector<CbmStsDigi>, UnpackStsXpuMonitorData> resultType;
+
+    /** @brief Default constructor **/
+    UnpackStsXpu() {};
+
+
+    /** @brief Destructor **/
+    ~UnpackStsXpu() {};
+
+
+    /** @brief Algorithm execution
+     ** @param  ts      Timeslice payload
+     ** @param  config  Configuration data (provides the equipment identifiers to be unpacked)
+     ** @return STS digi data (first) and monitoring data (second)
+     ** @note   fParams and fElinkParams are passed to the kernel as they are; the caller has to fill them
+     **/
+    resultType operator()(const fles::Timeslice* ts, StsReadoutConfig& config);
+
+
+    struct StsXpuUnpack {
+    };  // Identifier used by xpu to find where kernels are located
+
+
+    // Kernel running the unpacker, one thread per microslice (NElems = number of microslices)
+    XPU_EXPORT_KERNEL(StsXpuUnpack, Unpack, UnpackStsXpuPar* params, UnpackStsXpuElinkPar* elinkParams,
+                      stsxyter::Message* content, uint64_t* msMessCount, uint64_t* msMessOffset, uint64_t* msStartTime,
+                      uint32_t* msCompIdx, CbmStsDigi* digisOut, const uint64_t currentTsTime, int NElems);
+
+    // Stores parameter structs for all elinks (of all components, addressed via UnpackStsXpuPar::fElinkOffset)
+    xpu::hd_buffer<UnpackStsXpuElinkPar> fElinkParams;
+
+
+    // Stores parameter structs for all components
+    xpu::hd_buffer<UnpackStsXpuPar> fParams;
+
+
+  private:  // methods
+    /** @brief Process a hit message
+     ** @param message           SMX message (32-bit word)
+     ** @param digis             Digi buffer of the current microslice
+     ** @param numDigis          Digi counter; index of the next free slot in digis, incremented on success
+     ** @param unpackPar         Parameters for the component
+     ** @param elinkParams       Parameter buffer for the elinks of the component
+     ** @param monitor           Reference to monitor object
+     ** @param currentEpochTime  Current epoch time relative to timeslice in clock cycles
+     **/
+    XPU_D static void ProcessHitMessage(const stsxyter::Message& message, CbmStsDigi* digis, uint64_t& numDigis,
+                                        const UnpackStsXpuPar& unpackPar, UnpackStsXpuElinkPar* elinkParams,
+                                        UnpackStsXpuMonitorData& monitor, const uint64_t currentEpochTime);
+
+    /** @brief Process an epoch message (TS_MSB)
+     ** @param message           SMX message (32-bit word)
+     ** @param currentEpoch      Current epoch number within epoch cycle (updated)
+     ** @param currentEpochTime  Current epoch time relative to timeslice in clock cycles (updated)
+     ** @param currentCycle      Current TS_MSB epoch cycle (incremented on cycle wrap)
+     ** @param currentTsTime     Timeslice start time in epoch units **/
+    XPU_D static void ProcessTsmsbMessage(const stsxyter::Message& message, uint32_t& currentEpoch,
+                                          uint64_t& currentEpochTime, uint64_t& currentCycle,
+                                          const uint64_t currentTsTime);
+
+  private:  // members
+            ///// To do: Make these available on device somehow
+    /** Number of TS_MSB epochs per cycle **/
+    static constexpr uint64_t fkEpochsPerCycle = stsxyter::kuTsMsbNbTsBinsBinning;
+
+    /** Length of TS_MSB epoch in clock cycles **/
+    static constexpr uint64_t fkEpochLength = stsxyter::kuHitNbTsBinsBinning;
+
+    /** Clock cycle numerator [ns] and denominator. The clock cycle in ns is numerator / denominator. **/
+    static constexpr uint32_t fkClockCycleNom = stsxyter::kulClockCycleNom;
+    static constexpr uint32_t fkClockCycleDen = stsxyter::kulClockCycleDen;
+
+    /** Epoch cycle length in ns **/
+    static constexpr uint64_t fkCycleLength = (fkEpochsPerCycle * fkEpochLength * fkClockCycleNom) / fkClockCycleDen;
+  };
+
+
+} /* namespace cbm::algo */
+
+#endif /* CBM_ALGO_UNPACKSTSXPU_H */
diff --git a/reco/tasks/CMakeLists.txt b/reco/tasks/CMakeLists.txt
index 8e4478fdd84d1336ac03a9ccdaaac562de1f4d08..d71ec0eaca2b5d84f41f801ead71f7481db94a7b 100644
--- a/reco/tasks/CMakeLists.txt
+++ b/reco/tasks/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SRCS
   CbmTaskTriggerDigi.cxx
   CbmTaskTofHitFinder.cxx
   CbmTaskUnpack.cxx
+  CbmTaskUnpackXpu.cxx
 )
 
 
diff --git a/reco/tasks/CbmRecoTasksLinkDef.h b/reco/tasks/CbmRecoTasksLinkDef.h
index 980fb920edc0e77f102f7470cd8bbe2ff346d3d9..590d0cfb5f33f5ba4282f9b650b28cc37fb2e3dd 100644
--- a/reco/tasks/CbmRecoTasksLinkDef.h
+++ b/reco/tasks/CbmRecoTasksLinkDef.h
@@ -21,6 +21,7 @@
 #pragma link C++ class CbmTaskTofHitFinder + ;
 #pragma link C++ class CbmTaskTriggerDigi + ;
 #pragma link C++ class CbmTaskUnpack + ;
+#pragma link C++ class CbmTaskUnpackXpu + ;
 
 
 #endif /* __CINT__ */
diff --git a/reco/tasks/CbmTaskUnpackXpu.cxx b/reco/tasks/CbmTaskUnpackXpu.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..f095169d69216eeceec9baf41949dd89f47328cb
--- /dev/null
+++ b/reco/tasks/CbmTaskUnpackXpu.cxx
@@ -0,0 +1,187 @@
+/* Copyright (C) 2023 Facility for Antiproton and Ion Research in Europe, Darmstadt
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Dominik Smith [committer] */
+
+
+#include "CbmTaskUnpackXpu.h"
+
+#include "CbmDefs.h"
+#include "CbmDigiBranchBase.h"
+#include "CbmDigiEvent.h"
+#include "CbmDigiManager.h"
+#include "CbmDigiTimeslice.h"
+#include "CbmSourceTs.h"
+
+#include "MicrosliceDescriptor.hpp"
+
+#include <FairRunOnline.h>
+#include <Logger.h>
+
+#include <TStopwatch.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#ifdef WITH_EXECUTION
+#include <execution>
+#endif
+#include <iomanip>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include <xpu/host.h>
+
+using namespace std;
+using cbm::algo::UnpackStsXpuElinkPar;
+using cbm::algo::UnpackStsXpuPar;
+
+// -----   Constructor   -----------------------------------------------------
+CbmTaskUnpackXpu::CbmTaskUnpackXpu() : FairTask("Unpack") {}  // passes the task name "Unpack" to the FairTask base
+// ---------------------------------------------------------------------------
+
+
+// -----   Destructor   ------------------------------------------------------
+CbmTaskUnpackXpu::~CbmTaskUnpackXpu()
+{
+  delete fTimeslice;  // output object created in Init(); deleting a null pointer is a no-op
+}
+// ---------------------------------------------------------------------------
+
+
+// -----   Execution   -------------------------------------------------------
+void CbmTaskUnpackXpu::Exec(Option_t*)
+{
+  // --- Reset output branch (CbmDigiTimeslice)
+  fTimeslice->Clear();
+
+  // --- Get FLES timeslice
+  assert(fSource);
+  fles::Timeslice* timeslice = fSource->GetTimeslice();
+  assert(timeslice);
+
+  // --- Timer and counters
+  TStopwatch timer;
+  timer.Start();
+
+  //Run STS unpacker and store result
+  auto resultSts = fAlgoStsXpu(timeslice, fStsConfig);
+  auto& stsDigis = fTimeslice->fData.fSts.fDigis;
+  stsDigis.insert(stsDigis.end(), resultSts.first.begin(), resultSts.first.end());
+
+  // --- Sorting of output digis. Is required by both digi trigger and event builder.
+  // --- The comparator takes const references, so no digi is copied per comparison.
+  const auto earlier = [](const CbmStsDigi& digi1, const CbmStsDigi& digi2) { return digi1.GetTime() < digi2.GetTime(); };
+#ifdef WITH_EXECUTION
+  std::sort(std::execution::par_unseq, stsDigis.begin(), stsDigis.end(), earlier);
+#else
+  std::sort(stsDigis.begin(), stsDigis.end(), earlier);
+#endif
+
+  // --- Timeslice log
+  timer.Stop();
+  stringstream logOut;
+  logOut << setw(15) << left << GetName() << " [";
+  logOut << fixed << setw(8) << setprecision(1) << right << timer.RealTime() * 1000. << " ms] ";
+  logOut << "TS " << fNumTs << " (index " << timeslice->index() << ")";
+  logOut << ", digis " << stsDigis.size();
+  LOG(info) << logOut.str();
+
+  // --- Run statistics
+  fNumTs++;
+  // TO DO: implement these
+  //fNumMs += numMs;
+  //fNumBytes += numBytes;
+  //fNumDigis += numDigis;
+  fTime += timer.RealTime();
+}
+
+
+// -----   End-of-run action   ------------------------------------------------
+void CbmTaskUnpackXpu::Finish()
+{
+  const double timePerTs = (fNumTs > 0) ? 1000. * fTime / double(fNumTs) : 0.;  // in ms; 0 if no timeslice was processed
+  const double rate      = (fTime > 0.) ? fNumBytes / 1.e6 / fTime : 0.;        // in MB/s; 0 if no time was accumulated
+  LOG(info) << "=====================================";
+  LOG(info) << GetName() << ": Run summary";
+  LOG(info) << "Timeslices     : " << fNumTs;
+  LOG(info) << "Microslices    : " << fNumMs;
+  LOG(info) << "Digis          : " << fNumDigis;
+  LOG(info) << "Av. input rate : " << fixed << setprecision(2) << rate << " MB/s";
+  LOG(info) << "Time / TS      : " << fixed << setprecision(2) << timePerTs << " ms";
+  LOG(info) << "=====================================";
+}
+// ----------------------------------------------------------------------------
+
+
+// -----   Initialisation   ---------------------------------------------------
+InitStatus CbmTaskUnpackXpu::Init()
+{
+  // FIXME: this has to be called only once and should happen during initialization not in reco loop
+  setenv("XPU_PROFILE", "1", 1);
+  xpu::initialize();
+
+  LOG(info) << "==================================================";
+  LOG(info) << GetName() << ": Initialising...";
+
+  // --- The timeslice source registered with the run has to be a CbmSourceTs
+  fSource = dynamic_cast<CbmSourceTs*>(FairRunOnline::Instance()->GetSource());
+  if (!fSource) {
+    LOG(error) << GetName() << ": No valid source class registered!";
+    return kFATAL;
+  }
+  LOG(info) << "--- Found CbmSourceTs instance";
+
+  FairRootManager* ioman = FairRootManager::Instance();  // needed to register the output branch
+  assert(ioman);
+
+  // --- Create and register the output object (CbmDigiTimeslice), unless the branch is already present
+  if (ioman->GetObject("DigiTimeslice")) {
+    LOG(fatal) << GetName() << ": Branch DigiTimeslice already exists!";
+    return kFATAL;
+  }
+  fTimeslice = new CbmDigiTimeslice();
+  ioman->RegisterAny("DigiTimeslice.", fTimeslice, IsOutputBranchPersistent("DigiTimeslice."));
+  LOG(info) << "--- Registered branch DigiTimeslice.";
+
+  // --- Allocate the STS parameter buffers: one entry per component and one entry per elink
+  auto equipIdsSts          = fStsConfig.GetEquipmentIds();
+  const size_t numStsElinks = fStsConfig.GetNumElinks();
+  const size_t numStsComps  = equipIdsSts.size();
+  fAlgoStsXpu.fParams       = xpu::hd_buffer<UnpackStsXpuPar>(numStsComps);
+  fAlgoStsXpu.fElinkParams  = xpu::hd_buffer<UnpackStsXpuElinkPar>(numStsElinks);
+
+  // --- Common parameters for all components for STS
+  const uint32_t numChansPerAsicSts   = 128;  // R/O channels per ASIC for STS
+  const uint32_t numAsicsPerModuleSts = 16;   // Number of ASICs per module for STS
+
+  // --- Fill the host side of both buffers; each component's elinks start where the previous one's end
+  uint32_t nextElinkOffset = 0;
+  for (size_t comp = 0; comp < numStsComps; comp++) {
+    const auto equip           = equipIdsSts[comp];
+    const auto numElinks       = fStsConfig.GetNumElinks(equip);
+    UnpackStsXpuPar& compPar   = fAlgoStsXpu.fParams.h()[comp];
+    compPar.fNumChansPerAsic   = numChansPerAsicSts;
+    compPar.fNumAsicsPerModule = numAsicsPerModuleSts;
+    compPar.fNumElinks         = numElinks;
+    compPar.fElinkOffset       = nextElinkOffset;
+    nextElinkOffset += compPar.fNumElinks;
+    UnpackStsXpuElinkPar* compElinks = fAlgoStsXpu.fElinkParams.h() + compPar.fElinkOffset;
+    for (size_t elink = 0; elink < numElinks; elink++) {
+      const auto mapEntry           = fStsConfig.Map(equip, elink);
+      compElinks[elink].fAddress    = mapEntry.first;   // Module address for this elink
+      compElinks[elink].fAsicNr     = mapEntry.second;  // ASIC number within module
+      compElinks[elink].fTimeOffset = 0;
+      compElinks[elink].fAdcOffset  = 1.;
+      compElinks[elink].fAdcGain    = 1.;
+    }
+    LOG(info) << "--- Configured equipment " << equip << " with " << numElinks << " elinks";
+  }
+  xpu::copy(fAlgoStsXpu.fParams, xpu::host_to_device);
+  xpu::copy(fAlgoStsXpu.fElinkParams, xpu::host_to_device);
+
+  return kSUCCESS;
+}
+// ----------------------------------------------------------------------------
+
+ClassImp(CbmTaskUnpackXpu)
diff --git a/reco/tasks/CbmTaskUnpackXpu.h b/reco/tasks/CbmTaskUnpackXpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..fe51c61dae41a98c394efbecc0df1a3bfbc9f7b2
--- /dev/null
+++ b/reco/tasks/CbmTaskUnpackXpu.h
@@ -0,0 +1,85 @@
+/* Copyright (C) 2023 Facility for Antiproton and Ion Research in Europe, Darmstadt
+   SPDX-License-Identifier: GPL-3.0-only
+   Authors: Dominik Smith [committer] */
+
+
+#ifndef CBMTASKUNPACKXPU_H
+#define CBMTASKUNPACKXPU_H 1
+
+#include "CbmDefs.h"
+#include "CbmDigiTimeslice.h"
+
+#include <FairTask.h>
+
+#include <sstream>
+#include <vector>
+
+#include "EventBuilder.h"
+#include "StsReadoutConfig.h"
+#include "UnpackStsXpu.h"
+
+
+class CbmDigiManager;
+class CbmSourceTs;
+
+
+/** @class CbmTaskUnpackXpu
+ ** @brief Task class for unpacking timeslice raw data with the XPU-based STS unpacker
+ ** @author Volker Friese <v.friese@gsi.de>
+ ** @since 15.11.2021
+ **
+ ** Runs the algorithm cbm::algo::UnpackStsXpu on the timeslice delivered by the
+ ** source (CbmSourceTs) and stores the resulting digis in a CbmDigiTimeslice.
+ **
+ ** TODO: The current implementation is for STS only; the run statistics for
+ ** microslices, bytes and digis are not yet filled.
+ **/
+class CbmTaskUnpackXpu : public FairTask {
+
+
+public:
+  /** @brief Constructor **/
+  CbmTaskUnpackXpu();
+
+
+  /** @brief Copy constructor (disabled) **/
+  CbmTaskUnpackXpu(const CbmTaskUnpackXpu&) = delete;
+
+
+  /** @brief Destructor **/
+  virtual ~CbmTaskUnpackXpu();
+
+
+  /** @brief Task execution: unpacks the current timeslice **/
+  virtual void Exec(Option_t* opt);
+
+
+  /** @brief End-of-run action: prints the run summary **/
+  virtual void Finish();
+
+
+  /** @brief Assignment operator (disabled) **/
+  CbmTaskUnpackXpu& operator=(const CbmTaskUnpackXpu&) = delete;
+
+
+private:  // methods
+  /** @brief Task initialisation **/
+  virtual InitStatus Init();
+
+private:  // members
+  CbmSourceTs* fSource = nullptr;  ///< Timeslice source, set in Init()
+
+  cbm::algo::UnpackStsXpu fAlgoStsXpu;        ///< STS unpacking algorithm (XPU)
+  cbm::algo::StsReadoutConfig fStsConfig {};  ///< STS readout configuration
+
+  size_t fNumTs                = 0;        ///< Number of processed timeslices
+  size_t fNumMs                = 0;        ///< Number of microslices (reported in the run summary)
+  size_t fNumBytes             = 0;        ///< Number of input bytes (used for the input rate in the run summary)
+  size_t fNumDigis             = 0;        ///< Number of digis (reported in the run summary)
+  double fTime                 = 0.;       ///< Accumulated execution time
+  CbmDigiTimeslice* fTimeslice = nullptr;  ///< Output data
+
+  ClassDef(CbmTaskUnpackXpu, 1);
+};
+
+#endif /* CBMTASKUNPACKXPU_H */