Skip to content
Snippets Groups Projects
Commit 375d3fb7 authored by Felix Weiglhofer's avatar Felix Weiglhofer
Browse files

Bump xpu version.

parent 4f140bb2
No related branches found
No related tags found
1 merge request!1161algo: Update STS GPU Reco
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
namespace cbm::algo namespace cbm::algo
{ {
struct GPUReco { struct GPUReco : xpu::device_image {
}; };
} // namespace cbm::algo } // namespace cbm::algo
......
/* Copyright (C) 2022 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
SPDX-License-Identifier: GPL-3.0-only
Authors: Felix Weiglhofer [committer]*/
#ifndef CORE_COMPAT_XPU_LEGACY_H
#define CORE_COMPAT_XPU_LEGACY_H
#include <xpu/host.h>
namespace xpu {
// Compatibility names for the copy directions: each is a compile-time alias of the
// corresponding xpu constant (xpu::h2d and xpu::d2h respectively).
inline constexpr auto host_to_device = xpu::h2d;
inline constexpr auto device_to_host = xpu::d2h;
template<typename T>
class hd_buffer {
public:
hd_buffer() = default;
hd_buffer(size_t size) : m_buffer(size, xpu::buf_io) {}
T *h() { return xpu::h_view(m_buffer).begin(); }
T *d() { return m_buffer.get(); }
xpu::buffer<T> &underlying() { return m_buffer; }
private:
xpu::buffer<T> m_buffer;
};
template<typename T>
class d_buffer {
public:
d_buffer() = default;
d_buffer(size_t size) : m_buffer(size, xpu::buf_device) {}
T *d() { return m_buffer.get(); }
xpu::buffer<T> &underlying() { return m_buffer; }
private:
xpu::buffer<T> m_buffer;
};
/// Copy an hd_buffer in the requested direction and wait for completion.
///
/// @param buf  Wrapper whose underlying xpu::buffer is handed to the queue.
/// @param dir  Transfer direction, passed unchanged to xpu::queue::copy.
///
/// The queue is a function-local static: it is created on the first call and reused
/// by every later call of the same instantiation (one queue per element type T).
/// The function returns only after the queue's wait() has returned.
template<typename T>
void copy(hd_buffer<T> &buf, direction dir) {
  // Deliberately not named `_Q`: identifiers that begin with an underscore followed by
  // an uppercase letter are reserved for the implementation.
  static xpu::queue copyQueue;
  copyQueue.copy(buf.underlying(), dir);
  copyQueue.wait();
}
/// Tag that distinguishes the host variant from the device variant of a type.
enum class side { host, device };
template<typename T, side S>
struct cmem_io {
using type = T *;
};
template<typename T>
struct cmem_io<T, side::host> {
using type = hd_buffer<T>;
};
template<typename T, side S>
using cmem_io_t = typename cmem_io<T, S>::type;
template<typename T, side S>
struct cmem_device {
using type = T *;
};
template<typename T>
struct cmem_device<T, side::host> {
using type = d_buffer<T>;
};
template<typename T, side S>
using cmem_device_t = typename cmem_device<T, S>::type;
} // namespace xpu
// Compatibility no-op: accepts any arguments and expands to nothing, so a block size
// passed here has no effect. The block size is set by XPU_EXPORT_KERNEL below.
#define XPU_BLOCK_SIZE_1D(...)
// Declares the kernel type `Kernel` for device image `Image` with xpu::no_smem and a
// fixed block size of 64. The variadic arguments are the kernel's parameter list and are
// forwarded to XPU_EXPORT_KERNEL_II. `##__VA_ARGS__` is the GNU comma-swallowing form,
// so the macro also works without any kernel parameters.
#define XPU_EXPORT_KERNEL(Image, Kernel, ...) XPU_EXPORT_KERNEL_II(Image, Kernel, xpu::no_smem, 64, ##__VA_ARGS__)
// Implementation of XPU_EXPORT_KERNEL: emits `struct Kernel` derived from
// xpu::kernel<Image>, with
//   - block_size = xpu::block_size<BlockSize>
//   - context    = xpu::kernel_context<SMEM>
//   - a declaration of operator()(context &ctx, <kernel parameters>)
// The struct is left open-ended (no trailing ';'), so the use site supplies it.
#define XPU_EXPORT_KERNEL_II(Image, Kernel, SMEM, BlockSize, ...) \
struct Kernel : xpu::kernel<Image> { \
using block_size = xpu::block_size<BlockSize>; \
using context = xpu::kernel_context<SMEM>; \
XPU_D void operator()(context &ctx, ##__VA_ARGS__); \
}
// Opens the out-of-class definition of a kernel declared with XPU_EXPORT_KERNEL:
// emits XPU_EXPORT(Kernel); followed by the signature of Kernel::operator(), whose first
// parameter is always `context &ctx`. The kernel body follows the macro invocation.
// The second macro argument (smemIgnored) does not appear in the expansion.
#define XPU_KERNEL(Kernel, smemIgnored, ...) \
XPU_EXPORT(Kernel); \
XPU_D void Kernel::operator()(context &ctx, ##__VA_ARGS__)
#endif
...@@ -15,11 +15,83 @@ ...@@ -15,11 +15,83 @@
using std::unique_ptr; using std::unique_ptr;
using std::vector; using std::vector;
/**
 * Unpack kernel: each thread processes the microslice (MS) with index `id`, where `id`
 * is computed from the block index, block dimension and thread index in x.
 *
 * @param params        Unpack parameters, indexed by the component index msCompIdx[id].
 * @param elinkParams   Elink parameters; the MS uses the entries starting at
 *                      params[comp].fElinkOffset.
 * @param content       Message buffer; the MS starts at content[msMessOffset[id]].
 * @param msMessCount   In: number of messages of each MS. Out: number of digis produced
 *                      for each MS (0 if the MS was rejected).
 * @param msMessOffset  Start offset of each MS, used for both `content` and `digisOut`.
 * @param msStartTime   Per-MS start time; divided by UnpackStsXpu::fkCycleLength to get
 *                      the current epoch cycle.
 * @param msCompIdx     Component index of each MS.
 * @param digisOut      Output digi buffer; the MS writes from digisOut[msMessOffset[id]].
 * @param currentTsTime Passed through to ProcessTsmsbMessage.
 * @param NElems        Number of microslices; threads with id >= NElems do nothing.
 *
 * Changes relative to the previous version:
 *  - message count and offset are kept as 64-bit values, matching the buffer element
 *    type, instead of being narrowed to 32 bit;
 *  - an MS with fewer than two messages now reports 0 digis instead of leaving its
 *    message count in msMessCount[id], which is read as a digi count afterwards.
 */
XPU_BLOCK_SIZE_1D(cbm::algo::UnpackStsXpu::Unpack, 32); XPU_KERNEL(cbm::algo::UnpackK, xpu::no_smem, UnpackStsXpuPar* params, UnpackStsXpuElinkPar* elinkParams,
stsxyter::Message* content, uint64_t* msMessCount, uint64_t* msMessOffset, uint64_t* msStartTime,
uint32_t* msCompIdx, CbmStsDigi* digisOut, const uint64_t currentTsTime, int NElems)
{
  int id = ctx.block_idx_x() * ctx.block_dim_x() + ctx.thread_idx_x();
  if (id >= NElems) return;  // exit if out of bounds

  // --- Too few messages: nothing can be unpacked, so report zero digis for this MS
  if (msMessCount[id] < 2) {
    msMessCount[id] = 0;
    return;
  }

  UnpackStsXpuMonitorData monitor;  //Monitor data, currently not stored. TO DO: Implement!

  // --- Get message count and offset for this MS (64 bit, like the buffers they come from)
  const uint64_t numMessages = msMessCount[id];
  const uint64_t messOffset  = msMessOffset[id];

  // --- Get starting position of this MS in message buffer
  stsxyter::Message* message = &content[messOffset];

  // --- Get starting position of this MS in digi buffer
  CbmStsDigi* digis = &digisOut[messOffset];

  // --- Get component index and unpack parameters of this MS
  const uint32_t comp              = msCompIdx[id];
  const UnpackStsXpuPar& unpackPar = params[comp];

  // --- Get starting position of elink parameters of this MS
  UnpackStsXpuElinkPar* elinkPar = &elinkParams[unpackPar.fElinkOffset];

  // --- Init counter for produced digis
  uint64_t numDigis = 0;

  // --- The first message in the MS is expected to be of type EPOCH and can be ignored.
  if (message[0].GetMessType() != stsxyter::MessType::Epoch) {
    monitor.fNumErrInvalidFirstMessage++;
    msMessCount[id] = 0;
    return;
  }

  // --- The second message must be of type ts_msb.
  // NOTE(review): this reuses the "invalid first message" counter - confirm whether a
  // dedicated counter for the second message is intended.
  if (message[1].GetMessType() != stsxyter::MessType::TsMsb) {
    monitor.fNumErrInvalidFirstMessage++;
    msMessCount[id] = 0;
    return;
  }

  // --- Current TS_MSB epoch cycle
  uint64_t currentCycle = msStartTime[id] / UnpackStsXpu::fkCycleLength;

  // --- Process first message (ts_msb)
  uint32_t currentEpoch     = 0;  ///< Current epoch number within epoch cycle
  uint64_t currentEpochTime = 0;  ///< Current epoch time relative to timeslice in clock cycles
  UnpackStsXpu::ProcessTsmsbMessage(message[1], currentEpoch, currentEpochTime, currentCycle, currentTsTime);

  // --- Message loop (messages 0 and 1 were handled above)
  for (uint64_t messageNr = 2; messageNr < numMessages; messageNr++) {

    // --- Action depending on message type
    switch (message[messageNr].GetMessType()) {
      case stsxyter::MessType::Hit: {
        UnpackStsXpu::ProcessHitMessage(message[messageNr], digis, numDigis, unpackPar, elinkPar, monitor, currentEpochTime);
        break;
      }
      case stsxyter::MessType::TsMsb: {
        UnpackStsXpu::ProcessTsmsbMessage(message[messageNr], currentEpoch, currentEpochTime, currentCycle, currentTsTime);
        break;
      }
      default: {
        monitor.fNumNonHitOrTsbMessage++;
        break;
      }
    }
  }

  // --- Store number of digis in buffer
  msMessCount[id] = numDigis;
}
namespace cbm::algo namespace cbm::algo
{ {
// ---- Algorithm execution --------------------------------------------- // ---- Algorithm execution ---------------------------------------------
UnpackStsXpu::resultType UnpackStsXpu::operator()(const fles::Timeslice* ts, StsReadoutConfig& config) UnpackStsXpu::resultType UnpackStsXpu::operator()(const fles::Timeslice* ts, StsReadoutConfig& config)
{ {
...@@ -60,6 +132,8 @@ namespace cbm::algo ...@@ -60,6 +132,8 @@ namespace cbm::algo
result.second.fNumErrInvalidMsSize++; result.second.fNumErrInvalidMsSize++;
continue; continue;
} }
xpu::t_add_bytes(msDescr.size);
xpu::k_add_bytes<UnpackK>(msDescr.size);
msIdx.push_back(msDescr.idx); msIdx.push_back(msDescr.idx);
compIdx.push_back(comp); compIdx.push_back(comp);
messCount.push_back(numMessages); messCount.push_back(numMessages);
...@@ -101,7 +175,7 @@ namespace cbm::algo ...@@ -101,7 +175,7 @@ namespace cbm::algo
const uint64_t currentTsTime = ts->start_time() / epochLengthInNs; const uint64_t currentTsTime = ts->start_time() / epochLengthInNs;
// --- Do unpacking for each microslice // --- Do unpacking for each microslice
xpu::run_kernel<Unpack>(xpu::grid::n_threads(numMs), fParams.d(), fElinkParams.d(), tsContent.d(), msMessCount.d(), xpu::run_kernel<UnpackK>(xpu::n_threads(numMs), fParams.d(), fElinkParams.d(), tsContent.d(), msMessCount.d(),
msMessOffset.d(), msStartTime.d(), msCompIdx.d(), digisOut.d(), currentTsTime, numMs); msMessOffset.d(), msStartTime.d(), msCompIdx.d(), digisOut.d(), currentTsTime, numMs);
// --- Copy results back to host (only two buffers are modified on device) // --- Copy results back to host (only two buffers are modified on device)
...@@ -109,6 +183,7 @@ namespace cbm::algo ...@@ -109,6 +183,7 @@ namespace cbm::algo
xpu::copy(digisOut, xpu::device_to_host); xpu::copy(digisOut, xpu::device_to_host);
// --- Store digis TO DO: make Kernel for this, needs a way to sum arrays in XPU first // --- Store digis TO DO: make Kernel for this, needs a way to sum arrays in XPU first
xpu::push_timer("Store digis");
for (uint64_t i = 0; i < numMs; i++) { for (uint64_t i = 0; i < numMs; i++) {
uint64_t offset = msMessOffset.h()[i]; uint64_t offset = msMessOffset.h()[i];
uint64_t numDigis = msMessCount.h()[i]; uint64_t numDigis = msMessCount.h()[i];
...@@ -116,85 +191,11 @@ namespace cbm::algo ...@@ -116,85 +191,11 @@ namespace cbm::algo
result.first.push_back(digisOut.h()[offset + j]); result.first.push_back(digisOut.h()[offset + j]);
} }
} }
xpu::pop_timer();
return result; return result;
} }
/**
 * Unpack kernel: each thread processes the microslice (MS) with index `id`, where `id`
 * is computed from xpu::block_idx, xpu::block_dim and xpu::thread_idx in x.
 *
 * @param params        Unpack parameters, indexed by the component index msCompIdx[id].
 * @param elinkParams   Elink parameters; the MS uses the entries starting at
 *                      params[comp].fElinkOffset.
 * @param content       Message buffer; the MS starts at content[msMessOffset[id]].
 * @param msMessCount   In: number of messages of each MS. Out: number of digis produced
 *                      for each MS (0 if the MS was rejected).
 * @param msMessOffset  Start offset of each MS, used for both `content` and `digisOut`.
 * @param msStartTime   Per-MS start time; divided by fkCycleLength to get the current
 *                      epoch cycle.
 * @param msCompIdx     Component index of each MS.
 * @param digisOut      Output digi buffer; the MS writes from digisOut[msMessOffset[id]].
 * @param currentTsTime Passed through to ProcessTsmsbMessage.
 * @param NElems        Number of microslices; threads with id >= NElems do nothing.
 *
 * Message count and offset are kept as 64-bit values, matching the buffer element type.
 * An MS with fewer than two messages reports 0 digis, because msMessCount[id] is read
 * as a digi count once the kernel has finished.
 */
XPU_KERNEL(UnpackStsXpu::Unpack, xpu::no_smem, UnpackStsXpuPar* params, UnpackStsXpuElinkPar* elinkParams,
stsxyter::Message* content, uint64_t* msMessCount, uint64_t* msMessOffset, uint64_t* msStartTime,
uint32_t* msCompIdx, CbmStsDigi* digisOut, const uint64_t currentTsTime, int NElems)
{
  int id = xpu::block_idx::x() * xpu::block_dim::x() + xpu::thread_idx::x();
  if (id >= NElems) return;  // exit if out of bounds

  // --- Too few messages: nothing can be unpacked, so report zero digis for this MS
  if (msMessCount[id] < 2) {
    msMessCount[id] = 0;
    return;
  }

  UnpackStsXpuMonitorData monitor;  //Monitor data, currently not stored. TO DO: Implement!

  // --- Get message count and offset for this MS (64 bit, like the buffers they come from)
  const uint64_t numMessages = msMessCount[id];
  const uint64_t messOffset  = msMessOffset[id];

  // --- Get starting position of this MS in message buffer
  stsxyter::Message* message = &content[messOffset];

  // --- Get starting position of this MS in digi buffer
  CbmStsDigi* digis = &digisOut[messOffset];

  // --- Get component index and unpack parameters of this MS
  const uint32_t comp              = msCompIdx[id];
  const UnpackStsXpuPar& unpackPar = params[comp];

  // --- Get starting position of elink parameters of this MS
  UnpackStsXpuElinkPar* elinkPar = &elinkParams[unpackPar.fElinkOffset];

  // --- Init counter for produced digis
  uint64_t numDigis = 0;

  // --- The first message in the MS is expected to be of type EPOCH and can be ignored.
  if (message[0].GetMessType() != stsxyter::MessType::Epoch) {
    monitor.fNumErrInvalidFirstMessage++;
    msMessCount[id] = 0;
    return;
  }

  // --- The second message must be of type ts_msb.
  // NOTE(review): this reuses the "invalid first message" counter - confirm whether a
  // dedicated counter for the second message is intended.
  if (message[1].GetMessType() != stsxyter::MessType::TsMsb) {
    monitor.fNumErrInvalidFirstMessage++;
    msMessCount[id] = 0;
    return;
  }

  // --- Current TS_MSB epoch cycle
  uint64_t currentCycle = msStartTime[id] / fkCycleLength;

  // --- Process first message (ts_msb)
  uint32_t currentEpoch     = 0;  ///< Current epoch number within epoch cycle
  uint64_t currentEpochTime = 0;  ///< Current epoch time relative to timeslice in clock cycles
  ProcessTsmsbMessage(message[1], currentEpoch, currentEpochTime, currentCycle, currentTsTime);

  // --- Message loop (messages 0 and 1 were handled above)
  for (uint64_t messageNr = 2; messageNr < numMessages; messageNr++) {

    // --- Action depending on message type
    switch (message[messageNr].GetMessType()) {
      case stsxyter::MessType::Hit: {
        ProcessHitMessage(message[messageNr], digis, numDigis, unpackPar, elinkPar, monitor, currentEpochTime);
        break;
      }
      case stsxyter::MessType::TsMsb: {
        ProcessTsmsbMessage(message[messageNr], currentEpoch, currentEpochTime, currentCycle, currentTsTime);
        break;
      }
      default: {
        monitor.fNumNonHitOrTsbMessage++;
        break;
      }
    }
  }

  // --- Store number of digis in buffer
  msMessCount[id] = numDigis;
}
// ----- Process hit message -------------------------------------------- // ----- Process hit message --------------------------------------------
XPU_D inline void UnpackStsXpu::ProcessHitMessage(const stsxyter::Message& message, CbmStsDigi* digis, XPU_D inline void UnpackStsXpu::ProcessHitMessage(const stsxyter::Message& message, CbmStsDigi* digis,
uint64_t& numDigis, const UnpackStsXpuPar& unpackPar, uint64_t& numDigis, const UnpackStsXpuPar& unpackPar,
......
...@@ -5,7 +5,10 @@ ...@@ -5,7 +5,10 @@
#ifndef CBM_ALGO_UNPACKSTSXPU_H #ifndef CBM_ALGO_UNPACKSTSXPU_H
#define CBM_ALGO_UNPACKSTSXPU_H 1 #define CBM_ALGO_UNPACKSTSXPU_H 1
#include "CbmStsDigi.h" #include "CbmStsDigi.h"
#include "gpu/DeviceImage.h"
#include "gpu/xpu_legacy.h"
#include "MicrosliceDescriptor.hpp" #include "MicrosliceDescriptor.hpp"
#include "Timeslice.hpp" #include "Timeslice.hpp"
...@@ -21,7 +24,6 @@ ...@@ -21,7 +24,6 @@
#include "StsReadoutConfig.h" #include "StsReadoutConfig.h"
#include "StsXyterMessage.h" #include "StsXyterMessage.h"
#include "gpu/DeviceImage.h"
namespace cbm::algo namespace cbm::algo
...@@ -73,6 +75,9 @@ namespace cbm::algo ...@@ -73,6 +75,9 @@ namespace cbm::algo
} }
}; };
// Declares the unpack kernel type UnpackK for the GPUReco device image.
// The parameter list given here has to match the one used where the kernel body is
// defined with XPU_KERNEL.
// NOTE(review): presumably expanded by the XPU_EXPORT_KERNEL macro from gpu/xpu_legacy.h
// (included by this header) - confirm.
XPU_EXPORT_KERNEL(GPUReco, UnpackK, UnpackStsXpuPar* params, UnpackStsXpuElinkPar* elinkParams,
stsxyter::Message* content, uint64_t* msMessCount, uint64_t* msMessOffset, uint64_t* msStartTime,
uint32_t* msCompIdx, CbmStsDigi* digisOut, const uint64_t currentTsTime, int NElems);
/** @class UnpackStsXpu /** @class UnpackStsXpu
** @author Pierre-Alain Loizeau <p.-a.loizeau@gsi.de> ** @author Pierre-Alain Loizeau <p.-a.loizeau@gsi.de>
...@@ -101,16 +106,6 @@ namespace cbm::algo ...@@ -101,16 +106,6 @@ namespace cbm::algo
**/ **/
resultType operator()(const fles::Timeslice* ts, StsReadoutConfig& config); resultType operator()(const fles::Timeslice* ts, StsReadoutConfig& config);
struct StsXpuUnpack {
}; // Identifier used by xpu to find where kernels are located
// Run unpacker for each microslice
XPU_EXPORT_KERNEL(GPUReco, Unpack, UnpackStsXpuPar* params, UnpackStsXpuElinkPar* elinkParams,
stsxyter::Message* content, uint64_t* msMessCount, uint64_t* msMessOffset, uint64_t* msStartTime,
uint32_t* msCompIdx, CbmStsDigi* digisOut, const uint64_t currentTsTime, int NElems);
//Stores parameter structs for all elinks //Stores parameter structs for all elinks
xpu::hd_buffer<UnpackStsXpuElinkPar> fElinkParams; xpu::hd_buffer<UnpackStsXpuElinkPar> fElinkParams;
...@@ -120,6 +115,8 @@ namespace cbm::algo ...@@ -120,6 +115,8 @@ namespace cbm::algo
private: // methods private: // methods
friend struct UnpackK;
/** @brief Process a hit message /** @brief Process a hit message
** @param message SMX message (32-bit word) ** @param message SMX message (32-bit word)
** @param digi buffer ** @param digi buffer
......
...@@ -11,5 +11,6 @@ jsroot ...@@ -11,5 +11,6 @@ jsroot
googletest googletest
yaml-cpp/ yaml-cpp/
xpu/ xpu/
xpu-dev
GSL GSL
bba bba
...@@ -52,7 +52,7 @@ if(DOWNLOAD_EXTERNALS) ...@@ -52,7 +52,7 @@ if(DOWNLOAD_EXTERNALS)
if (NOT ${CBM_XPU_DEV}) if (NOT ${CBM_XPU_DEV})
download_project_if_needed(PROJECT xpu download_project_if_needed(PROJECT xpu
GIT_REPOSITORY "https://github.com/fweig/xpu.git" GIT_REPOSITORY "https://github.com/fweig/xpu.git"
GIT_TAG "dd7d1d7e4b4d71079b9cfadbd662a264288308b0" # v0.7.6 GIT_TAG "d142d9ac7135488925b5b94f36da22ec55d9271c" # v0.9.3
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/xpu SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/xpu
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
BUILD_COMMAND "" BUILD_COMMAND ""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment