Skip to content
Snippets Groups Projects
Commit 6dccee20 authored by Felix Weiglhofer's avatar Felix Weiglhofer
Browse files

Bump xpu version.

parent e017f797
No related branches found
No related tags found
No related merge requests found
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
namespace cbm::algo namespace cbm::algo
{ {
struct GPUReco { struct GPUReco : xpu::device_image {
}; };
} // namespace cbm::algo } // namespace cbm::algo
......
/* Copyright (C) 2022 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
SPDX-License-Identifier: GPL-3.0-only
Authors: Felix Weiglhofer [committer]*/
#ifndef CORE_COMPAT_XPU_LEGACY_H
#define CORE_COMPAT_XPU_LEGACY_H
#include <xpu/host.h>
namespace xpu {
// Legacy spellings of the copy directions. They are plain aliases of
// xpu::h2d and xpu::d2h so that code written against the old names
// (e.g. xpu::copy(buf, xpu::device_to_host)) keeps compiling.
inline constexpr auto host_to_device = xpu::h2d;
inline constexpr auto device_to_host = xpu::d2h;
/// Compatibility wrapper offering the legacy h()/d() accessor pair on top of
/// an xpu::buffer<T>.
///
/// A sized instance creates its buffer with the xpu::buf_io flag; a
/// default-constructed instance wraps a default-constructed xpu::buffer<T>.
template<typename T>
class hd_buffer {
  xpu::buffer<T> m_buffer;  ///< The wrapped buffer all accessors operate on.

public:
  /// Wraps a default-constructed xpu::buffer<T>.
  hd_buffer() = default;

  /// Creates the wrapped buffer, forwarding @p size together with xpu::buf_io.
  hd_buffer(size_t size) : m_buffer(size, xpu::buf_io) {}

  /// @return begin() of an xpu::h_view constructed over the wrapped buffer.
  T* h()
  {
    auto hostView = xpu::h_view(m_buffer);
    return hostView.begin();
  }

  /// @return The pointer reported by get() of the wrapped buffer.
  T* d() { return m_buffer.get(); }

  /// @return Reference to the wrapped xpu::buffer<T>.
  xpu::buffer<T>& underlying() { return m_buffer; }
};
/// Compatibility wrapper offering the legacy d() accessor on top of an
/// xpu::buffer<T>.
///
/// A sized instance creates its buffer with the xpu::buf_device flag; a
/// default-constructed instance wraps a default-constructed xpu::buffer<T>.
template<typename T>
class d_buffer {
  xpu::buffer<T> m_buffer;  ///< The wrapped buffer all accessors operate on.

public:
  /// Wraps a default-constructed xpu::buffer<T>.
  d_buffer() = default;

  /// Creates the wrapped buffer, forwarding @p size together with xpu::buf_device.
  d_buffer(size_t size) : m_buffer(size, xpu::buf_device) {}

  /// @return The pointer reported by get() of the wrapped buffer.
  T* d() { return m_buffer.get(); }

  /// @return Reference to the wrapped xpu::buffer<T>.
  xpu::buffer<T>& underlying() { return m_buffer; }
};
/// Legacy copy helper for hd_buffer.
///
/// Submits a copy of the buffer wrapped by @p buf in direction @p dir to an
/// xpu::queue and waits on that queue before returning.
///
/// @param buf  Buffer wrapper whose underlying xpu::buffer<T> is copied.
/// @param dir  Copy direction (e.g. xpu::host_to_device / xpu::device_to_host).
template<typename T>
void copy(hd_buffer<T>& buf, direction dir)
{
  // Function-local static: the queue is constructed on the first call and
  // reused by every later call of the same instantiation (one queue per T).
  // The previous name `_Q` (underscore + uppercase letter) is reserved for
  // the implementation, hence the rename.
  static xpu::queue transferQueue;

  transferQueue.copy(buf.underlying(), dir);
  transferQueue.wait();  // return only after the queue has been waited on
}
/// Tag selecting which variant of the cmem_io / cmem_device type traits is
/// used; passed to them as a non-type template argument.
enum class side { host, device };
template<typename T, side S>
struct cmem_io {
using type = T *;
};
template<typename T>
struct cmem_io<T, side::host> {
using type = hd_buffer<T>;
};
template<typename T, side S>
using cmem_io_t = typename cmem_io<T, S>::type;
template<typename T, side S>
struct cmem_device {
using type = T *;
};
template<typename T>
struct cmem_device<T, side::host> {
using type = d_buffer<T>;
};
template<typename T, side S>
using cmem_device_t = typename cmem_device<T, S>::type;
} // namespace xpu
// Legacy block-size annotation. Expands to nothing, so any block size given
// through this macro is discarded; kernels declared via XPU_EXPORT_KERNEL
// below use the block size hard-coded there instead.
#define XPU_BLOCK_SIZE_1D(...)
// Legacy kernel declaration: forwards to XPU_EXPORT_KERNEL_II with
// xpu::no_smem as shared-memory type and a fixed block size of 64.
// The variadic arguments are the kernel's parameter list.
#define XPU_EXPORT_KERNEL(Image, Kernel, ...) XPU_EXPORT_KERNEL_II(Image, Kernel, xpu::no_smem, 64, ##__VA_ARGS__)
// Declares `struct Kernel` derived from xpu::kernel<Image>, with
//   - block_size = xpu::block_size<BlockSize>
//   - context    = xpu::kernel_context<SMEM>
// and a device-side call operator taking the context followed by the
// variadic parameter list. Only the declaration is emitted; the body is
// supplied through XPU_KERNEL. The expansion has no trailing semicolon, the
// caller provides it. `##__VA_ARGS__` relies on the GNU comma-swallowing
// extension.
#define XPU_EXPORT_KERNEL_II(Image, Kernel, SMEM, BlockSize, ...) \
struct Kernel : xpu::kernel<Image> { \
using block_size = xpu::block_size<BlockSize>; \
using context = xpu::kernel_context<SMEM>; \
XPU_D void operator()(context &ctx, ##__VA_ARGS__); \
}
// Legacy kernel definition: emits XPU_EXPORT(Kernel); and then opens the
// out-of-class definition of Kernel::operator(). The kernel body follows the
// macro invocation and can refer to the context as `ctx`. The second macro
// argument is accepted for source compatibility but does not appear in the
// expansion.
#define XPU_KERNEL(Kernel, smemIgnored, ...) \
XPU_EXPORT(Kernel); \
XPU_D void Kernel::operator()(context &ctx, ##__VA_ARGS__)
#endif
...@@ -15,11 +15,83 @@ ...@@ -15,11 +15,83 @@
using std::unique_ptr; using std::unique_ptr;
using std::vector; using std::vector;
// Device kernel cbm::algo::UnpackK: unpacks one microslice (MS) per thread.
//
// Each thread derives a flat index `id` from its block and thread index and
// works on entry `id` of the per-MS arrays:
//   params        per-component unpack parameters, indexed by msCompIdx[id]
//   elinkParams   elink parameters; the MS uses the range starting at
//                 params[comp].fElinkOffset
//   content       message buffer; the MS starts at content[msMessOffset[id]]
//   msMessCount   in:  number of messages of the MS
//                 out: number of digis produced (0 if the first two messages
//                      are not Epoch followed by TsMsb)
//   msMessOffset  start offset of the MS, used for both `content` and `digisOut`
//   msStartTime   MS start time, divided by fkCycleLength to get the TS_MSB cycle
//   msCompIdx     component index of the MS
//   digisOut      output digi buffer; the MS writes from digisOut[msMessOffset[id]]
//   currentTsTime passed through to ProcessTsmsbMessage
//   NElems        number of valid entries; threads with id >= NElems do nothing
//
// NOTE(review): when msMessCount[id] < 2 the kernel returns without touching
// msMessCount[id], whereas every other exit path stores a digi count there.
// If an MS with exactly one message can reach this kernel, the host-side
// "Store digis" loop would read that 1 as a digi count - confirm that such
// microslices are filtered out before the launch.
// NOTE(review): msMessCount / msMessOffset hold uint64_t values but are
// narrowed to uint32_t below - presumably the buffers never exceed 2^32
// messages; verify.
XPU_BLOCK_SIZE_1D(cbm::algo::UnpackStsXpu::Unpack, 32); XPU_KERNEL(cbm::algo::UnpackK, xpu::no_smem, UnpackStsXpuPar* params, UnpackStsXpuElinkPar* elinkParams,
stsxyter::Message* content, uint64_t* msMessCount, uint64_t* msMessOffset, uint64_t* msStartTime,
uint32_t* msCompIdx, CbmStsDigi* digisOut, const uint64_t currentTsTime, int NElems)
{
// Flat thread index = microslice index handled by this thread
int id = ctx.block_idx_x() * ctx.block_dim_x() + ctx.thread_idx_x();
if (id >= NElems || msMessCount[id] < 2) return; // exit if out of bounds or too few messages
// Local to this thread; the counters incremented below are discarded when the kernel returns
UnpackStsXpuMonitorData monitor; //Monitor data, currently not stored. TO DO: Implement!
// --- Get message count and offset for this MS
const uint32_t numMessages = msMessCount[id];
const uint32_t messOffset = msMessOffset[id];
// --- Get starting position of this MS in message buffer
stsxyter::Message* message = &content[messOffset];
// --- Get starting position of this MS in digi buffer
CbmStsDigi* digis = &digisOut[messOffset];
// --- Get component index and unpack parameters of this MS
const uint32_t comp = msCompIdx[id];
const UnpackStsXpuPar& unpackPar = params[comp];
// --- Get starting position of elink parameters of this MS
UnpackStsXpuElinkPar* elinkPar = &elinkParams[unpackPar.fElinkOffset];
// --- Init counter for produced digis
uint64_t numDigis = 0;
// --- The first message in the MS is expected to be of type EPOCH and can be ignored.
if (message[0].GetMessType() != stsxyter::MessType::Epoch) {
monitor.fNumErrInvalidFirstMessage++;
// Report zero digis for this MS
msMessCount[id] = 0;
return;
}
// --- The second message must be of type ts_msb.
if (message[1].GetMessType() != stsxyter::MessType::TsMsb) {
// Counted with the same monitor field as an invalid first message
monitor.fNumErrInvalidFirstMessage++;
// Report zero digis for this MS
msMessCount[id] = 0;
return;
}
// --- Current TS_MSB epoch cycle
uint64_t currentCycle = msStartTime[id] / UnpackStsXpu::fkCycleLength;
// --- Process first message (ts_msb)
uint32_t currentEpoch = 0; ///< Current epoch number within epoch cycle
uint64_t currentEpochTime = 0; ///< Current epoch time relative to timeslice in clock cycles
UnpackStsXpu::ProcessTsmsbMessage(message[1], currentEpoch, currentEpochTime, currentCycle, currentTsTime);
// --- Message loop (messages 0 and 1 were handled above)
for (uint32_t messageNr = 2; messageNr < numMessages; messageNr++) {
// --- Action depending on message type
switch (message[messageNr].GetMessType()) {
case stsxyter::MessType::Hit: {
UnpackStsXpu::ProcessHitMessage(message[messageNr], digis, numDigis, unpackPar, elinkPar, monitor, currentEpochTime);
break;
}
case stsxyter::MessType::TsMsb: {
UnpackStsXpu::ProcessTsmsbMessage(message[messageNr], currentEpoch, currentEpochTime, currentCycle, currentTsTime);
break;
}
default: {
// Any other message type is only counted
monitor.fNumNonHitOrTsbMessage++;
break;
}
}
}
// --- Store number of digis in buffer (overwrites the message count of this MS)
msMessCount[id] = numDigis;
}
namespace cbm::algo namespace cbm::algo
{ {
// ---- Algorithm execution --------------------------------------------- // ---- Algorithm execution ---------------------------------------------
UnpackStsXpu::resultType UnpackStsXpu::operator()(const fles::Timeslice* ts, StsReadoutConfig& config) UnpackStsXpu::resultType UnpackStsXpu::operator()(const fles::Timeslice* ts, StsReadoutConfig& config)
{ {
...@@ -60,6 +132,8 @@ namespace cbm::algo ...@@ -60,6 +132,8 @@ namespace cbm::algo
result.second.fNumErrInvalidMsSize++; result.second.fNumErrInvalidMsSize++;
continue; continue;
} }
xpu::t_add_bytes(msDescr.size);
xpu::k_add_bytes<UnpackK>(msDescr.size);
msIdx.push_back(msDescr.idx); msIdx.push_back(msDescr.idx);
compIdx.push_back(comp); compIdx.push_back(comp);
messCount.push_back(numMessages); messCount.push_back(numMessages);
...@@ -101,7 +175,7 @@ namespace cbm::algo ...@@ -101,7 +175,7 @@ namespace cbm::algo
const uint64_t currentTsTime = ts->start_time() / epochLengthInNs; const uint64_t currentTsTime = ts->start_time() / epochLengthInNs;
// --- Do unpacking for each microslice // --- Do unpacking for each microslice
xpu::run_kernel<Unpack>(xpu::grid::n_threads(numMs), fParams.d(), fElinkParams.d(), tsContent.d(), msMessCount.d(), xpu::run_kernel<UnpackK>(xpu::n_threads(numMs), fParams.d(), fElinkParams.d(), tsContent.d(), msMessCount.d(),
msMessOffset.d(), msStartTime.d(), msCompIdx.d(), digisOut.d(), currentTsTime, numMs); msMessOffset.d(), msStartTime.d(), msCompIdx.d(), digisOut.d(), currentTsTime, numMs);
// --- Copy results back to host (only two buffers are modified on device) // --- Copy results back to host (only two buffers are modified on device)
...@@ -109,6 +183,7 @@ namespace cbm::algo ...@@ -109,6 +183,7 @@ namespace cbm::algo
xpu::copy(digisOut, xpu::device_to_host); xpu::copy(digisOut, xpu::device_to_host);
// --- Store digis TO DO: make Kernel for this, needs a way to sum arrays in XPU first // --- Store digis TO DO: make Kernel for this, needs a way to sum arrays in XPU first
xpu::push_timer("Store digis");
for (uint64_t i = 0; i < numMs; i++) { for (uint64_t i = 0; i < numMs; i++) {
uint64_t offset = msMessOffset.h()[i]; uint64_t offset = msMessOffset.h()[i];
uint64_t numDigis = msMessCount.h()[i]; uint64_t numDigis = msMessCount.h()[i];
...@@ -116,85 +191,11 @@ namespace cbm::algo ...@@ -116,85 +191,11 @@ namespace cbm::algo
result.first.push_back(digisOut.h()[offset + j]); result.first.push_back(digisOut.h()[offset + j]);
} }
} }
xpu::pop_timer();
return result; return result;
} }
// Device kernel UnpackStsXpu::Unpack: unpacks one microslice (MS) per thread.
//
// Each thread derives a flat index `id` from the xpu block/thread index
// helpers and works on entry `id` of the per-MS arrays. msMessCount[id] is
// read as the number of messages of the MS and, unless the thread leaves
// through the first guard, is overwritten with the number of digis produced
// (0 if the first two messages are not Epoch followed by TsMsb). Messages are
// read from content[msMessOffset[id]] onwards, digis are written from
// digisOut[msMessOffset[id]] onwards.
//
// NOTE(review): when msMessCount[id] < 2 the kernel returns without touching
// msMessCount[id]; confirm that the host never launches it for microslices
// with exactly one message, otherwise that 1 is later read as a digi count.
XPU_KERNEL(UnpackStsXpu::Unpack, xpu::no_smem, UnpackStsXpuPar* params, UnpackStsXpuElinkPar* elinkParams,
stsxyter::Message* content, uint64_t* msMessCount, uint64_t* msMessOffset, uint64_t* msStartTime,
uint32_t* msCompIdx, CbmStsDigi* digisOut, const uint64_t currentTsTime, int NElems)
{
// Flat thread index = microslice index handled by this thread
int id = xpu::block_idx::x() * xpu::block_dim::x() + xpu::thread_idx::x();
if (id >= NElems || msMessCount[id] < 2) return; // exit if out of bounds or too few messages
// Local to this thread; the counters incremented below are discarded when the kernel returns
UnpackStsXpuMonitorData monitor; //Monitor data, currently not stored. TO DO: Implement!
// --- Get message count and offset for this MS
const uint32_t numMessages = msMessCount[id];
const uint32_t messOffset = msMessOffset[id];
// --- Get starting position of this MS in message buffer
stsxyter::Message* message = &content[messOffset];
// --- Get starting position of this MS in digi buffer
CbmStsDigi* digis = &digisOut[messOffset];
// --- Get component index and unpack parameters of this MS
const uint32_t comp = msCompIdx[id];
const UnpackStsXpuPar& unpackPar = params[comp];
// --- Get starting position of elink parameters of this MS
UnpackStsXpuElinkPar* elinkPar = &elinkParams[unpackPar.fElinkOffset];
// --- Init counter for produced digis
uint64_t numDigis = 0;
// --- The first message in the MS is expected to be of type EPOCH and can be ignored.
if (message[0].GetMessType() != stsxyter::MessType::Epoch) {
monitor.fNumErrInvalidFirstMessage++;
// Report zero digis for this MS
msMessCount[id] = 0;
return;
}
// --- The second message must be of type ts_msb.
if (message[1].GetMessType() != stsxyter::MessType::TsMsb) {
// Counted with the same monitor field as an invalid first message
monitor.fNumErrInvalidFirstMessage++;
// Report zero digis for this MS
msMessCount[id] = 0;
return;
}
// --- Current TS_MSB epoch cycle
uint64_t currentCycle = msStartTime[id] / fkCycleLength;
// --- Process first message (ts_msb)
uint32_t currentEpoch = 0; ///< Current epoch number within epoch cycle
uint64_t currentEpochTime = 0; ///< Current epoch time relative to timeslice in clock cycles
ProcessTsmsbMessage(message[1], currentEpoch, currentEpochTime, currentCycle, currentTsTime);
// --- Message loop (messages 0 and 1 were handled above)
for (uint32_t messageNr = 2; messageNr < numMessages; messageNr++) {
// --- Action depending on message type
switch (message[messageNr].GetMessType()) {
case stsxyter::MessType::Hit: {
ProcessHitMessage(message[messageNr], digis, numDigis, unpackPar, elinkPar, monitor, currentEpochTime);
break;
}
case stsxyter::MessType::TsMsb: {
ProcessTsmsbMessage(message[messageNr], currentEpoch, currentEpochTime, currentCycle, currentTsTime);
break;
}
default: {
// Any other message type is only counted
monitor.fNumNonHitOrTsbMessage++;
break;
}
}
}
// --- Store number of digis in buffer (overwrites the message count of this MS)
msMessCount[id] = numDigis;
}
// ----- Process hit message -------------------------------------------- // ----- Process hit message --------------------------------------------
XPU_D inline void UnpackStsXpu::ProcessHitMessage(const stsxyter::Message& message, CbmStsDigi* digis, XPU_D inline void UnpackStsXpu::ProcessHitMessage(const stsxyter::Message& message, CbmStsDigi* digis,
uint64_t& numDigis, const UnpackStsXpuPar& unpackPar, uint64_t& numDigis, const UnpackStsXpuPar& unpackPar,
......
...@@ -5,7 +5,10 @@ ...@@ -5,7 +5,10 @@
#ifndef CBM_ALGO_UNPACKSTSXPU_H #ifndef CBM_ALGO_UNPACKSTSXPU_H
#define CBM_ALGO_UNPACKSTSXPU_H 1 #define CBM_ALGO_UNPACKSTSXPU_H 1
#include "CbmStsDigi.h" #include "CbmStsDigi.h"
#include "gpu/DeviceImage.h"
#include "gpu/xpu_legacy.h"
#include "MicrosliceDescriptor.hpp" #include "MicrosliceDescriptor.hpp"
#include "Timeslice.hpp" #include "Timeslice.hpp"
...@@ -21,7 +24,6 @@ ...@@ -21,7 +24,6 @@
#include "StsReadoutConfig.h" #include "StsReadoutConfig.h"
#include "StsXyterMessage.h" #include "StsXyterMessage.h"
#include "gpu/DeviceImage.h"
namespace cbm::algo namespace cbm::algo
...@@ -73,6 +75,9 @@ namespace cbm::algo ...@@ -73,6 +75,9 @@ namespace cbm::algo
} }
}; };
// Declares the kernel type UnpackK for the GPUReco device image. The
// parameter list must match the one given to XPU_KERNEL where the kernel
// body is defined.
XPU_EXPORT_KERNEL(GPUReco, UnpackK, UnpackStsXpuPar* params, UnpackStsXpuElinkPar* elinkParams,
stsxyter::Message* content, uint64_t* msMessCount, uint64_t* msMessOffset, uint64_t* msStartTime,
uint32_t* msCompIdx, CbmStsDigi* digisOut, const uint64_t currentTsTime, int NElems);
/** @class UnpackStsXpu /** @class UnpackStsXpu
** @author Pierre-Alain Loizeau <p.-a.loizeau@gsi.de> ** @author Pierre-Alain Loizeau <p.-a.loizeau@gsi.de>
...@@ -101,16 +106,6 @@ namespace cbm::algo ...@@ -101,16 +106,6 @@ namespace cbm::algo
**/ **/
resultType operator()(const fles::Timeslice* ts, StsReadoutConfig& config); resultType operator()(const fles::Timeslice* ts, StsReadoutConfig& config);
struct StsXpuUnpack {
}; // Identifier used by xpu to find where kernels are located
// Run unpacker for each microslice
XPU_EXPORT_KERNEL(GPUReco, Unpack, UnpackStsXpuPar* params, UnpackStsXpuElinkPar* elinkParams,
stsxyter::Message* content, uint64_t* msMessCount, uint64_t* msMessOffset, uint64_t* msStartTime,
uint32_t* msCompIdx, CbmStsDigi* digisOut, const uint64_t currentTsTime, int NElems);
//Stores parameter structs for all elinks //Stores parameter structs for all elinks
xpu::hd_buffer<UnpackStsXpuElinkPar> fElinkParams; xpu::hd_buffer<UnpackStsXpuElinkPar> fElinkParams;
...@@ -120,6 +115,8 @@ namespace cbm::algo ...@@ -120,6 +115,8 @@ namespace cbm::algo
private: // methods private: // methods
friend struct UnpackK;
/** @brief Process a hit message /** @brief Process a hit message
** @param message SMX message (32-bit word) ** @param message SMX message (32-bit word)
** @param digi buffer ** @param digi buffer
......
...@@ -11,5 +11,6 @@ jsroot ...@@ -11,5 +11,6 @@ jsroot
googletest googletest
yaml-cpp/ yaml-cpp/
xpu/ xpu/
xpu-dev
GSL GSL
bba bba
...@@ -52,7 +52,7 @@ if(DOWNLOAD_EXTERNALS) ...@@ -52,7 +52,7 @@ if(DOWNLOAD_EXTERNALS)
if (NOT ${CBM_XPU_DEV}) if (NOT ${CBM_XPU_DEV})
download_project_if_needed(PROJECT xpu download_project_if_needed(PROJECT xpu
GIT_REPOSITORY "https://github.com/fweig/xpu.git" GIT_REPOSITORY "https://github.com/fweig/xpu.git"
GIT_TAG "dd7d1d7e4b4d71079b9cfadbd662a264288308b0" # v0.7.6 GIT_TAG "d142d9ac7135488925b5b94f36da22ec55d9271c" # v0.9.3
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/xpu SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/xpu
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
BUILD_COMMAND "" BUILD_COMMAND ""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment