Skip to content
Snippets Groups Projects
Commit ba686896 authored by Felix Weiglhofer's avatar Felix Weiglhofer
Browse files

algo: Add monitoring support in cbmreco.

parent 291e393f
No related branches found
No related tags found
1 merge request!1180algo: Add monitoring support in cbmreco.
......@@ -83,6 +83,7 @@ target_link_libraries(Algo
external::yaml-cpp
external::fles_logging
external::fles_ipc
external::fles_monitoring
)
target_compile_definitions(Algo PUBLIC NO_ROOT)
xpu_attach(Algo ${DEVICE_SRCS})
......
......@@ -4,6 +4,9 @@
#ifndef CBM_ALGO_BASE_CHAINCONTEXT_H
#define CBM_ALGO_BASE_CHAINCONTEXT_H
#include <Monitor.hpp>
#include <optional>
#include "Options.h"
#include "RecoParams.h"
......@@ -12,6 +15,7 @@ namespace cbm::algo
struct ChainContext {
Options opts;
RecoParams recoParams;
std::optional<cbm::Monitor> monitor;
};
} // namespace cbm::algo
......
......@@ -52,6 +52,8 @@ Options::Options(int argc, char** argv)
"select device (cpu, cuda0, cuda1, hip0, ...)")
("log-level,l", po::value(&fLogLevel)->default_value(info)->value_name("<level>"),
"set log level (debug, info, warning, error, fatal)")
("monitor,m", po::value(&fMonitorUri)->value_name("<uri>"),
"URI specifying monitor output (e.g. file:tmp/monitor.txt, empty = no monitor)")
("num-ts,n", po::value<int>(&fNumTimeslices)->default_value(-1)->value_name("<num>"),
"Stop after <num> timeslices (-1 = all)")
("skip-ts,s", po::value<int>(&fSkipTimeslices)->default_value(0)->value_name("<num>"),
......
......@@ -22,6 +22,7 @@ namespace cbm::algo
const std::string& InputLocator() const { return fInputLocator; }
severity_level LogLevel() const { return fLogLevel; }
const std::string& Device() const { return fDevice; }
const std::string& MonitorUri() const { return fMonitorUri; }
bool CollectKernelTimes() const { return fCollectKernelTimes; }
int NumTimeslices() const { return fNumTimeslices; }
int SkipTimeslices() const { return fSkipTimeslices; }
......@@ -31,6 +32,7 @@ namespace cbm::algo
std::string fInputLocator;
severity_level fLogLevel;
std::string fDevice;
std::string fMonitorUri;
bool fCollectKernelTimes = false;
int fNumTimeslices = -1;
int fSkipTimeslices = 0;
......
......@@ -18,6 +18,14 @@ namespace cbm::algo
const Options& Opts() const { return gsl::make_not_null(fContext)->opts; }
const RecoParams& Params() const { return gsl::make_not_null(fContext)->recoParams; }
bool HasMonitor() const { return gsl::make_not_null(fContext)->monitor.has_value(); }
Monitor& GetMonitor() const
{
// Need Get-prefix to avoid conflict with Monitor-class name
return gsl::make_not_null(fContext)->monitor.value();
}
private:
ChainContext* fContext = nullptr;
};
......
/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main
SPDX-License-Identifier: GPL-3.0-only
Authors: Felix Weiglhofer [committer] */
#include <Timeslice.hpp>
namespace cbm::algo::ts_utils
{
inline size_t SizeBytes(const fles::Timeslice& ts)
{
size_t size = 0;
for (size_t i = 0; i < ts.num_components(); i++) {
size += ts.size_component(i);
}
return size;
}
} // namespace cbm::algo::ts_utils
......@@ -3,11 +3,14 @@
Authors: Felix Weiglhofer [committer] */
#include "Reco.h"
#include <System.hpp>
#include <xpu/host.h>
#include "config/Yaml.h"
#include "log.hpp"
#include "util/TimingsFormat.h"
#include "util/TsUtils.h"
using namespace cbm::algo;
......@@ -26,6 +29,11 @@ void Reco::Init(const Options& opts)
xpu::device_prop props {xpu::device::active()};
L_(info) << "Running CBM Reco on Device " << props.name();
if (!opts.MonitorUri().empty()) {
fContext.monitor.emplace(opts.MonitorUri());
L_(info) << "Monitoring enabled, sending to " << opts.MonitorUri();
}
// Reco Params
fs::path recoParamsPath = opts.ParamsDir() / "RecoParams.yaml";
YAML::Node yaml = YAML::LoadFile(recoParamsPath.string());
......@@ -76,6 +84,9 @@ void Reco::Run(const fles::Timeslice& ts)
xpu::timings ts_times = xpu::pop_timer();
GetMonitor().QueueMetric("cbm_reco", {{"hostname", fles::system::current_hostname()}},
{{"tsProcessed", 1}, {"bytesProcessed", ts_utils::SizeBytes(ts)}});
PrintTimings(ts_times);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment