diff --git a/algo/CMakeLists.txt b/algo/CMakeLists.txt index 2f8182f323d7b82e0e628751da62edff8a2edc55..d107ef399801b3da755d1570265eb42a1c9784f0 100644 --- a/algo/CMakeLists.txt +++ b/algo/CMakeLists.txt @@ -83,6 +83,7 @@ target_link_libraries(Algo external::yaml-cpp external::fles_logging external::fles_ipc + external::fles_monitoring ) target_compile_definitions(Algo PUBLIC NO_ROOT) xpu_attach(Algo ${DEVICE_SRCS}) diff --git a/algo/base/ChainContext.h b/algo/base/ChainContext.h index 3277028b2ab0762e1f27ced91ced778d60471746..0ed47dcc3853f09f162a2111312db2f9fb2244e4 100644 --- a/algo/base/ChainContext.h +++ b/algo/base/ChainContext.h @@ -4,6 +4,9 @@ #ifndef CBM_ALGO_BASE_CHAINCONTEXT_H #define CBM_ALGO_BASE_CHAINCONTEXT_H +#include <Monitor.hpp> +#include <optional> + #include "Options.h" #include "RecoParams.h" @@ -12,6 +15,7 @@ namespace cbm::algo struct ChainContext { Options opts; RecoParams recoParams; + std::optional<cbm::Monitor> monitor; }; } // namespace cbm::algo diff --git a/algo/base/Options.cxx b/algo/base/Options.cxx index ff74e0e5592075490bfa67d7ce80f9bb3ae99b41..efd141f09afccdeb448676a0aafb708cf02c98e6 100644 --- a/algo/base/Options.cxx +++ b/algo/base/Options.cxx @@ -52,6 +52,8 @@ Options::Options(int argc, char** argv) "select device (cpu, cuda0, cuda1, hip0, ...)") ("log-level,l", po::value(&fLogLevel)->default_value(info)->value_name("<level>"), "set log level (debug, info, warning, error, fatal)") + ("monitor,m", po::value(&fMonitorUri)->value_name("<uri>"), + "URI specifying monitor output (e.g. file:tmp/monitor.txt, empty = no monitor)") ("num-ts,n", po::value<int>(&fNumTimeslices)->default_value(-1)->value_name("<num>"), "Stop after <num> timeslices (-1 = all)") ("skip-ts,s", po::value<int>(&fSkipTimeslices)->default_value(0)->value_name("<num>"), diff --git a/algo/base/Options.h b/algo/base/Options.h index 25c3f1b99a32f7f583d5a7a4e49c813c39c35705..61b480a4d2443a17da9518a90f64334c41afb454 100644 --- a/algo/base/Options.h +++ b/algo/base/Options.h @@ -22,6 +22,7 @@ namespace cbm::algo const std::string& InputLocator() const { return fInputLocator; } severity_level LogLevel() const { return fLogLevel; } const std::string& Device() const { return fDevice; } + const std::string& MonitorUri() const { return fMonitorUri; } bool CollectKernelTimes() const { return fCollectKernelTimes; } int NumTimeslices() const { return fNumTimeslices; } int SkipTimeslices() const { return fSkipTimeslices; } @@ -31,6 +32,7 @@ namespace cbm::algo std::string fInputLocator; severity_level fLogLevel; std::string fDevice; + std::string fMonitorUri; bool fCollectKernelTimes = false; int fNumTimeslices = -1; int fSkipTimeslices = 0; diff --git a/algo/base/SubChain.h b/algo/base/SubChain.h index b787c1b64c2c13860233e84ee7a2162ded31f207..b206247190e987cbd9470ae19e5a25caaef01d85 100644 --- a/algo/base/SubChain.h +++ b/algo/base/SubChain.h @@ -18,6 +18,14 @@ namespace cbm::algo const Options& Opts() const { return gsl::make_not_null(fContext)->opts; } const RecoParams& Params() const { return gsl::make_not_null(fContext)->recoParams; } + bool HasMonitor() const { return gsl::make_not_null(fContext)->monitor.has_value(); } + + Monitor& GetMonitor() const + { + // Need Get-prefix to avoid conflict with Monitor-class name + return gsl::make_not_null(fContext)->monitor.value(); + } + private: ChainContext* fContext = nullptr; }; diff --git a/algo/base/util/TsUtils.h b/algo/base/util/TsUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..fa45dc060b6a495c2ae2aed3b309fe350625f4b2 --- /dev/null +++ b/algo/base/util/TsUtils.h @@ -0,0 +1,19 @@ +/* Copyright (C) 2023 FIAS Frankfurt Institute for Advanced Studies, Frankfurt / Main + SPDX-License-Identifier: GPL-3.0-only + Authors: Felix Weiglhofer [committer] */ + +#include <Timeslice.hpp> + +namespace cbm::algo::ts_utils +{ + + inline size_t SizeBytes(const fles::Timeslice& ts) + { + size_t size = 0; + for (size_t i = 0; i < ts.num_components(); i++) { + size += ts.size_component(i); + } + return size; + } + +} // namespace cbm::algo::ts_utils diff --git a/algo/global/Reco.cxx b/algo/global/Reco.cxx index cea651468a632f64f51d860f3252ec0a7230d0ff..9984134c9256a6303ad3d9ab790522095fdf6d57 100644 --- a/algo/global/Reco.cxx +++ b/algo/global/Reco.cxx @@ -3,11 +3,14 @@ Authors: Felix Weiglhofer [committer] */ #include "Reco.h" +#include <System.hpp> + #include <xpu/host.h> #include "config/Yaml.h" #include "log.hpp" #include "util/TimingsFormat.h" +#include "util/TsUtils.h" using namespace cbm::algo; @@ -26,6 +29,11 @@ void Reco::Init(const Options& opts) xpu::device_prop props {xpu::device::active()}; L_(info) << "Running CBM Reco on Device " << props.name(); + if (!opts.MonitorUri().empty()) { + fContext.monitor.emplace(opts.MonitorUri()); + L_(info) << "Monitoring enabled, sending to " << opts.MonitorUri(); + } + // Reco Params fs::path recoParamsPath = opts.ParamsDir() / "RecoParams.yaml"; YAML::Node yaml = YAML::LoadFile(recoParamsPath.string()); @@ -76,6 +84,9 @@ void Reco::Run(const fles::Timeslice& ts) xpu::timings ts_times = xpu::pop_timer(); + GetMonitor().QueueMetric("cbm_reco", {{"hostname", fles::system::current_hostname()}}, + {{"tsProcessed", 1}, {"bytesProcessed", ts_utils::SizeBytes(ts)}}); + PrintTimings(ts_times); }