Skip to content
Snippets Groups Projects
Commit ccfc5b0f authored by Sergei Zharko's avatar Sergei Zharko
Browse files

QA-checker: README.md and cosmetics

parent 2a1ebaa1
No related branches found
No related tags found
1 merge request!1962QA-checker updates
Pipeline #31842 passed
......@@ -49,9 +49,9 @@ namespace cbm::qa::checker
// ----- User interface
/// @brief Adds a version of QA output for a comparison
/// @param version Label of the version
/// @param path Path to the QA output directory for this version
void AddVersion(const char* version, const char* path);
/// @param label Label of the version
/// @param path Path to the QA output directory for this version
void AddVersion(const char* label, const char* path);
/// @brief Adds a dataset name
/// @param datasetName Name of dataset
......@@ -63,13 +63,13 @@ namespace cbm::qa::checker
/// "F": forces canvas creation (even if there is no difference)
/// "R": draw ratio on canvas
/// "D": draw difference on canvas
/// "P": enables bin-by-bin comparison
/// "E": enables exact comparison
/// "S": enables statistical hypothesis test, where is possible
/// "U": enables interval comparison, where is possible
/// "U": enables ratio comparison
/// @return 0: All versions are identical
/// @return 1: Some checks for some histograms did not pass, but the histograms are consistent
/// @return 2: Some histograms are different
int Process(Option_t* comparisonMethod = "P");
int Process(Option_t* comparisonMethod = "E");
/// @brief Registers root-file for storing output
/// @param filename Name of file
......@@ -91,12 +91,12 @@ namespace cbm::qa::checker
/// @brief Sets P-value threshold
/// @param pVal P-value threshold
void SetPvalThreshold(float pVal) { fpObjDB->SetPvalThreshold(pVal); }
void SetPvalThreshold(double pVal) { fpObjDB->SetPvalThreshold(pVal); }
/// @brief Sets ratio accepted range
/// @param min Lower boundary
/// @param max Upper boundary
void SetRatioRange(float min, float max) { fpObjDB->SetRatioRange(min, max); }
void SetRatioRange(double min, double max) { fpObjDB->SetRatioRange(min, max); }
private:
/// @brief Prepares output file (creates directory structure)
......
......@@ -98,7 +98,7 @@ std::vector<ECmpInference> FileHandler::Process(Option_t* opt)
}
bool bSuppressCanvases = sOption.find("b") != std::string::npos;
bool bForceCanvases = sOption.find("c") != std::string::npos;
bool bCmpExact = sOption.find("p") != std::string::npos;
bool bCmpExact = sOption.find("e") != std::string::npos;
bool bCmpChi2 = sOption.find("s") != std::string::npos;
bool bCmpRatio = sOption.find("u") != std::string::npos;
......
......@@ -137,7 +137,7 @@ void ObjectDB::ReadFromYAML(const char* configName)
}
// ----- Define file-object map
if (config["files"]) {
if (const auto& node = config["files"]) {
if (fvObjectFirstGlobIndex.size()) {
LOG(warn) << "ObjectDB: file-object map was defined before. Redefining it from the config file " << configName;
fvFiles.clear();
......@@ -145,16 +145,14 @@ void ObjectDB::ReadFromYAML(const char* configName)
fvObjects.clear();
}
try {
const auto& rootNode = config["files"];
// Calculate total number of objects and files
size_t nFiles = rootNode.size();
size_t nFiles = node.size();
fvFiles.reserve(nFiles);
fvFileLabels.reserve(nFiles);
fvObjects.reserve(nFiles);
// Fill vectors
for (const auto& fileNode : rootNode) {
for (const auto& fileNode : node) {
const auto& objectsNode = fileNode["objects"];
int nObjects = objectsNode ? objectsNode.size() : 0;
auto& objectsInFile = fvObjects.emplace_back();
......@@ -177,14 +175,13 @@ void ObjectDB::ReadFromYAML(const char* configName)
}
// ----- Define dataset names
if (config["datasets"]) {
if (const auto& node = config["datasets"]) {
LOG_IF(fatal, fvDatasets.size())
<< "ObjectDB: dataset names were defined before. Please, use only one initialisation method:"
<< " either configuration file, either setters from macro";
<< " either configuration file, or setters of the checker::Core class";
try {
const auto& rootNode = config["datasets"];
fvDatasets.reserve(rootNode.size());
for (const auto& datasetNode : rootNode) {
fvDatasets.reserve(node.size());
for (const auto& datasetNode : node) {
fvDatasets.push_back(datasetNode.as<std::string>());
}
}
......@@ -197,14 +194,13 @@ void ObjectDB::ReadFromYAML(const char* configName)
}
// ----- Define version names
if (config["versions"]) {
if (const auto& node = config["versions"]) {
LOG_IF(fatal, fvVersionLabels.size())
<< "ObjectDB: dataset names were defined before. Attempt to redefine dataset names from config " << configName;
try {
const auto& rootNode = config["versions"];
fvVersionLabels.reserve(rootNode.size());
fvVersionPaths.reserve(rootNode.size());
for (const auto& versionNode : rootNode) {
fvVersionLabels.reserve(node.size());
fvVersionPaths.reserve(node.size());
for (const auto& versionNode : node) {
fvVersionLabels.push_back(versionNode["label"].as<std::string>());
fvVersionPaths.push_back(versionNode["path"].as<std::string>());
}
......@@ -218,14 +214,29 @@ void ObjectDB::ReadFromYAML(const char* configName)
}
// ----- Define default version
if (config["default_label"]) {
if (const auto& node = config["default_label"]) {
try {
SetDefaultLabel(config["default_label"].as<std::string>().data());
SetDefaultLabel(node.as<std::string>().c_str());
}
catch (const YAML::InvalidNode& exc) {
LOG(fatal) << "ObjectDB:: error while reading checker/default_label node from the config " << configName;
}
}
// ----- Define the comparison parameters
if (const auto& node = config["settings"]) {
try {
double ratioMin = node["ratio_min"].as<double>(fRatioMin);
double ratioMax = node["ratio_max"].as<double>(fRatioMax);
SetRatioRange(ratioMin, ratioMax);
double pValThresh = node["pval_threshold"].as<double>(fPvalThresh);
SetPvalThreshold(pValThresh);
}
catch (const YAML::InvalidNode& exc) {
LOG(fatal) << "ObjectDB:: error while reading checker/versions node from the config " << configName;
}
}
}
// ---------------------------------------------------------------------------------------------------------------------
......@@ -264,6 +275,29 @@ void ObjectDB::ReadObjectList(int iFile)
LOG(info) << "Reading object list from files: done";
}
// ---------------------------------------------------------------------------------------------------------------------
//
/// @brief Sets P-value threshold
/// @param pVal  P-value threshold, must lie strictly inside the open interval (0, 1)
///
/// A value outside (0, 1) is reported via LOG(fatal). NaN is reported as well.
void ObjectDB::SetPvalThreshold(double pVal)
{
  // The check is written as a negated "in range" condition on purpose: every comparison with NaN evaluates to
  // false, so the form "pVal <= 0 || pVal >= 1" would silently accept a NaN threshold.
  const bool bInRange = (pVal > 0 && pVal < 1);
  if (!bInRange) {
    LOG(fatal) << "ObjectDB::SetPvalThreshold(): p-value threshold runs out the range (0, 1): " << pVal;
  }
  fPvalThresh = pVal;
}
// ---------------------------------------------------------------------------------------------------------------------
//
/// @brief Sets ratio accepted range
/// @param min  Lower boundary, must be non-negative and not larger than max
/// @param max  Upper boundary
///
/// An inconsistent range (min < 0, min > max, or a NaN boundary) is reported via LOG(fatal).
void ObjectDB::SetRatioRange(double min, double max)
{
  // Negated "valid" condition: comparisons with NaN are always false, so a NaN boundary is rejected here,
  // whereas the form "min > max || min < 0" would let it through.
  const bool bValid = (min >= 0 && min <= max);
  if (!bValid) {
    // The message names this function (it previously referred to SetPvalThreshold by mistake)
    LOG(fatal) << "ObjectDB::SetRatioRange(): min and max for ratio run out the range: min = " << min
               << ", max = " << max;
  }
  fRatioMin = min;
  fRatioMax = max;
}
// ---------------------------------------------------------------------------------------------------------------------
//
std::string ObjectDB::ToString(int verbose) const
......
......@@ -71,7 +71,6 @@ namespace cbm::qa::checker
/// @return Label of file
const std::string& GetFileLabel(int iFile) const { return fvFileLabels[iFile]; }
public:
/// @brief Gets name of file from indexes of version, file and dataset
/// @param iVersion Index of version
/// @param iFile Index of file
......@@ -153,16 +152,12 @@ namespace cbm::qa::checker
/// @brief Sets P-value threshold
/// @param pVal P-value threshold
void SetPvalThreshold(float pVal) { fPvalThresh = pVal; }
void SetPvalThreshold(double pVal);
/// @brief Sets ratio accepted range
/// @param min Lower boundary
/// @param max Upper boundary
void SetRatioRange(float min, float max)
{
fRatioMin = min;
fRatioMax = max;
}
void SetRatioRange(double min, double max);
private:
/// @brief Reads list of histograms from file
......@@ -191,9 +186,9 @@ namespace cbm::qa::checker
std::vector<std::string> fvVersionLabels; ///< Container of version labels
std::vector<std::string> fvVersionPaths; ///< Container of version paths
float fPvalThresh{0.05}; ///< P-value threshold for histograms equality
float fRatioMax{1.05}; ///< Upper boundary for ratio deviation
float fRatioMin{0.95}; ///< Lower boundary for ratio deviation
double fPvalThresh{0.05}; ///< P-value threshold for histograms equality
double fRatioMax{1.05}; ///< Upper boundary for ratio deviation
double fRatioMin{0.95}; ///< Lower boundary for ratio deviation
};
} // namespace cbm::qa::checker
......
# QA-checker framework
## Introduction
The framework provides tools to compare histograms in ROOT-files, which were created under different conditions
(code, parameter or geometry versions). Within a single routine, multiple data sets (e.g., different detector
setups), multiple ROOT-files and multiple versions can be processed. The output ROOT-file comprises the histograms
from all required variants and optionally the comparison canvases.
## Comparison Methods
The framework provides three different comparison methods: exact equality check, ratio check and $\chi^2$-test.
The exact equality check provides a value and error equality check for each bin of the two histograms (including
under- and overflows). The ratio check estimates the minimal and maximal ratio of the two values among the histogram
bins and tests whether they stay within the defined acceptable range. The $\chi^2$-test runs a standard
routine for ROOT histograms: `TH1::Chi2TestX()` and returns the p-value, which is then compared with a threshold.
A user can combine these methods for achieving a final inference. If the exact equality check is applied, and if
according to it the two histograms are equal, other checks are not executed, even if they are required by the
configuration.
## Interface and Configuration
An interaction between a user and the framework is carried out using the interface of the `cbm::qa::checker::Core`
class. A recommended structure of the ROOT-macro is the following:
```
/* clang-format off */
int check_my_histograms(
const char* configName = "./my_config.yaml",
const char* outputName = "./checks_output.root"
)
/* clang-format on */
{
// Creating an instance of the checker
auto pChecker = std::make_unique<cbm::qa::checker::Core>();
// Configuring the instance of the checker
pChecker->RegisterOutFile(outputName);
pChecker->SetFromYAML(configName);
// Additional configuration functions such as pChecker->AddVersion() or pChecker->SetPvalThreshold() can be
// called here.
// Executes the comparison routine
int res = pChecker->Process("ESR"); // process exact comparison and chi2-test, draw the ratio on canvas
return res;
}
```
The selection of the comparison methods as well as of canvas handling options is performed via an option string
parameter of the `Core::Process()` function. This works in a similar manner as for many ROOT classes. The following
options are supported at the moment:
| Option | Meaning |
|--------|-----------------------------------------------------------------|
| 'E' | enable exact comparison |
| 'S' | enable $\chi^2$-test |
| 'U' | enable ratio comparison |
| 'B' | suppress comparison canvas creation |
| 'F' | force canvas creation (even if the two histograms are the same) |
| 'D' | draw difference on canvas (only for 1D-histograms) |
| 'R' | draw ratio on canvas (only for 1D-histograms) |
The configuration of the routine requires:
- definition of version list (at least two versions);
- definition of dataset list (at least one dataset);
- definition of ROOT-files list (at least one file).
This can be done using the YAML-configuration file and partially using the `Core` class functions.
### Version list configuration
Each version requires a label and a path. The label is used to distinguish histograms from different code versions
inside the output file as well as for legends in the comparison canvases. The path is used for the definition of the
input file. The list of versions can be provided either via `Core::AddVersion(label, path)` function, or via the
node `versions` in the YAML. For example,
```
checker:
versions:
- label: "master"
path: "master_output"
- label: "feature"
path: "feature_output"
```
At least two versions must be provided. The default version can be assigned explicitly via the
`Core::SetDefaultVersion(label)` or in the configuration file:
```
checker:
default_label: "master"
```
If the default version was not defined explicitly, the first version in the list is assigned to be the default one.
### Dataset list configuration
Each dataset provides a label only. The label is used to distinguish input files and the histograms in the output.
One can provide a list of dataset labels either via the `Core::AddDataset(label)`, or in the configuration file. For example:
```
checker:
datasets:
- mcbm_beam_2022_05_23_nickel
- mcbm_beam_2024_05_08_nickel
```
At least one dataset must be provided.
### File and object list configuration
The file and object lists can be set only via the YAML configuration. For each file its name and path in the
filesystem are required. The name of the file is used as name of the directory in the output ROOT-file. To
distinguish between different versions and datasets, one can use specifiers %v and %d, which will be replaced by a
version path and a dataset label respectively.
Let's consider an example. We want to compare QA-output for two different mCBM setups (mcbm_beam_2022_05_23_nickel
and mcbm_beam_2024_05_08_nickel) for two different CbmRoot branches (let's say "master" and "feature"). Let the
QA-files be stored on disk under the following names:
| version | setup | path |
|---------|-----------------------------|----------------------------------------------------------------------------------------------|
| master | mcbm_beam_2022_05_23_nickel | /path/to/master_output/mcbm_beam_2022_05_23_nickel/data/mcbm_beam_2022_05_23_nickel.qa.root |
| master | mcbm_beam_2024_05_08_nickel | /path/to/master_output/mcbm_beam_2024_05_08_nickel/data/mcbm_beam_2024_05_08_nickel.qa.root |
| feature | mcbm_beam_2022_05_23_nickel | /path/to/feature_output/mcbm_beam_2022_05_23_nickel/data/mcbm_beam_2022_05_23_nickel.qa.root |
| feature | mcbm_beam_2024_05_08_nickel | /path/to/feature_output/mcbm_beam_2024_05_08_nickel/data/mcbm_beam_2024_05_08_nickel.qa.root |
According to the pattern of the qa.root files, the filename should be provided as follows:
"/path/to/**%v**/**%d**/data/**%d**.qa.root". Please note, that the file name can contain multiple version and
dataset specifiers.
A user can also provide a list of histograms, which should be compared. It is useful, if only particular histograms
from a heavy ROOT-file are of interest. If the histogram list for a particular input file is not provided, the
framework will scan all the versions of this file in all the defined datasets and collect a full list of
histograms. If the list of histograms diverges for different datasets, the absent ones will be just skipped.
Below is an example of the configuration file:
```
checker:
files:
- name: "/path/to/%v/%d/data/%d.qa.root"
label: "qa"
objects: # optional node
- CbmCaInputQaSts/efficiencies/casts_reco_eff_vs_r_st0
- CbmCaInputQaSts/efficiencies/casts_reco_eff_vs_xy_st0
- CbmCaInputQaSts/histograms/casts_pull_t_st2
- CbmCaInputQaSts/histograms/casts_res_x_vs_x_st2
```
### Comparison settings
At the moment, all the comparison settings are global for all processed objects. The settings include the range
of accepted histogram ratio and a p-value threshold for the chi2-test. These parameters can be specified either via
`Core` class interface (methods `Core::SetRatioRange(min, max)` and `Core::SetPvalThreshold(pValMin)`), or in the
configuration file:
```
checker:
settings:
ratio_min: 0.9
ratio_max: 1.1
pval_threshold: 0.01
```
If these values are not defined, the default ones will be used (ratio_min = 0.95, ratio_max = 1.05,
pval_threshold = 0.05).
### Full YAML-config example
The full configuration file will be as follows:
```
checker:
settings:
ratio_min: 0.9
ratio_max: 1.1
pval_threshold: 0.01
versions:
- label: "master"
path: "master_output"
- label: "feature"
path: "feature_output"
default_label: "master"
datasets:
- mcbm_beam_2022_05_23_nickel
- mcbm_beam_2024_05_08_nickel
files:
- name: "/path/to/%v/%d/data/%d.qa.root"
label: "qa"
objects: # optional node
- CbmCaInputQaSts/efficiencies/casts_reco_eff_vs_r_st0
- CbmCaInputQaSts/efficiencies/casts_reco_eff_vs_xy_st0
- CbmCaInputQaSts/histograms/casts_pull_t_st2
- CbmCaInputQaSts/histograms/casts_res_x_vs_x_st2
```
## Framework Inference
For each particular compared object within two different versions an inference on the equality is produced. The
inference can have one of three following values:
| Inference | Label      | Meaning                                                                          |
|-----------|------------|----------------------------------------------------------------------------------|
| 0         | same       | All the comparison methods showed equality                                       |
| 1         | consistent | At least one of the comparison methods showed equality, but at least one did not |
| 2         | different  | None of the comparison methods showed equality                                   |
The final inference of the routine returns the maximum inference value among all the objects for all the processed
files.
## Notes
Please note that the framework accepts **only** the ROOT-files, which have a directory system built on the
`TDirectory` classes. The files with the `TFolder`-based directory system are not supported.
......@@ -6,6 +6,10 @@
checker:
settings:
ratio_min: 0.95
ratio_max: 1.05
pval_threshold: 0.05
files:
- name: "%v/%d_qa_ts_eb_ideal.qa.root"
label: qa
......
......@@ -50,7 +50,7 @@ int qa_compare(
pQaChecker->SetDefaultVersion("old");
//// ----- Run comparision routine
bool res = pQaChecker->Process("P");
bool res = pQaChecker->Process("E");
std::cout << "Macro finished successfully." << std::endl;
return res;
}
......@@ -36,7 +36,7 @@ int qa_compare_ca(
pQaChecker->SetFromYAML(configName); // Read file-object map
//// ----- Run comparision routine
int res = pQaChecker->Process("UPS");
int res = pQaChecker->Process("UES");
std::cout << "Macro finished successfully." << std::endl;
return res;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment