From 0b1fc86af0699d990d009cee67ed98f914466228 Mon Sep 17 00:00:00 2001 From: "P.-A. Loizeau" <p.-a.loizeau@gsi.de> Date: Fri, 8 Jul 2022 17:05:22 +0200 Subject: [PATCH] [MQ] Sbatch scripts fixes following investigation of problems seen online in Au runs - Change HWM for Sink process input to 1 as this applies to each link to an Evt builder individually and not to the sum of all as expected (solves huge bufferings) - Do not re-order the TS in the Sink by default (leads to buffering if one branch is really slow) - Add option for DigiEvent I/O between builders and Sink - Reduce HWM at input and output of Unpackes and Event builders to 1 - Add bash and sbatch scripts for testing replay of a run on the mFLES cluster (single source node + single processing node) - Add information about online MQ problems in Gold run and these fixes --- macro/beamtime/mcbm2022/online/HowTo.md | 34 + .../online/create_log_folder_dev.sbatch | 3 + .../mcbm2022/online/mq_processing_node.sbatch | 10 +- .../online/mq_processing_node_array.sbatch | 8 +- .../online/mq_processing_node_dev.sbatch | 611 ++++++++++++++++++ macro/beamtime/mcbm2022/online/mq_sink.sbatch | 2 +- macro/beamtime/mcbm2022/online/replay.sbatch | 46 ++ .../mcbm2022/online/start_topology_dev.sh | 43 ++ 8 files changed, 747 insertions(+), 10 deletions(-) create mode 100644 macro/beamtime/mcbm2022/online/create_log_folder_dev.sbatch create mode 100644 macro/beamtime/mcbm2022/online/mq_processing_node_dev.sbatch create mode 100644 macro/beamtime/mcbm2022/online/replay.sbatch create mode 100755 macro/beamtime/mcbm2022/online/start_topology_dev.sh diff --git a/macro/beamtime/mcbm2022/online/HowTo.md b/macro/beamtime/mcbm2022/online/HowTo.md index 2caa9148e1..3536d85a5b 100644 --- a/macro/beamtime/mcbm2022/online/HowTo.md +++ b/macro/beamtime/mcbm2022/online/HowTo.md @@ -326,4 +326,38 @@ check loop behind identical to the latter. disk 1. 
Something fishy is happening with the ZMQ buffering, as even without re-ordering and missing TS insertion, the memory usage of the sink increase up to `180 GB`, which is far more than expected with the HWM of 2 messages at input + - Now understood: it was caused by a wrong usage of the FairMQ channel option `rcvBufSize` in case of fan-in, which sets the + limit for each link individually and not for the sum of all links as originally expected. + - After setting it to 1, the "unassigned memory size" is limited to approx. `NBranch * max TS size` as expected + - Memory usage of the Unpackers and Event builders + number of TS in-flight are also reduced by setting all HWM to 1 1. The plots generated by the sink for the buffer monitoring and processed TS/Event counting have messed up scales + - Fixed after review of the way the plots are filled +1. Processing of TS in the Sink is far slower than expected: around 2 TS/10 s instead of the expected 20 TS/10 s + - Partially linked to writing to a single disk + - Partially linked to the extraction of the selected data in the Sink itself + - Improved by adding an option to build the DigiEvents in the Event builders and transmit these, leading to ~4-6 TS/10 s in the Sink + +# Replay testing scripts + +These scripts allow replaying a run from one of the archiver nodes with a single processing node. +They consist of: +- create_log_folder_dev.sbatch +- mq_processing_node_dev.sbatch +- replay.sbatch +- start_topology_dev.sh + +This version first starts a full topology with `<Nb branches>` branches on `en13`, writing a single set of files to +`/storage/${_Disk}/mcbm2022/data/<Run Id>_<Trigger Set>_en13.digi_events[_FileIdx].root`, +then starts a replay of all files for a given `<Run Id>` from node8 and connects it to the MQ topology using the +InfiniBand network.
+ +The replay is done at a rate of around `2 TS/s`, which is slightly more than what one would expect for a single +processing node in a `2 TS builder + 4 processing nodes` configuration + +It expects 4 parameters in the following order: +- the `<Run Id>`, as reported by flesctl +- the `<Number of branches to be started per node>`, leading to a total parallel capability of `4 x n` timeslices +- the `<Trigger set>` in the range `[0-14]`, with `[0-6]` corresponding to the trigger settings tested by N. Herrmann + and `[7-14]` those used for development by P.-A. Loizeau +- the `<Disk index>` in the range `[0-8]`, with currently only indices `6` and `7` being valid for `en13` (HDDs were + moved around) diff --git a/macro/beamtime/mcbm2022/online/create_log_folder_dev.sbatch b/macro/beamtime/mcbm2022/online/create_log_folder_dev.sbatch new file mode 100644 index 0000000000..2501158937 --- /dev/null +++ b/macro/beamtime/mcbm2022/online/create_log_folder_dev.sbatch @@ -0,0 +1,3 @@ +#!/bin/bash + +mkdir -p /storage/6/mcbm2022/online_logs/$1 diff --git a/macro/beamtime/mcbm2022/online/mq_processing_node.sbatch b/macro/beamtime/mcbm2022/online/mq_processing_node.sbatch index 9584f6b418..fec21ca83f 100644 --- a/macro/beamtime/mcbm2022/online/mq_processing_node.sbatch +++ b/macro/beamtime/mcbm2022/online/mq_processing_node.sbatch @@ -458,7 +458,7 @@ EVTSINK+=" --id evtsink1" EVTSINK+=" --severity info" # EVTSINK+=" --severity debug" EVTSINK+=" --StoreFullTs 0" -# EVTSINK+=" --BypassConsecutiveTs true" +EVTSINK+=" --BypassConsecutiveTs true" EVTSINK+=" --WriteMissingTs false" EVTSINK+=" --DisableCompression true" EVTSINK+=" --TreeFileMaxSize 4000000000" @@ -472,7 +472,7 @@ EVTSINK+=" --PubFreqTs $_pubfreqts" EVTSINK+=" --PubTimeMin $_pubminsec" EVTSINK+=" --PubTimeMax $_pubmaxsec" EVTSINK+=" --EvtNameIn events" -EVTSINK+=" --channel-config name=events,type=pull,method=bind,transport=zeromq,rcvBufSize=$_nbbranch,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" +EVTSINK+=" 
--channel-config name=events,type=pull,method=bind,transport=zeromq,rcvBufSize=1,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" EVTSINK+=" --channel-config name=missedts,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11006,rateLogging=$_ratelog" EVTSINK+=" --channel-config name=commands,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11007,rateLogging=$_ratelog" EVTSINK+=" --channel-config name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://${_histServHost}:11666,rateLogging=$_ratelog" @@ -520,7 +520,7 @@ while (( _iBranch < _nbbranch )); do # fi UNPACKER+=" --TsNameOut unpts$_iBranch" UNPACKER+=" --channel-config name=ts-request,type=req,method=connect,transport=zeromq,address=tcp://127.0.0.1:11555,rateLogging=$_ratelog" - UNPACKER+=" --channel-config name=unpts$_iBranch,type=push,method=bind,transport=zeromq,sndBufSize=2,address=tcp://127.0.0.1:$_iPort,rateLogging=$_ratelog" + UNPACKER+=" --channel-config name=unpts$_iBranch,type=push,method=bind,transport=zeromq,sndBufSize=1,address=tcp://127.0.0.1:$_iPort,rateLogging=$_ratelog" # UNPACKER+=" --channel-config name=commands,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11007" UNPACKER+=" --channel-config name=parameters,type=req,method=connect,transport=zeromq,address=tcp://${_parServHost}:11005,rateLogging=0" UNPACKER+=" --channel-config name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://${_histServHost}:11666,rateLogging=$_ratelog" @@ -579,8 +579,8 @@ while (( _iBranch < _nbbranch )); do EVTBUILDER+=" --TsNameIn unpts$_iBranch" EVTBUILDER+=" --EvtNameOut events" # EVTBUILDER+=" --DoNotSend true" - EVTBUILDER+=" --channel-config name=unpts$_iBranch,type=pull,method=connect,transport=zeromq,rcvBufSize=2,address=tcp://127.0.0.1:$_iPort,rateLogging=$_ratelog" - EVTBUILDER+=" --channel-config name=events,type=push,method=connect,transport=zeromq,sndBufSize=2,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" + 
EVTBUILDER+=" --channel-config name=unpts$_iBranch,type=pull,method=connect,transport=zeromq,rcvBufSize=1,address=tcp://127.0.0.1:$_iPort,rateLogging=$_ratelog" + EVTBUILDER+=" --channel-config name=events,type=push,method=connect,transport=zeromq,sndBufSize=1,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" # EVTBUILDER+=" --channel-config name=commands,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11007" EVTBUILDER+=" --channel-config name=parameters,type=req,method=connect,transport=zeromq,address=tcp://${_parServHost}:11005,rateLogging=0" EVTBUILDER+=" --channel-config name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://${_histServHost}:11666,rateLogging=$_ratelog" diff --git a/macro/beamtime/mcbm2022/online/mq_processing_node_array.sbatch b/macro/beamtime/mcbm2022/online/mq_processing_node_array.sbatch index eb51bbfabe..9ecd1b27ed 100644 --- a/macro/beamtime/mcbm2022/online/mq_processing_node_array.sbatch +++ b/macro/beamtime/mcbm2022/online/mq_processing_node_array.sbatch @@ -452,7 +452,7 @@ case $SLURM_ARRAY_TASK_ID in EVTSINK+=" --PubTimeMin $_pubminsec" EVTSINK+=" --PubTimeMax $_pubmaxsec" EVTSINK+=" --EvtNameIn events" - EVTSINK+=" --channel-config name=events,type=pull,method=bind,transport=zeromq,rcvBufSize=$_nbbranch,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" + EVTSINK+=" --channel-config name=events,type=pull,method=bind,transport=zeromq,rcvBufSize=1,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" EVTSINK+=" --channel-config name=missedts,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11006,rateLogging=$_ratelog" EVTSINK+=" --channel-config name=commands,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11007,rateLogging=$_ratelog" EVTSINK+=" --channel-config name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11666,rateLogging=$_ratelog" @@ -498,7 +498,7 @@ case $SLURM_ARRAY_TASK_ID in UNPACKER+=" --TsNameOut unpts$_iBranch" 
UNPACKER+=" --channel-config name=ts-request,type=req,method=connect,transport=zeromq,address=tcp://127.0.0.1:11555,rateLogging=$_ratelog" UNPACKER+=" --channel-config name=parameters,type=req,method=connect,transport=zeromq,address=tcp://127.0.0.1:11005,rateLogging=0" - UNPACKER+=" --channel-config name=unpts$_iBranch,type=push,method=bind,transport=zeromq,sndBufSize=2,address=tcp://127.0.0.1:$_iPort,rateLogging=$_ratelog" + UNPACKER+=" --channel-config name=unpts$_iBranch,type=push,method=bind,transport=zeromq,sndBufSize=1,address=tcp://127.0.0.1:$_iPort,rateLogging=$_ratelog" # UNPACKER+=" --channel-config name=commands,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11007" UNPACKER+=" --channel-config name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11666,rateLogging=$_ratelog" UNPACKER+=" --transport zeromq" @@ -554,8 +554,8 @@ case $SLURM_ARRAY_TASK_ID in EVTBUILDER+=" --TsNameIn unpts$_iBranch" EVTBUILDER+=" --EvtNameOut events" EVTBUILDER+=" --DoNotSend true" - EVTBUILDER+=" --channel-config name=unpts$_iBranch,type=pull,method=connect,transport=zeromq,rcvBufSize=2,address=tcp://127.0.0.1:$_iPort,rateLogging=$_ratelog" - EVTBUILDER+=" --channel-config name=events,type=push,method=connect,transport=zeromq,sndBufSize=2,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" + EVTBUILDER+=" --channel-config name=unpts$_iBranch,type=pull,method=connect,transport=zeromq,rcvBufSize=1,address=tcp://127.0.0.1:$_iPort,rateLogging=$_ratelog" + EVTBUILDER+=" --channel-config name=events,type=push,method=connect,transport=zeromq,sndBufSize=1,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" # EVTBUILDER+=" --channel-config name=commands,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11007" EVTBUILDER+=" --channel-config name=parameters,type=req,method=connect,transport=zeromq,address=tcp://127.0.0.1:11005,rateLogging=0" EVTBUILDER+=" --channel-config 
name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11666,rateLogging=$_ratelog" diff --git a/macro/beamtime/mcbm2022/online/mq_processing_node_dev.sbatch b/macro/beamtime/mcbm2022/online/mq_processing_node_dev.sbatch new file mode 100644 index 0000000000..0426a6a9b7 --- /dev/null +++ b/macro/beamtime/mcbm2022/online/mq_processing_node_dev.sbatch @@ -0,0 +1,611 @@ +#!/bin/bash + +#SBATCH -J McbmOnline +#SBATCH --oversubscribe + +# Copyright (C) 2022 Facility for Antiproton and Ion Research in Europe, Darmstadt +# SPDX-License-Identifier: GPL-3.0-only +# author: Pierre-Alain Loizeau [committer] + +_histServHost="127.0.0.1" +_parServHost="127.0.0.1" +if [ $# -ge 5 ]; then + _run_id=$1 + _nbbranch=$2 + _TriggSet=$3 + _Disk=$4 + _hostname=$5 + if [ $# -ge 6 ]; then + _histServHost=$6 + if [ $# -eq 7 ]; then + _parServHost=$7 + fi + fi +else + echo 'Missing parameters. Only following pattern allowed:' + echo 'mq_processing_node.sbatch <Run Id> <Nb // branches> <Trigger set> <Storage disk index> <hostname:port>' + echo 'mq_processing_node.sbatch <Run Id> <Nb // branches> <Trigger set> <Storage disk index> <hostname:port> <hist serv host>' + echo 'mq_processing_node.sbatch <Run Id> <Nb // branches> <Trigger set> <Storage disk index> <hostname:port> <hist serv host> <par. 
serv host>' + # 'return' is only valid inside a function or a sourced script: use 'exit' so that a bad call really stops here + exit 1 +fi + +# Prepare log folder variables +#_log_folder="/local/mcbm2022/online_logs/${_run_id}/" +_log_folder="/storage/6/mcbm2022/online_logs/${_run_id}/" +_localhost=$(hostname) + +echo $SLURM_ARRAY_TASK_ID ${_localhost} ${_run_id} ${_nbbranch} ${_TriggSet} ${_hostname} + +# CBMROOT + FAIRMQ initialisation +_BuildDir=/scratch/loizeau/cbmroot_mcbm/build +source "${_BuildDir}/config.sh" +# source /local/mcbm2022/install/config.sh +if [ -e "$SIMPATH/bin/fairmq-shmmonitor" ]; then + "$SIMPATH/bin/fairmq-shmmonitor" --cleanup +fi + +# Only one Processing branch is monitoring, and the full topology gets 2.5 TS/s, so with 10 branches pub may be ~10s +_pubfreqts=3 +_pubminsec=1.0 +_pubmaxsec=10.0 + +######################################################################################################################## + +# Setup file and parameter files for parameter server +_setup_name=mcbm_beam_2022_03_22_iron +_parfileBmon=$VMCWORKDIR/macro/beamtime/mcbm2022/mBmonCriPar.par +_parfileSts=$VMCWORKDIR/macro/beamtime/mcbm2022/mStsPar.par +_parfileTrdAsic=$VMCWORKDIR/parameters/trd/trd_v22d_mcbm.asic.par +_parfileTrdDigi=$VMCWORKDIR/parameters/trd/trd_v22d_mcbm.digi.par +_parfileTrdGas=$VMCWORKDIR/parameters/trd/trd_v22d_mcbm.gas.par +_parfileTrdGain=$VMCWORKDIR/parameters/trd/trd_v22d_mcbm.gain.par +_parfileTof=$VMCWORKDIR/macro/beamtime/mcbm2022/mTofCriPar.par +_parfileRich=$VMCWORKDIR/macro/beamtime/mcbm2021/mRichPar_70.par + +# Parameter files => Update depending on run ID!!!
+if [ $_run_id -ge 2060 ]; then + if [ $_run_id -le 2065 ]; then + _setup_name=mcbm_beam_2022_03_09_carbon + _parfileTof=$VMCWORKDIR/macro/beamtime/mcbm2022/mTofCriParCarbon.par + elif [ $_run_id -le 2160 ]; then # Potentially wrong setup between 2065 and 2150 but not official runs + _setup_name=mcbm_beam_2022_03_22_iron + _parfileTof=$VMCWORKDIR/macro/beamtime/mcbm2022/mTofCriParIron.par + elif [ $_run_id -le 2310 ]; then # Potentially wrong setup between 2160 and 2176 but not official runs + _setup_name=mcbm_beam_2022_03_28_uranium + _parfileTrdAsic=$VMCWORKDIR/parameters/trd/trd_v22g_mcbm.asic.par + _parfileTrdDigi=$VMCWORKDIR/parameters/trd/trd_v22g_mcbm.digi.par + _parfileTrdGas=$VMCWORKDIR/parameters/trd/trd_v22g_mcbm.gas.par + _parfileTrdGain=$VMCWORKDIR/parameters/trd/trd_v22g_mcbm.gain.par + _parfileTof=$VMCWORKDIR/macro/beamtime/mcbm2022/mTofCriParUranium.par + elif [ $_run_id -ge 2350 ]; then + _setup_name=mcbm_beam_2022_05_23_nickel + _parfileTrdAsic=$VMCWORKDIR/parameters/trd/trd_v22h_mcbm.asic.par + _parfileTrdDigi=$VMCWORKDIR/parameters/trd/trd_v22h_mcbm.digi.par + _parfileTrdGas=$VMCWORKDIR/parameters/trd/trd_v22h_mcbm.gas.par + _parfileTrdGain=$VMCWORKDIR/parameters/trd/trd_v22h_mcbm.gain.par + _parfileTof=$VMCWORKDIR/macro/beamtime/mcbm2022/mTofCriParUranium.par + fi +fi + +######################################################################################################################## + +# Apply sets of settings for different triggers +_UnpTimeOffsBMon=0 +_UnpTimeOffsSts=-970 +_UnpTimeOffsTrd1d=1225 +_UnpTimeOffsTrd2d=-525 +_UnpTimeOffsTof=45 +_UnpTimeOffsRich=95 + +######################################################################################################################## + +# Apply sets of settings for different triggers +_TriggerMinNumberBmon=0 +_TriggerMinNumberSts=0 +_TriggerMinNumberTrd1d=0 +_TriggerMinNumberTrd2d=0 +_TriggerMinNumberTof=4 +_TriggerMinNumberRich=0 + +_TriggerMaxNumberBMon=-1 +_TriggerMaxNumberSts=-1 
+_TriggerMaxNumberTrd1d=-1 +_TriggerMaxNumberTrd2d=-1 +_TriggerMaxNumberTof=-1 +_TriggerMaxNumberRich=-1 + +_TriggerMinLayersNumberTof=0 +_TriggerMinLayersNumberSts=0 + +_TrigWinMinBMon=-10 +_TrigWinMaxBMon=10 +_TrigWinMinSts=-40 +_TrigWinMaxSts=40 +_TrigWinMinTrd1d=-50 +_TrigWinMaxTrd1d=400 +_TrigWinMinTrd2d=-60 +_TrigWinMaxTrd2d=350 +_TrigWinMinTof=-10 +_TrigWinMaxTof=70 +_TrigWinMinRich=-10 +_TrigWinMaxRich=40 + +bTrigSet=true; +case ${_TriggSet} in + 0) + # NH: default, any Tof hit + _TriggerMaxNumberBMon=1000 + + _TriggerMinNumberTof=1 + + _TrigWinMinBMon=-50 + _TrigWinMaxBMon=50 + _TrigWinMinSts=-60 + _TrigWinMaxSts=60 + _TrigWinMinTrd1d=-300 + _TrigWinMaxTrd1d=300 + _TrigWinMinTrd2d=-200 + _TrigWinMaxTrd2d=200 + + _TrigWinMinTof=-80 + _TrigWinMaxTof=120 + + _TrigWinMinRich=-60 + _TrigWinMaxRich=60 + ;; + + 1) + # NH: default, Tof - T0 concidences (pulser) + _TriggerMinNumberBmon=1 + _TriggerMaxNumberBMon=1000 + + _TriggerMinNumberTof=2 + _TriggerMinLayersNumberTof=1 + + _TrigWinMinBMon=-50 + _TrigWinMaxBMon=50 + _TrigWinMinSts=-60 + _TrigWinMaxSts=60 + _TrigWinMinTrd1d=-300 + _TrigWinMaxTrd1d=300 + _TrigWinMinTrd2d=-200 + _TrigWinMaxTrd2d=200 + + _TrigWinMinTof=-180 + _TrigWinMaxTof=220 + + _TrigWinMinRich=-60 + _TrigWinMaxRich=60 + ;; + + 2) + # NH: Tof standalone track trigger (cosmic) + _TriggerMaxNumberBMon=1000 + + _TriggerMinNumberTof=8 + _TriggerMinLayersNumberTof=4 + + _TrigWinMinBMon=-50 + _TrigWinMaxBMon=50 + _TrigWinMinSts=-60 + _TrigWinMaxSts=60 + _TrigWinMinTrd1d=-300 + _TrigWinMaxTrd1d=300 + _TrigWinMinTrd2d=-200 + _TrigWinMaxTrd2d=200 + + _TrigWinMinTof=-30 + _TrigWinMaxTof=70 + + _TrigWinMinRich=-60 + _TrigWinMaxRich=60 + ;; + + 3) + # NH: Tof track trigger with T0 + _TriggerMinNumberBmon=1 + _TriggerMaxNumberBMon=2 + + _TriggerMinNumberTof=8 + _TriggerMinLayersNumberTof=4 + + _TrigWinMinBMon=-50 + _TrigWinMaxBMon=50 + _TrigWinMinSts=-60 + _TrigWinMaxSts=60 + _TrigWinMinTrd1d=-300 + _TrigWinMaxTrd1d=300 + _TrigWinMinTrd2d=-200 + 
_TrigWinMaxTrd2d=200 + _TrigWinMinTof=-20 + _TrigWinMaxTof=60 + _TrigWinMinRich=-60 + _TrigWinMaxRich=60 + ;; + + 4) + # NH: mCbm track trigger Tof, T0 & STS + _TriggerMinNumberBmon=1 + _TriggerMaxNumberBMon=2 + + _TriggerMinNumberSts=2 + _TriggerMinLayersNumberSts=1 + + _TriggerMinNumberTof=8 + _TriggerMinLayersNumberTof=4 + + _TrigWinMinBMon=-50 + _TrigWinMaxBMon=50 + _TrigWinMinSts=-60 + _TrigWinMaxSts=60 + _TrigWinMinTrd1d=-300 + _TrigWinMaxTrd1d=300 + _TrigWinMinTrd2d=-200 + _TrigWinMaxTrd2d=200 + _TrigWinMinTof=-20 + _TrigWinMaxTof=60 + _TrigWinMinRich=-60 + _TrigWinMaxRich=60 + ;; + + 5) + # NH: mCbm lambda trigger + _TriggerMinNumberBmon=1 + _TriggerMaxNumberBMon=2 + + _TriggerMinNumberSts=8 + _TriggerMinLayersNumberSts=2 + + _TriggerMinNumberTof=16 + _TriggerMinLayersNumberTof=8 + + _TrigWinMinBMon=-50 + _TrigWinMaxBMon=50 + _TrigWinMinSts=-60 + _TrigWinMaxSts=60 + _TrigWinMinTrd1d=-300 + _TrigWinMaxTrd1d=300 + _TrigWinMinTrd2d=-200 + _TrigWinMaxTrd2d=200 + _TrigWinMinTof=-20 + _TrigWinMaxTof=60 + _TrigWinMinRich=-60 + _TrigWinMaxRich=60 + ;; + + 6) + # NH: One hit per detector system w/ big acceptance=mCbm full track trigger + _TriggerMinNumberBmon=1 + _TriggerMaxNumberBMon=1; + + _TriggerMinNumberSts=4 + _TriggerMinLayersNumberSts=0 + + _TriggerMinNumberTrd1d=2 + + _TriggerMinNumberTof=8 + _TriggerMinLayersNumberTof=4 + + _TrigWinMinBMon=-50 + _TrigWinMaxBMon=50 + _TrigWinMinSts=-60 + _TrigWinMaxSts=60 + _TrigWinMinTrd1d=-300 + _TrigWinMaxTrd1d=300 + _TrigWinMinTrd2d=-200 + _TrigWinMaxTrd2d=200 + _TrigWinMinTof=-20 + _TrigWinMaxTof=60 + _TrigWinMinRich=-60 + _TrigWinMaxRich=60 + ;; + + 7) + # PAL default: T0 + STS + TOF, only digi cut + _TriggerMinNumberBmon=1 + _TriggerMinNumberSts=2 + _TriggerMinNumberTof=4 + ;; + + 8) + # PAL: default, Tof - T0 concidences (pulser) + _TriggerMinNumberBmon=4 + _TriggerMinNumberTof=2 + _TriggerMinLayersNumberTof=1 + ;; + + 9) + # PAL: Tof standalone track trigger (cosmic) + _TriggerMinNumberTof=8 + 
_TriggerMinLayersNumberTof=4 + ;; + + 10) + # PAL: Tof track trigger with T0 + _TriggerMinNumberBmon=1 + _TriggerMinNumberTof=8 + _TriggerMinLayersNumberTof=4 + ;; + + 11) + # PAL: mCbm track trigger Tof, T0 & STS + _TriggerMinNumberBmon=1 + _TriggerMinNumberSts=2 + _TriggerMinNumberTof=8 + _TriggerMinLayersNumberTof=4 + ;; + + 12) + # PAL: mCbm lambda trigger + _TriggerMinNumberBmon=1 + _TriggerMinNumberSts=8 + _TriggerMinNumberTof=16 + _TriggerMinLayersNumberTof=8 + ;; + + 13) + # PAL: One hit per detector system w/ big acceptance=mCbm full track trigger + _TriggerMinNumberBmon=1 + _TriggerMinNumberSts=4 + _TriggerMinNumberTrd1d=2 + _TriggerMinNumberTrd2d=1 + _TriggerMinNumberTof=8 + _TriggerMinNumberRich=1 + ;; + + 14) + # PAL: mCbm track trigger Tof, T0 & STS + _TriggerMinNumberBmon=1 + _TriggerMinNumberSts=4 + _TriggerMinNumberTof=8 + _TriggerMinLayersNumberTof=4 + _TriggerMinLayersNumberSts=2 + ;; + + *) + bTrigSet=false; + ;; + +esac +echo Using MQ trigger par set: ${_TriggSet} + +######################################################################################################################## + +_ratelog=0 # hides ZMQ messages rates and bandwidth +#_ratelog=1 # display ZMQ messages rates and bandwidth + +# ProcessName_runid_trigset_hostname_yyyy_mm_dd_hh_mm_ss.log +LOGFILETAG="${_run_id}_${_TriggSet}_${_localhost}_" +LOGFILETAG+=`date +%Y_%m_%d_%H_%M_%S` +LOGFILETAG+=".log" + +######################################################################################################################## + +############################ +# Histogram server # +############################ +if [[ ${_histServHost} == "127.0.0.1" ]]; then + HISTSRV_LOG="${_log_folder}server1_${LOGFILETAG}" + + HISTSERVER="MqHistoServer" + HISTSERVER+=" --control static" + HISTSERVER+=" --id server1" + HISTSERVER+=" --severity info" + HISTSERVER+=" --histport 8080" + HISTSERVER+=" --channel-config
name=histogram-in,type=sub,method=bind,transport=zeromq,address=tcp://127.0.0.1:11666,rateLogging=$_ratelog" + HISTSERVER+=" --channel-config name=histo-conf,type=sub,method=bind,transport=zeromq,address=tcp://127.0.0.1:11667,rateLogging=0" + HISTSERVER+=" --channel-config name=canvas-conf,type=sub,method=bind,transport=zeromq,address=tcp://127.0.0.1:11668,rateLogging=0" + + echo ${_BuildDir}/bin/MQ/histogramServer/$HISTSERVER &> $HISTSRV_LOG & + ${_BuildDir}/bin/MQ/histogramServer/$HISTSERVER &> $HISTSRV_LOG & +fi + +############################ +# Sampler # +############################ +SAMPLER_LOG="${_log_folder}sampler1_${LOGFILETAG}" + +SAMPLER="RepReqTsSampler" +SAMPLER+=" --control static" +SAMPLER+=" --id sampler1" +SAMPLER+=" --max-timeslices -1" +SAMPLER+=" --severity info" +SAMPLER+=" --fles-host $_hostname" +SAMPLER+=" --high-water-mark 10" +SAMPLER+=" --no-split-ts 1" +SAMPLER+=" --ChNameMissTs missedts" +SAMPLER+=" --ChNameCmds commands" +SAMPLER+=" --PubFreqTs $_pubfreqts" +SAMPLER+=" --PubTimeMin $_pubminsec" +SAMPLER+=" --PubTimeMax $_pubmaxsec" +SAMPLER+=" --channel-config name=ts-request,type=rep,method=bind,transport=zeromq,address=tcp://127.0.0.1:11555,rateLogging=$_ratelog" +SAMPLER+=" --channel-config name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://${_histServHost}:11666,rateLogging=$_ratelog" +SAMPLER+=" --channel-config name=missedts,type=pub,method=bind,address=tcp://127.0.0.1:11006,rateLogging=$_ratelog" +SAMPLER+=" --channel-config name=commands,type=pub,method=bind,address=tcp://127.0.0.1:11007,rateLogging=$_ratelog" +SAMPLER+=" --transport zeromq" + +echo ${_BuildDir}/bin/MQ/source/$SAMPLER &> $SAMPLER_LOG & +${_BuildDir}/bin/MQ/source/$SAMPLER &> $SAMPLER_LOG & + +############################ +# Parameter server # +############################ +if [[ ${_parServHost} == "127.0.0.1" ]]; then + PARAMSRV_LOG="${_log_folder}parmq_${LOGFILETAG}" + + PARAMETERSERVER="parmq-server" + PARAMETERSERVER+=" --control 
static" + PARAMETERSERVER+=" --id parmq-server" + PARAMETERSERVER+=" --severity info" + PARAMETERSERVER+=" --channel-name parameters" + PARAMETERSERVER+=" --channel-config name=parameters,type=rep,method=bind,transport=zeromq,address=tcp://127.0.0.1:11005,rateLogging=0" + PARAMETERSERVER+=" --first-input-name $_parfileSts;$_parfileTrdAsic;$_parfileTrdDigi;$_parfileTrdGas;$_parfileTrdGain;$_parfileTof;$_parfileBmon;$_parfileRich" + PARAMETERSERVER+=" --first-input-type ASCII" + PARAMETERSERVER+=" --setup $_setup_name" + + echo ${_BuildDir}/bin/MQ/parmq/$PARAMETERSERVER &> $PARAMSRV_LOG & + ${_BuildDir}/bin/MQ/parmq/$PARAMETERSERVER &> $PARAMSRV_LOG & +fi + +############################ +# Event Sink # +############################ +EVTSINK_LOG="${_log_folder}evtsink1_${LOGFILETAG}" + +EVTSINK="DigiEventSink" +EVTSINK+=" --control static" +EVTSINK+=" --id evtsink1" +EVTSINK+=" --severity info" +# EVTSINK+=" --severity debug" +EVTSINK+=" --StoreFullTs 0" +EVTSINK+=" --BypassConsecutiveTs true" +EVTSINK+=" --WriteMissingTs false" +# EVTSINK+=" --DisableCompression true" +EVTSINK+=" --TreeFileMaxSize 4000000000" +EVTSINK+=" --DigiEventInput true" +if [ ${_Disk} -eq 0 ]; then + EVTSINK+=" --OutFileName /local/mcbm2022/data/${_run_id}_${_TriggSet}_${_localhost}.digi_events.root" +else + EVTSINK+=" --OutFileName /storage/${_Disk}/mcbm2022/data/${_run_id}_${_TriggSet}_${_localhost}.digi_events.root" +fi +EVTSINK+=" --FillHistos true" +EVTSINK+=" --PubFreqTs $_pubfreqts" +EVTSINK+=" --PubTimeMin $_pubminsec" +EVTSINK+=" --PubTimeMax $_pubmaxsec" +EVTSINK+=" --EvtNameIn events" +EVTSINK+=" --channel-config name=events,type=pull,method=bind,transport=zeromq,rcvBufSize=1,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" +EVTSINK+=" --channel-config name=missedts,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11006,rateLogging=$_ratelog" +EVTSINK+=" --channel-config 
name=commands,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11007,rateLogging=$_ratelog" +EVTSINK+=" --channel-config name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://${_histServHost}:11666,rateLogging=$_ratelog" + +echo ${_BuildDir}/bin/MQ/mcbm/$EVTSINK &> $EVTSINK_LOG & +${_BuildDir}/bin/MQ/mcbm/$EVTSINK &> $EVTSINK_LOG & + +############################ +# Processing branches # +############################ +_iBranch=0 +while (( _iBranch < _nbbranch )); do + (( _iPort = 11680 + _iBranch )) + + ########################## + # Unpacker # + ########################## + UNPACKER_LOG="${_log_folder}unp${_iBranch}_${LOGFILETAG}" + + UNPACKER="MqUnpack" + UNPACKER+=" --control static" + UNPACKER+=" --id unp$_iBranch" +# UNPACKER+=" --severity error" + UNPACKER+=" --severity info" +# UNPACKER+=" --severity debug" + UNPACKER+=" --Setup $_setup_name" + UNPACKER+=" --RunId $_run_id" + UNPACKER+=" --IgnOverMs false" + UNPACKER+=" --UnpBmon true" + UNPACKER+=" --UnpMuch false" + UNPACKER+=" --UnpPsd false" + UNPACKER+=" --SetTimeOffs kT0,${_UnpTimeOffsBMon}" + UNPACKER+=" --SetTimeOffs kSTS,${_UnpTimeOffsSts}" + UNPACKER+=" --SetTimeOffs kTRD,${_UnpTimeOffsTrd1d}" + UNPACKER+=" --SetTimeOffs kTRD2D,${_UnpTimeOffsTrd2d}" + UNPACKER+=" --SetTimeOffs kTOF,${_UnpTimeOffsTof}" + UNPACKER+=" --SetTimeOffs kRICH,${_UnpTimeOffsRich}" + UNPACKER+=" --PubFreqTs $_pubfreqts" + UNPACKER+=" --PubTimeMin $_pubminsec" + UNPACKER+=" --PubTimeMax $_pubmaxsec" +# if [ ${_iBranch} -eq 0 ]; then +# UNPACKER+=" --FillHistos true" +# else +# UNPACKER+=" --FillHistos false" +# fi + UNPACKER+=" --TsNameOut unpts$_iBranch" + UNPACKER+=" --channel-config name=ts-request,type=req,method=connect,transport=zeromq,address=tcp://127.0.0.1:11555,rateLogging=$_ratelog" + UNPACKER+=" --channel-config name=unpts$_iBranch,type=push,method=bind,transport=zeromq,sndBufSize=1,address=tcp://127.0.0.1:$_iPort,rateLogging=$_ratelog" +# UNPACKER+=" --channel-config 
name=commands,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11007" + UNPACKER+=" --channel-config name=parameters,type=req,method=connect,transport=zeromq,address=tcp://${_parServHost}:11005,rateLogging=0" + UNPACKER+=" --channel-config name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://${_histServHost}:11666,rateLogging=$_ratelog" + UNPACKER+=" --transport zeromq" + + echo ${_BuildDir}/bin/MQ/mcbm/$UNPACKER &> $UNPACKER_LOG & + ${_BuildDir}/bin/MQ/mcbm/$UNPACKER &> $UNPACKER_LOG & + + ########################## + # Event Builder # + ########################## + EVTBUILDER_LOG="${_log_folder}build${_iBranch}_${LOGFILETAG}" + + EVTBUILDER="BuildDigiEvents" + EVTBUILDER+=" --control static" + EVTBUILDER+=" --id build$_iBranch" + EVTBUILDER+=" --severity info" +# EVTBUILDER+=" --severity debug" + EVTBUILDER+=" --PubFreqTs $_pubfreqts" + EVTBUILDER+=" --PubTimeMin $_pubminsec" + EVTBUILDER+=" --PubTimeMax $_pubmaxsec" +# if [ ${_iBranch} -eq 0 ]; then +# EVTBUILDER+=" --FillHistos true" +# else + EVTBUILDER+=" --FillHistos false" +# fi + EVTBUILDER+=" --IgnTsOver false" + EVTBUILDER+=" --EvtOverMode AllowOverlap" + EVTBUILDER+=" --RefDet kT0" + EVTBUILDER+=" --DelDet kMuch" + EVTBUILDER+=" --DelDet kPsd" + + EVTBUILDER+=" --SetTrigWin kT0,${_TrigWinMinBMon},${_TrigWinMaxBMon}" + EVTBUILDER+=" --SetTrigWin kSts,${_TrigWinMinSts},${_TrigWinMaxSts}" + EVTBUILDER+=" --SetTrigWin kTrd,${_TrigWinMinTrd1d},${_TrigWinMaxTrd1d}" + EVTBUILDER+=" --SetTrigWin kTrd2D,${_TrigWinMinTrd2d},${_TrigWinMaxTrd2d}" + EVTBUILDER+=" --SetTrigWin kTof,${_TrigWinMinTof},${_TrigWinMaxTof}" + EVTBUILDER+=" --SetTrigWin kRich,${_TrigWinMinRich},${_TrigWinMaxRich}" + + EVTBUILDER+=" --SetTrigMinNb kT0,${_TriggerMinNumberBmon}" + EVTBUILDER+=" --SetTrigMinNb kSts,${_TriggerMinNumberSts}" + EVTBUILDER+=" --SetTrigMinNb kTrd,${_TriggerMinNumberTrd1d}" + EVTBUILDER+=" --SetTrigMinNb kTrd2D,${_TriggerMinNumberTrd2d}" + EVTBUILDER+=" --SetTrigMinNb 
kTof,${_TriggerMinNumberTof}" + EVTBUILDER+=" --SetTrigMinNb kRich,${_TriggerMinNumberRich}" + + EVTBUILDER+=" --SetTrigMaxNb kT0,${_TriggerMaxNumberBMon}" + EVTBUILDER+=" --SetTrigMaxNb kSts,${_TriggerMaxNumberSts}" + EVTBUILDER+=" --SetTrigMaxNb kTrd,${_TriggerMaxNumberTrd1d}" + EVTBUILDER+=" --SetTrigMaxNb kTrd2D,${_TriggerMaxNumberTrd2d}" + EVTBUILDER+=" --SetTrigMaxNb kTof,${_TriggerMaxNumberTof}" + EVTBUILDER+=" --SetTrigMaxNb kRich,${_TriggerMaxNumberRich}" + + EVTBUILDER+=" --SetTrigMinLayersNb kTof,${_TriggerMinLayersNumberTof}" + EVTBUILDER+=" --SetTrigMinLayersNb kSts,${_TriggerMinLayersNumberSts}" + + EVTBUILDER+=" --TsNameIn unpts$_iBranch" + EVTBUILDER+=" --EvtNameOut events" +# EVTBUILDER+=" --DoNotSend true" + EVTBUILDER+=" --DigiEventOutput true" + + EVTBUILDER+=" --channel-config name=unpts$_iBranch,type=pull,method=connect,transport=zeromq,rcvBufSize=1,address=tcp://127.0.0.1:$_iPort,rateLogging=$_ratelog" + EVTBUILDER+=" --channel-config name=events,type=push,method=connect,transport=zeromq,sndBufSize=1,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" +# EVTBUILDER+=" --channel-config name=commands,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11007" + EVTBUILDER+=" --channel-config name=parameters,type=req,method=connect,transport=zeromq,address=tcp://${_parServHost}:11005,rateLogging=0" + EVTBUILDER+=" --channel-config name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://${_histServHost}:11666,rateLogging=$_ratelog" + EVTBUILDER+=" --transport zeromq" + + echo ${_BuildDir}/bin/MQ/mcbm/$EVTBUILDER &> $EVTBUILDER_LOG & + ${_BuildDir}/bin/MQ/mcbm/$EVTBUILDER &> $EVTBUILDER_LOG & + (( _iBranch += 1 )) +done + +STILL_RUNNING=`ps | wc -l` +STILL_RUNNING_OUT="${STILL_RUNNING}\n" +STILL_RUNNING_OUT+=`ps` +echo `date` "${STILL_RUNNING_OUT}" > ${_log_folder}/still_running.txt +while [ 6 -lt $STILL_RUNNING ]; do + sleep 5 +# ps +# echo `ps | wc -l` + STILL_RUNNING=`ps | wc -l` + 
STILL_RUNNING_OUT="${STILL_RUNNING}\n" + STILL_RUNNING_OUT+=`ps` + echo `date` "${STILL_RUNNING_OUT}" > ${_log_folder}/still_running.txt +done diff --git a/macro/beamtime/mcbm2022/online/mq_sink.sbatch b/macro/beamtime/mcbm2022/online/mq_sink.sbatch index a354b0a32e..21c060eba8 100644 --- a/macro/beamtime/mcbm2022/online/mq_sink.sbatch +++ b/macro/beamtime/mcbm2022/online/mq_sink.sbatch @@ -86,7 +86,7 @@ EVTSINK+=" --PubFreqTs $_pubfreqts" EVTSINK+=" --PubTimeMin $_pubminsec" EVTSINK+=" --PubTimeMax $_pubmaxsec" EVTSINK+=" --EvtNameIn events" -EVTSINK+=" --channel-config name=events,type=pull,method=bind,transport=zeromq,rcvBufSize=$_nbbranch,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" +EVTSINK+=" --channel-config name=events,type=pull,method=bind,transport=zeromq,rcvBufSize=1,address=tcp://127.0.0.1:11556,rateLogging=$_ratelog" EVTSINK+=" --channel-config name=missedts,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11006,rateLogging=$_ratelog" EVTSINK+=" --channel-config name=commands,type=sub,method=connect,transport=zeromq,address=tcp://127.0.0.1:11007,rateLogging=$_ratelog" EVTSINK+=" --channel-config name=histogram-in,type=pub,method=connect,transport=zeromq,address=tcp://${_histServHost}:11666,rateLogging=$_ratelog" diff --git a/macro/beamtime/mcbm2022/online/replay.sbatch b/macro/beamtime/mcbm2022/online/replay.sbatch new file mode 100644 index 0000000000..d562714bf7 --- /dev/null +++ b/macro/beamtime/mcbm2022/online/replay.sbatch @@ -0,0 +1,46 @@ +#!/bin/bash + +#SBATCH -J Replay + +# Copyright (C) 2022 Facility for Antiproton and Ion Research in Europe, Darmstadt +# SPDX-License-Identifier: GPL-3.0-only +# author: Pierre-Alain Loizeau [committer] + +if [ $# -ge 2 ]; then + _run_id=$1 + _port=$2 +else + echo 'Missing parameters. 
Only following pattern allowed:'
+  echo 'replay.sbatch <Run Id> <port>'
+  # Run through sbatch this file is a plain script: 'return' is invalid there and execution would continue
+  exit 1
+fi
+# Per-input file patterns: not expanded by the shell (assignments and the quoted uses below do no globbing)
+Filename1=/storage/1/data/${_run_id}_*_1_*.tsa
+Filename2=/storage/2/data/${_run_id}_*_2_*.tsa
+Filename3=/storage/3/data/${_run_id}_*_3_*.tsa
+Filename4=/storage/4/data/${_run_id}_*_4_*.tsa
+Filename5=/storage/5/data/${_run_id}_*_5_*.tsa
+Filename6=/storage/7/data/${_run_id}_*_6_*.tsa
+Filename7=/storage/8/data/${_run_id}_*_7_*.tsa
+Filename8=/storage/6/data/${_run_id}_*_8_*.tsa
+Filename9=/storage/6/data/${_run_id}_*_9_*.tsa
+
+HOSTNAME=$(hostname)
+HOST=${HOSTNAME:4:2}
+# Force cast to base 10 to avoid wrong octal base assumption
+HOSTCLEAN=$((10#${HOST}))
+
+## => Hostname and port for replay toward virgo
+#hostnameIB="cbmfles"$HOSTNAME
+#ipaddrIB=`dig $hostnameIB.gsi.de +short`
+#Port=$((5550 + $HOSTCLEAN))
+
+## => Hostname and port for replay within mFLES (could also be set to "*")
+ipaddrIB=*
+
+LogFile=/home/loizeau/rep_${HOSTNAME}_${_run_id}.log
+# Print the bind address, the port actually used (2nd argument) and the input patterns
+echo "${ipaddrIB}" "${_port}" "${Filename1};${Filename2};${Filename3};${Filename4};${Filename5};${Filename6};${Filename7};${Filename8};${Filename9}"
+
+tsclient -i file:"${Filename1};${Filename2};${Filename3};${Filename4};${Filename5};${Filename6};${Filename7};${Filename8};${Filename9}"? -P "tcp://${ipaddrIB}:${_port}" --publish-hwm 100 --rate-limit 2 &> "${LogFile}" # ~real readout
diff --git a/macro/beamtime/mcbm2022/online/start_topology_dev.sh b/macro/beamtime/mcbm2022/online/start_topology_dev.sh
new file mode 100755
index 0000000000..75801f848f
--- /dev/null
+++ b/macro/beamtime/mcbm2022/online/start_topology_dev.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Copyright (C) 2022 Facility for Antiproton and Ion Research in Europe, Darmstadt
+# SPDX-License-Identifier: GPL-3.0-only
+# author: Pierre-Alain Loizeau [committer]
+
+if [ $# -eq 4 ]; then
+  _run_id=$1
+  _nbbranch=$2
+  _TriggSet=$3
+  _Disk=$4
+  if [ ${_nbbranch} -eq 0 ]; then
+    echo 'Nb branches cannot be 0! At least one branch is needed!'
+    return 1 2>/dev/null || exit 1 # 'return' only works when sourced, 'exit' covers direct execution
+  fi
+  if [ ${_Disk} -lt 0 ] || [ ${_Disk} -gt 7 ]; then
+    echo 'Disk index on the en13 nodes can only be in [0-7]!'
+    return 1 2>/dev/null || exit 1
+  fi
+else
+  echo 'Missing parameters. Only following pattern allowed:'
+  echo 'start_topology_dev.sh <Run Id> <Nb // branches> <Trigger set> <Storage disk index>'
+  # Stop here in both sourced and executed mode, otherwise the sbatch submissions below run without arguments
+  return 1 2>/dev/null || exit 1
+fi
+# NOTE(review): _nbjobs is computed but not used anywhere else in this script
+((_nbjobs = 4 + $_nbbranch*2 ))
+#_log_folder="/local/mcbm2022/online_logs/${_run_id}"
+_log_folder="/storage/6/mcbm2022/online_logs/${_run_id}"
+_log_config="-D ${_log_folder} -o ${_run_id}_%A_%a.out.log -e ${_run_id}_%A_%a.err.log"
+
+# Create the log folders
+sbatch -w en13 create_log_folder_dev.sbatch ${_run_id}
+sleep 2
+
+# Online ports
+#sbatch -w en13 ${_log_config} mq_processing_node.sbatch ${_run_id} ${_nbbranch} ${_TriggSet} ${_Disk} node8ib2:5560
+
+# Replay ports (${_log_config} is left unquoted on purpose so that it splits into separate sbatch options)
+sbatch -w en13 ${_log_config} mq_processing_node_dev.sbatch ${_run_id} ${_nbbranch} ${_TriggSet} ${_Disk} node8ib2:5557
+sleep 10
+
+# Replay job: started on node8 with the same port (5557) as given to the processing node above
+sbatch -w node8 replay.sbatch ${_run_id} 5557
-- 
GitLab