diff --git a/bin/qtl b/bin/qtl index 95f431fcb..abfe8de05 100755 --- a/bin/qtl +++ b/bin/qtl @@ -18,6 +18,7 @@ usage() { physics Generate and analyze physics QA timelines (Step 2) error Scan for errors in Slurm logs (for Step 1) reheat Reproduce a data file, e.g., to rerun postprocessing + xtrain Cross check run list from trains and DSTs OPTIONS: Each command has its own set of options; run a command with no additional options to see usage for that command. @@ -41,6 +42,7 @@ case $cmd in ph*) exec $TIMELINESRC/bin/qtl-physics "$@" ;; er*) exec $TIMELINESRC/bin/qtl-error "$@" ;; re*) exec $TIMELINESRC/bin/qtl-reheat "$@" ;; + xt*) exec $TIMELINESRC/bin/qtl-xtrain "$@" ;; -v|--version) echo $(mvn -q help:evaluate -Dexpression=project.version -DforceStdout -f $TIMELINESRC/pom.xml || echo "UNKNOWN") exit 0 diff --git a/bin/qtl-xtrain b/bin/qtl-xtrain new file mode 100755 index 000000000..11fb755aa --- /dev/null +++ b/bin/qtl-xtrain @@ -0,0 +1,73 @@ +#!/usr/bin/env ruby + +require 'set' + +unless ARGV.length == 2 + puts """ + Verify that a directory of a train's skim files has the same list of + run numbers as a directory of DST-file run directories. + + USAGE: qtl xtrain [TRAIN_DIR] [DST_DIR] + + Both directories must be on /mss + """ + exit 2 +end +train_dir, dst_dir = ARGV + +# function to get a set of run numbers from one of the argument dirs +def get_runnums(path, type) + runnums = Set.new + raise "#{type} dir `#{path}` is not on /mss" unless path.match? /^\/mss\// + raise "#{type} dir `#{path}` does not exist" unless Dir.exist? path + + # get list of files/directories within + files = [] + case type + when :train + files = Dir.glob File.join(path, '*.hipo') + when :DST + files = Dir.glob File.join(path, '*/') + else + raise 'bad type' + end + raise "no #{type} files found in #{type} dir `#{path}`" if files.empty? 
+ + # extract their run numbers + files.each do |file| + nums = File.basename(file).scan(/\d+/).map &:to_i + raise "failed to get run number from #{type} object `#{file}`" unless nums.length == 1 + runnums << nums[0] + end + raise "failed to get run numbers from #{type} dir `#{path}`" if runnums.empty? + runnums +end + +# get runnum lists +train_runs = get_runnums train_dir, :train +dst_runs = get_runnums dst_dir, :DST +puts """---------------------------------------------------------------------------------- +train dir run list: +#{train_runs} +DST dir run list: +#{dst_runs} +----------------------------------------------------------------------------------""" + +# compare runnum sets +only_in_trains = train_runs - dst_runs +only_in_dsts = dst_runs - train_runs + +# return results +code = 0 +unless only_in_trains.empty? + $stderr.puts "ERROR: there are runs with skim files, but no corresponding DST-file directories:" + $stderr.puts only_in_trains + code = 1 +end +unless only_in_dsts.empty? + $stderr.puts "ERROR: there are runs with DST-file directories, but no corresponding skim files:" + $stderr.puts only_in_dsts + code = 1 +end +puts "All good" if code == 0 +exit code diff --git a/doc/qa.md b/doc/qa.md index 16bed5688..f08093694 100644 --- a/doc/qa.md +++ b/doc/qa.md @@ -49,6 +49,14 @@ If you are performing a manual QA as part of a cross check, skip to the next sec - use the scripts in the [`prescaler/` directory](/qadb/prescaler) +
+- [ ] cross check run list from trains and from DSTs + +- use `qtl xtrain` to make sure the list of DST runs is consistent with the list of runs from a train + - sometimes there are missing train files + - the script also checks for missing DST files (though that should never happen) +
+
- [ ] make sure all data are cached diff --git a/qadb/notes/rga_fa18.md b/qadb/notes/rga_fa18.md index 97f04c7d5..eca5a600c 100644 --- a/qadb/notes/rga_fa18.md +++ b/qadb/notes/rga_fa18.md @@ -7,15 +7,21 @@ We will use the `nSidis` train. -First make sure all skim files are cached: +Cross check the train and DST run lists: ```bash -qtl histogram -d rga_fa18_inbending_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/train/nSidis -qtl histogram -d rga_fa18_outbending_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus+1/pass2/train/nSidis +bin/qtl xtrain /mss/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/train/nSidis /mss/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/dst/recon/ +bin/qtl xtrain /mss/clas12/rg-a/production/recon/fall2018/torus+1/pass2/train/nSidis /mss/clas12/rg-a/production/recon/fall2018/torus+1/pass2/dst/recon/ +``` + +Make sure all skim files are cached: +```bash +bin/qtl histogram -d rga_fa18_inbending_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/train/nSidis +bin/qtl histogram -d rga_fa18_outbending_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus+1/pass2/train/nSidis ``` then run monitoring ```bash -qtl histogram -d rga_fa18_inbending_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/train/nSidis -qtl histogram -d rga_fa18_outbending_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus+1/pass2/train/nSidis +bin/qtl histogram -d rga_fa18_inbending_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/train/nSidis +bin/qtl histogram -d rga_fa18_outbending_nSidis --submit --flatdir --focus-physics 
/cache/clas12/rg-a/production/recon/fall2018/torus+1/pass2/train/nSidis ``` ## Double check that we have all the runs diff --git a/qadb/notes/rga_sp19.md b/qadb/notes/rga_sp19.md index 174329bb0..73d48bab4 100644 --- a/qadb/notes/rga_sp19.md +++ b/qadb/notes/rga_sp19.md @@ -26,12 +26,12 @@ start-workflow.sh rga-a-sp19*.json ## check that this is the correct JSON file For the prescaled train: ```bash -qtl histogram -d rga_sp19_prescaled --submit --focus-physics PATH_TO_PRESCALED_TRAIN +bin/qtl histogram -d rga_sp19_prescaled --submit --focus-physics PATH_TO_PRESCALED_TRAIN ``` For the SIDIS train, `nSidis`, first make sure all skim files are cached: ```bash -qtl histogram -d rga_sp19_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis +bin/qtl histogram -d rga_sp19_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis ``` If they are not: ```bash @@ -39,9 +39,13 @@ ls /mss/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis/* jcache get $(cat jlist.txt) # then wait for them to be cached ``` +Cross check the train and DST run lists: +```bash +bin/qtl xtrain /mss/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis /mss/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/recon +``` then run monitoring ```bash -qtl histogram -d rga_sp19_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis +bin/qtl histogram -d rga_sp19_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis ``` ## Make timelines diff --git a/qadb/notes/rgb_fa19.md b/qadb/notes/rgb_fa19.md index a8ef12cd7..61ccbc4b9 100644 --- a/qadb/notes/rgb_fa19.md +++ b/qadb/notes/rgb_fa19.md @@ -8,15 +8,21 @@ We will use the `sidisdvcs` train. 
There are inbending and outbending data, which we'll combine to one "dataset" in `qtl histogram`. -First make sure all skim files are cached: +Cross check the train and DST run lists: ```bash -qtl histogram -d rgb_fa19_sidisdvcs --check-cache --flatdir --focus-physics \ +bin/qtl xtrain /mss/clas12/rg-b/production/recon/fall2019/torus+1/pass2/v1/dst/train/sidisdvcs /mss/clas12/rg-b/production/recon/fall2019/torus+1/pass2/v1/dst/recon +bin/qtl xtrain /mss/clas12/rg-b/production/recon/fall2019/torus-1/pass2/v1/dst/train/sidisdvcs /mss/clas12/rg-b/production/recon/fall2019/torus-1/pass2/v1/dst/recon +``` + +Make sure all skim files are cached: +```bash +bin/qtl histogram -d rgb_fa19_sidisdvcs --check-cache --flatdir --focus-physics \ /cache/clas12/rg-b/production/recon/fall2019/torus+1/pass2/v1/dst/train/sidisdvcs/ \ /cache/clas12/rg-b/production/recon/fall2019/torus-1/pass2/v1/dst/train/sidisdvcs/ ``` then run monitoring ```bash -qtl histogram -d rgb_fa19_sidisdvcs --submit --flatdir --focus-physics \ +bin/qtl histogram -d rgb_fa19_sidisdvcs --submit --flatdir --focus-physics \ /cache/clas12/rg-b/production/recon/fall2019/torus+1/pass2/v1/dst/train/sidisdvcs/ \ /cache/clas12/rg-b/production/recon/fall2019/torus-1/pass2/v1/dst/train/sidisdvcs/ ``` diff --git a/qadb/notes/rgb_sp19.md b/qadb/notes/rgb_sp19.md index 5daf75248..cbbcdf686 100644 --- a/qadb/notes/rgb_sp19.md +++ b/qadb/notes/rgb_sp19.md @@ -7,13 +7,18 @@ We will use the `sidisdvcs` train. 
-First make sure all skim files are cached: +Cross check the train and DST run lists: ```bash -qtl histogram -d rgb_sp19_sidisdvcs --check-cache --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/train/sidisdvcs +bin/qtl xtrain /mss/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/train/sidisdvcs /mss/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/recon/ +``` + +Make sure all skim files are cached: +```bash +bin/qtl histogram -d rgb_sp19_sidisdvcs --check-cache --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/train/sidisdvcs ``` then run monitoring ```bash -qtl histogram -d rgb_sp19_sidisdvcs --submit --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/train/sidisdvcs +bin/qtl histogram -d rgb_sp19_sidisdvcs --submit --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/train/sidisdvcs ``` ## Double check that we have all the runs diff --git a/qadb/notes/rgb_wi20.md b/qadb/notes/rgb_wi20.md index cb047a643..d09b3b3fc 100644 --- a/qadb/notes/rgb_wi20.md +++ b/qadb/notes/rgb_wi20.md @@ -7,13 +7,18 @@ We will use the `sidisdvcs` train. 
-First make sure all skim files are cached: +Cross check the train and DST run lists: ```bash -qtl histogram -d rgb_wi20_sidisdvcs --check-cache --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/train/sidisdvcs +bin/qtl xtrain /mss/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/train/sidisdvcs /mss/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/recon +``` + +Make sure all skim files are cached: +```bash +bin/qtl histogram -d rgb_wi20_sidisdvcs --check-cache --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/train/sidisdvcs ``` then run monitoring ```bash -qtl histogram -d rgb_wi20_sidisdvcs --submit --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/train/sidisdvcs +bin/qtl histogram -d rgb_wi20_sidisdvcs --submit --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/train/sidisdvcs ``` ## Double check that we have all the runs diff --git a/qadb/notes/rgc_fa22.md b/qadb/notes/rgc_fa22.md index 94907d765..8f40a74c2 100644 --- a/qadb/notes/rgc_fa22.md +++ b/qadb/notes/rgc_fa22.md @@ -7,10 +7,15 @@ We will use the `sidisdvcs` train. +Cross check the train and DST run lists: +```bash +for d in $(ls -d /mss/clas12/rg-c/production/fall22/pass1/*/dst); do echo "===== $d ====="; bin/qtl xtrain $d/train/sidisdvcs $d/recon; done +``` + We will combine the targets' data into a single dataset named `rgc_fa22_prescaled`. 
```bash -qtl histogram --check-cache -d rgc_fa22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/fall22/pass1/*/dst/train/sidisdvcs/) -qtl histogram -d rgc_fa22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/fall22/pass1/*/dst/train/sidisdvcs/) +bin/qtl histogram --check-cache -d rgc_fa22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/fall22/pass1/*/dst/train/sidisdvcs/) +bin/qtl histogram -d rgc_fa22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/fall22/pass1/*/dst/train/sidisdvcs/) ``` ## Double check that we have all the runs diff --git a/qadb/notes/rgc_sp23.md b/qadb/notes/rgc_sp23.md index 3adcb18fa..90aca8cdf 100644 --- a/qadb/notes/rgc_sp23.md +++ b/qadb/notes/rgc_sp23.md @@ -7,10 +7,15 @@ We will use the `sidisdvcs` train. +Cross check the train and DST run lists: +```bash +for d in $(ls -d /mss/clas12/rg-c/production/spring23/pass1/*/dst); do echo "===== $d ====="; bin/qtl xtrain $d/train/sidisdvcs $d/recon; done +``` + We will combine the targets' data into a single dataset named `rgc_sp23_prescaled`. 
```bash -qtl histogram --check-cache -d rgc_sp23_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/spring23/pass1/*/dst/train/sidisdvcs/) -qtl histogram -d rgc_sp23_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/spring23/pass1/*/dst/train/sidisdvcs/) +bin/qtl histogram --check-cache -d rgc_sp23_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/spring23/pass1/*/dst/train/sidisdvcs/) +bin/qtl histogram -d rgc_sp23_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/spring23/pass1/*/dst/train/sidisdvcs/) ``` ## Double check that we have all the runs diff --git a/qadb/notes/rgc_su22.md b/qadb/notes/rgc_su22.md index 2b87c1f8e..a181d8e37 100644 --- a/qadb/notes/rgc_su22.md +++ b/qadb/notes/rgc_su22.md @@ -43,12 +43,16 @@ Assuming your output data are in ``` and that this wildcard pattern does _not_ include any files you _don't_ want, you may run ```bash -qtl histogram -d rgc_su22_prescaled --flatdir --focus-physics $(ls -d /volatile/clas12/users/$LOGNAME/qa_rgc_su22_*/train/QA) +bin/qtl histogram -d rgc_su22_prescaled --flatdir --focus-physics $(ls -d /volatile/clas12/users/$LOGNAME/qa_rgc_su22_*/train/QA) ``` Alternatively, for `sidisdvcs` trains (which have better statistics for asymmetries): ```bash -qtl histogram --check-cache -d rgc_su22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/summer22/pass1/*/*/dst/train/sidisdvcs) -qtl histogram -d rgc_su22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/summer22/pass1/*/*/dst/train/sidisdvcs) +bin/qtl histogram --check-cache -d rgc_su22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/summer22/pass1/*/*/dst/train/sidisdvcs) +bin/qtl histogram -d rgc_su22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/summer22/pass1/*/*/dst/train/sidisdvcs) +``` +Cross check the train and DST run lists: +```bash +for d in $(ls -d 
/mss/clas12/rg-c/production/summer22/pass1/*/*/dst); do echo "===== $d ====="; bin/qtl xtrain $d/train/sidisdvcs $d/recon; done ``` ## Make timelines