diff --git a/bin/qtl b/bin/qtl
index 95f431fcb..abfe8de05 100755
--- a/bin/qtl
+++ b/bin/qtl
@@ -18,6 +18,7 @@ usage() {
physics Generate and analyze physics QA timelines (Step 2)
error Scan for errors in Slurm logs (for Step 1)
reheat Reproduce a data file, e.g., to rerun postprocessing
+ xtrain Cross check run list from trains and DSTs
OPTIONS: Each command has its own set of options; run a command with no
additional options to see usage for that command.
@@ -41,6 +42,7 @@ case $cmd in
ph*) exec $TIMELINESRC/bin/qtl-physics "$@" ;;
er*) exec $TIMELINESRC/bin/qtl-error "$@" ;;
re*) exec $TIMELINESRC/bin/qtl-reheat "$@" ;;
+ xt*) exec $TIMELINESRC/bin/qtl-xtrain "$@" ;;
-v|--version)
echo $(mvn -q help:evaluate -Dexpression=project.version -DforceStdout -f $TIMELINESRC/pom.xml || echo "UNKNOWN")
exit 0
diff --git a/bin/qtl-xtrain b/bin/qtl-xtrain
new file mode 100755
index 000000000..11fb755aa
--- /dev/null
+++ b/bin/qtl-xtrain
@@ -0,0 +1,73 @@
+#!/usr/bin/env ruby
+
+require 'set'
+
+unless ARGV.length == 2 # exactly two arguments are required; otherwise print the usage guide and stop
+ puts """
+ Verify that a directory of a train's skim files has the same list of
+ run numbers as a directory of DST-file run directories.
+
+ USAGE: qtl xtrain [TRAIN_DIR] [DST_DIR]
+
+ Both directories must be on /mss
+ """
+ exit 2 # exit status 2 marks a usage error (run-list mismatches further down exit with status 1)
+end
+train_dir, dst_dir = ARGV # first argument is the train directory, second is the DST directory
+
+# function to get a set of run numbers from one of the argument dirs; raises (RuntimeError, via `raise` with a String) if any check fails
+def get_runnums(path, type) # path: directory to scan; type: :train or :DST; returns a Set of Integer run numbers
+ runnums = Set.new
+ raise "#{type} dir `#{path}` is not on /mss" unless path.match? /^\/mss\// # NOTE(review): `^` matches at any line start; `\A` would anchor the whole string
+ raise "#{type} dir `#{path}` does not exist" unless Dir.exist? path
+
+ # get list of files/directories within: `*.hipo` files for :train, subdirectories (`*/`) for :DST
+ files = []
+ case type
+ when :train
+ files = Dir.glob File.join(path, '*.hipo')
+ when :DST
+ files = Dir.glob File.join(path, '*/') # the trailing slash in the pattern restricts matches to directories
+ else
+ raise 'bad type'
+ end
+ raise "no #{type} files found in #{type} dir `#{path}`" if files.empty?
+
+ # extract their run numbers: each basename must contain exactly one run of digits, which is read as a base-10 Integer
+ files.each do |file|
+ nums = File.basename(file).scan(/\d+/).map &:to_i
+ raise "failed to get run number from #{type} object `#{file}`" unless nums.length == 1 # zero or several digit groups are ambiguous
+ runnums << nums[0] # a Set, so several entries for the same run collapse into one
+ end
+ raise "failed to get run numbers from #{type} dir `#{path}`" if runnums.empty? # defensive: `files` is non-empty here and every entry either adds a number or raises
+ runnums
+end
+
+# get runnum lists and print both to stdout; get_runnums raises if a directory fails its checks
+train_runs = get_runnums train_dir, :train
+dst_runs = get_runnums dst_dir, :DST
+puts """----------------------------------------------------------------------------------
+train dir run list:
+#{train_runs}
+DST dir run list:
+#{dst_runs}
+----------------------------------------------------------------------------------"""
+
+# compare runnum sets: take the Set difference in each direction
+only_in_trains = train_runs - dst_runs
+only_in_dsts = dst_runs - train_runs
+
+# return results: report each non-empty difference on stderr; the exit status is 1 if any mismatch was found, otherwise 0
+code = 0
+unless only_in_trains.empty?
+ $stderr.puts "ERROR: there are runs with skim files, but no corresponding DST-file directories:"
+ $stderr.puts only_in_trains
+ code = 1
+end
+unless only_in_dsts.empty?
+ $stderr.puts "ERROR: there are runs with DST-file directories, but no corresponding skim files:"
+ $stderr.puts only_in_dsts
+ code = 1
+end
+puts "All good" if code == 0 # success message goes to stdout, only when both differences are empty
+exit code
diff --git a/doc/qa.md b/doc/qa.md
index 16bed5688..f08093694 100644
--- a/doc/qa.md
+++ b/doc/qa.md
@@ -49,6 +49,14 @@ If you are performing a manual QA as part of a cross check, skip to the next sec
- use the scripts in the [`prescaler/` directory](/qadb/prescaler)
+
+- [ ] cross check run list from trains and from DSTs
+
+- use `qtl xtrain` to make sure the list of DST runs is consistent with the list of runs from a train
+ - sometimes there are missing train files
+ - the script also checks for runs that have skim files but no DST run directory (though that should never happen)
+
+
- [ ] make sure all data are cached
diff --git a/qadb/notes/rga_fa18.md b/qadb/notes/rga_fa18.md
index 97f04c7d5..eca5a600c 100644
--- a/qadb/notes/rga_fa18.md
+++ b/qadb/notes/rga_fa18.md
@@ -7,15 +7,21 @@
We will use the `nSidis` train.
-First make sure all skim files are cached:
+Cross check the train and DST run lists:
```bash
-qtl histogram -d rga_fa18_inbending_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/train/nSidis
-qtl histogram -d rga_fa18_outbending_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus+1/pass2/train/nSidis
+bin/qtl xtrain /mss/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/train/nSidis /mss/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/dst/recon/
+bin/qtl xtrain /mss/clas12/rg-a/production/recon/fall2018/torus+1/pass2/train/nSidis /mss/clas12/rg-a/production/recon/fall2018/torus+1/pass2/dst/recon/
+```
+
+Make sure all skim files are cached:
+```bash
+bin/qtl histogram -d rga_fa18_inbending_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/train/nSidis
+bin/qtl histogram -d rga_fa18_outbending_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus+1/pass2/train/nSidis
```
then run monitoring
```bash
-qtl histogram -d rga_fa18_inbending_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/train/nSidis
-qtl histogram -d rga_fa18_outbending_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus+1/pass2/train/nSidis
+bin/qtl histogram -d rga_fa18_inbending_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus-1/pass2/main/train/nSidis
+bin/qtl histogram -d rga_fa18_outbending_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/fall2018/torus+1/pass2/train/nSidis
```
## Double check that we have all the runs
diff --git a/qadb/notes/rga_sp19.md b/qadb/notes/rga_sp19.md
index 174329bb0..73d48bab4 100644
--- a/qadb/notes/rga_sp19.md
+++ b/qadb/notes/rga_sp19.md
@@ -26,12 +26,12 @@ start-workflow.sh rga-a-sp19*.json ## check that this is the correct JSON file
For the prescaled train:
```bash
-qtl histogram -d rga_sp19_prescaled --submit --focus-physics PATH_TO_PRESCALED_TRAIN
+bin/qtl histogram -d rga_sp19_prescaled --submit --focus-physics PATH_TO_PRESCALED_TRAIN
```
For the SIDIS train, `nSidis`, first make sure all skim files are cached:
```bash
-qtl histogram -d rga_sp19_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis
+bin/qtl histogram -d rga_sp19_nSidis --check-cache --flatdir --focus-physics /cache/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis
```
If they are not:
```bash
@@ -39,9 +39,13 @@ ls /mss/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis/*
jcache get $(cat jlist.txt)
# then wait for them to be cached
```
+Cross check the train and DST run lists:
+```bash
+bin/qtl xtrain /mss/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis /mss/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/recon
+```
then run monitoring
```bash
-qtl histogram -d rga_sp19_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis
+bin/qtl histogram -d rga_sp19_nSidis --submit --flatdir --focus-physics /cache/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/train/nSidis
```
## Make timelines
diff --git a/qadb/notes/rgb_fa19.md b/qadb/notes/rgb_fa19.md
index a8ef12cd7..61ccbc4b9 100644
--- a/qadb/notes/rgb_fa19.md
+++ b/qadb/notes/rgb_fa19.md
@@ -8,15 +8,21 @@
We will use the `sidisdvcs` train. There are inbending and outbending data, which we'll
combine to one "dataset" in `qtl histogram`.
-First make sure all skim files are cached:
+Cross check the train and DST run lists:
```bash
-qtl histogram -d rgb_fa19_sidisdvcs --check-cache --flatdir --focus-physics \
+bin/qtl xtrain /mss/clas12/rg-b/production/recon/fall2019/torus+1/pass2/v1/dst/train/sidisdvcs /mss/clas12/rg-b/production/recon/fall2019/torus+1/pass2/v1/dst/recon
+bin/qtl xtrain /mss/clas12/rg-b/production/recon/fall2019/torus-1/pass2/v1/dst/train/sidisdvcs /mss/clas12/rg-b/production/recon/fall2019/torus-1/pass2/v1/dst/recon
+```
+
+Make sure all skim files are cached:
+```bash
+bin/qtl histogram -d rgb_fa19_sidisdvcs --check-cache --flatdir --focus-physics \
/cache/clas12/rg-b/production/recon/fall2019/torus+1/pass2/v1/dst/train/sidisdvcs/ \
/cache/clas12/rg-b/production/recon/fall2019/torus-1/pass2/v1/dst/train/sidisdvcs/
```
then run monitoring
```bash
-qtl histogram -d rgb_fa19_sidisdvcs --submit --flatdir --focus-physics \
+bin/qtl histogram -d rgb_fa19_sidisdvcs --submit --flatdir --focus-physics \
/cache/clas12/rg-b/production/recon/fall2019/torus+1/pass2/v1/dst/train/sidisdvcs/ \
/cache/clas12/rg-b/production/recon/fall2019/torus-1/pass2/v1/dst/train/sidisdvcs/
```
diff --git a/qadb/notes/rgb_sp19.md b/qadb/notes/rgb_sp19.md
index 5daf75248..cbbcdf686 100644
--- a/qadb/notes/rgb_sp19.md
+++ b/qadb/notes/rgb_sp19.md
@@ -7,13 +7,18 @@
We will use the `sidisdvcs` train.
-First make sure all skim files are cached:
+Cross check the train and DST run lists:
```bash
-qtl histogram -d rgb_sp19_sidisdvcs --check-cache --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/train/sidisdvcs
+bin/qtl xtrain /mss/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/train/sidisdvcs /mss/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/recon/
+```
+
+Make sure all skim files are cached:
+```bash
+bin/qtl histogram -d rgb_sp19_sidisdvcs --check-cache --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/train/sidisdvcs
```
then run monitoring
```bash
-qtl histogram -d rgb_sp19_sidisdvcs --submit --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/train/sidisdvcs
+bin/qtl histogram -d rgb_sp19_sidisdvcs --submit --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2019/torus-1/pass2/v0/dst/train/sidisdvcs
```
## Double check that we have all the runs
diff --git a/qadb/notes/rgb_wi20.md b/qadb/notes/rgb_wi20.md
index cb047a643..d09b3b3fc 100644
--- a/qadb/notes/rgb_wi20.md
+++ b/qadb/notes/rgb_wi20.md
@@ -7,13 +7,18 @@
We will use the `sidisdvcs` train.
-First make sure all skim files are cached:
+Cross check the train and DST run lists:
```bash
-qtl histogram -d rgb_wi20_sidisdvcs --check-cache --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/train/sidisdvcs
+bin/qtl xtrain /mss/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/train/sidisdvcs /mss/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/recon
+```
+
+Make sure all skim files are cached:
+```bash
+bin/qtl histogram -d rgb_wi20_sidisdvcs --check-cache --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/train/sidisdvcs
```
then run monitoring
```bash
-qtl histogram -d rgb_wi20_sidisdvcs --submit --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/train/sidisdvcs
+bin/qtl histogram -d rgb_wi20_sidisdvcs --submit --flatdir --focus-physics /cache/clas12/rg-b/production/recon/spring2020/torus-1/pass2/v1/dst/train/sidisdvcs
```
## Double check that we have all the runs
diff --git a/qadb/notes/rgc_fa22.md b/qadb/notes/rgc_fa22.md
index 94907d765..8f40a74c2 100644
--- a/qadb/notes/rgc_fa22.md
+++ b/qadb/notes/rgc_fa22.md
@@ -7,10 +7,15 @@
We will use the `sidisdvcs` train.
+Cross check the train and DST run lists:
+```bash
+for d in $(ls -d /mss/clas12/rg-c/production/fall22/pass1/*/dst); do echo "===== $d ====="; bin/qtl xtrain $d/train/sidisdvcs $d/recon; done
+```
+
We will combine the targets' data into a single dataset named `rgc_fa22_prescaled`.
```bash
-qtl histogram --check-cache -d rgc_fa22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/fall22/pass1/*/dst/train/sidisdvcs/)
-qtl histogram -d rgc_fa22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/fall22/pass1/*/dst/train/sidisdvcs/)
+bin/qtl histogram --check-cache -d rgc_fa22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/fall22/pass1/*/dst/train/sidisdvcs/)
+bin/qtl histogram -d rgc_fa22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/fall22/pass1/*/dst/train/sidisdvcs/)
```
## Double check that we have all the runs
diff --git a/qadb/notes/rgc_sp23.md b/qadb/notes/rgc_sp23.md
index 3adcb18fa..90aca8cdf 100644
--- a/qadb/notes/rgc_sp23.md
+++ b/qadb/notes/rgc_sp23.md
@@ -7,10 +7,15 @@
We will use the `sidisdvcs` train.
+Cross check the train and DST run lists:
+```bash
+for d in $(ls -d /mss/clas12/rg-c/production/spring23/pass1/*/dst); do echo "===== $d ====="; bin/qtl xtrain $d/train/sidisdvcs $d/recon; done
+```
+
We will combine the targets' data into a single dataset named `rgc_sp23_prescaled`.
```bash
-qtl histogram --check-cache -d rgc_sp23_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/spring23/pass1/*/dst/train/sidisdvcs/)
-qtl histogram -d rgc_sp23_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/spring23/pass1/*/dst/train/sidisdvcs/)
+bin/qtl histogram --check-cache -d rgc_sp23_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/spring23/pass1/*/dst/train/sidisdvcs/)
+bin/qtl histogram -d rgc_sp23_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/spring23/pass1/*/dst/train/sidisdvcs/)
```
## Double check that we have all the runs
diff --git a/qadb/notes/rgc_su22.md b/qadb/notes/rgc_su22.md
index 2b87c1f8e..a181d8e37 100644
--- a/qadb/notes/rgc_su22.md
+++ b/qadb/notes/rgc_su22.md
@@ -43,12 +43,16 @@ Assuming your output data are in
```
and that this wildcard pattern does _not_ include any files you _don't_ want, you may run
```bash
-qtl histogram -d rgc_su22_prescaled --flatdir --focus-physics $(ls -d /volatile/clas12/users/$LOGNAME/qa_rgc_su22_*/train/QA)
+bin/qtl histogram -d rgc_su22_prescaled --flatdir --focus-physics $(ls -d /volatile/clas12/users/$LOGNAME/qa_rgc_su22_*/train/QA)
```
Alternatively, for `sidisdvcs` trains (which have better statistics for asymmetries):
```bash
-qtl histogram --check-cache -d rgc_su22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/summer22/pass1/*/*/dst/train/sidisdvcs)
-qtl histogram -d rgc_su22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/summer22/pass1/*/*/dst/train/sidisdvcs)
+bin/qtl histogram --check-cache -d rgc_su22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/summer22/pass1/*/*/dst/train/sidisdvcs)
+bin/qtl histogram -d rgc_su22_sidisdvcs --flatdir --focus-physics $(ls -d /cache/clas12/rg-c/production/summer22/pass1/*/*/dst/train/sidisdvcs)
+```
+Cross check the train and DST run lists:
+```bash
+for d in $(ls -d /mss/clas12/rg-c/production/summer22/pass1/*/*/dst); do echo "===== $d ====="; bin/qtl xtrain $d/train/sidisdvcs $d/recon; done
```
## Make timelines