Skip to content

Commit 5887e39

Browse files
authored
Merge pull request #1146 from jbv2/nf-test-humanpopgen
DSL2: nf-test profile test-humanpopgen
2 parents c7dc65b + 0bd747d commit 5887e39

2 files changed

Lines changed: 726 additions & 0 deletions

File tree

tests/test_humanpopgen.nf.test

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// nf-test pipeline test for main.nf, tagged test_humanpopgen.
// Runs the pipeline with outdir pointed at the nf-test output directory, then snapshots
// the contents of each top-level results directory (file names and, where stable, checksums).
nextflow_pipeline {
2+
3+
name "Test pipeline: NFCORE_EAGER"
4+
script "main.nf"
5+
tag "pipeline"
6+
tag "nfcore_eager"
7+
tag "test_humanpopgen"
8+
9+
// NOTE(review): the test name below reads humanmpopgen (extra m) while the tag is test_humanpopgen.
// This looks like a typo; confirm nothing depends on the name before renaming it.
test("test_humanmpopgen_profile") {
10+
11+
when {
12+
params {
13+
// Write all pipeline results into the directory nf-test provides for this test run.
outdir = "$outputDir"
14+
}
15+
}
16+
17+
then {
18+
19+
///////////////////
20+
// DOCUMENTATION //
21+
///////////////////
22+
23+
// The contents of each top level results directory should be tested with individually named snapshots.
24+
// Within each snapshot, there should be two to three distinct variables that contain the files to be tested.
25+
// - stable_name_<dir> is for files with variable md5sums (i.e. content) so only names will be compared
26+
// - stable_content_<dir> is for files with stable md5sums (i.e. content) so md5sums will be compared
27+
// - bams_<dir> is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable)
28+
// If a directory is fully stable, you can drop `stable_name_*`
29+
// If a directory contains no BAMs, you can drop `bams_*`
// (This file defines no bams_* variables; BAM and BAI files are compared by name only via stable_name_*.)
30+
31+
// Generate with: nf-test test --tag test_humanpopgen --profile docker,test_humanpopgen --update-snapshot
32+
// Test with: nf-test test --tag test_humanpopgen --profile docker,test_humanpopgen
// (The profile name is assumed to match the tag declared above; confirm against the pipeline config.)
33+
// NOTE: BAMs are always only stable in name, because:
34+
// a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112)
35+
// b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order)
36+
// point b) also causes BAIs to be unstable.
37+
// c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes)
38+
39+
//////////////////////
40+
// DEFINE VARIABLES //
41+
//////////////////////
42+
43+
// Define exclusion patterns for files with unstable contents
44+
// NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here
45+
// This is particularly important if the patterns excluded in the stable content section should be included in the stable name section
// The list below is used twice for the authentication directory: as the ignore list for the
// checksum comparison and as the include list for the name-only comparison.
46+
def unstable_patterns_auth = [
47+
'**/mapped_reads_gc-content_distribution.txt',
48+
'**/mapped_reads_nucleotide_content.txt',
49+
'**/genome_gc_content_per_window.png',
50+
'**/*.{svg,pdf,html,png}',
51+
'**/DamageProfiler.log',
52+
'**/3p_freq_misincorporations.txt',
53+
'**/DNA_comp_genome.txt',
54+
'**/DNA_composition_sample.txt',
55+
'**/genome_results.txt',
56+
'**/Runtime_log.txt',
57+
'**/3pGtoA_freq.txt',
58+
'**/5pCtoT_freq.txt',
59+
'**/dnacomp.txt',
60+
// NOTE(review): the next pattern starts with a capital I and has no file extension.
// It was presumably meant to catch a length distribution file (lgdistribution.txt?); confirm it matches a real output.
'**/Igdistribution',
61+
'**/misincorporation.txt',
62+
'**/*sexdeterrmine.tsv',
63+
]
64+
65+
// Check that no files are missing/added
66+
// Command legend: Result directory to index , includeDir: include dirs?, ignore: exclude patterns , ignoreFile: exclude pattern list , include: include patterns
67+
def stable_name_all = getAllFilesFromDir("$outputDir/" , includeDir: false , ignore: ['pipeline_info/*'] , ignoreFile: null , include: ['*', '**/*'] )
68+
69+
// Authentication
70+
def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] )
71+
def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth)
72+
73+
// Damage manipulation
74+
def stable_content_damage_manipulation = getAllFilesFromDir("$outputDir/damage_manipulation" , includeDir: false , ignore: ['**/*.{bam,bai}'] , ignoreFile: null , include: ['*', '**/*'] )
75+
def stable_name_damage_manipulation = getAllFilesFromDir("$outputDir/damage_manipulation" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
76+
77+
// Deduplication - TODO: both lists are empty in the snapshot!? Needs investigation.
78+
def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
79+
def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
80+
81+
// Final_bams
82+
def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
83+
def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
84+
85+
// Genotyping
86+
def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{geno,ind,snp,tsv}'] , ignoreFile: null , include: ['**/*'] )
87+
def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{geno,ind,snp,tsv}'] )
88+
89+
// Mapping (incl. bam_input flagstat)
90+
def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
91+
def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
92+
93+
// Metagenomics
94+
def stable_content_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: ['**/*.biom', '**/*table.tsv'] , ignoreFile: null , include: ['**/*'] )
95+
def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.biom', '**/*table.tsv'] )
96+
97+
// Preprocessing
98+
// NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp. To keep the expression simpler I removed both from checksum testing.
99+
def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'], ignoreFile: null , include: ['**/*'] )
100+
def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] )
101+
102+
// Read filtering
103+
def stable_content_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
104+
def stable_name_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
105+
106+
// MultiQC (all files are compared by name only; no checksum list is defined for this directory)
107+
def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] )
108+
109+
///////////////////////
110+
// DEFINE ASSERTIONS //
111+
///////////////////////
112+
113+
assertAll(
114+
{ assert workflow.success },
115+
// This checks that there are no missing or additional output files.
116+
// Also a good starting point to look at all the files in the output folder that need to be checked in subsequent sections.
117+
{ assert snapshot( stable_name_all*.name ).match("all_files") },
118+
119+
// Checking changes to contents of each section
120+
// NOTE: Keep the order of the sections in the alphanumeric order of the output directories.
121+
// Each section should first check stable_content, stable_name second (if applicable).
122+
{ assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") },
123+
{ assert snapshot( stable_content_damage_manipulation , stable_name_damage_manipulation*.name ).match("damage_manipulation") },
124+
{ assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") },
125+
{ assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") },
126+
{ assert snapshot( stable_content_genotyping , stable_name_genotyping*.name ).match("genotyping") },
127+
// NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
128+
{ assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
129+
{ assert snapshot( stable_content_metagenomics , stable_name_metagenomics*.name ).match("metagenomics") },
130+
{ assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") },
131+
{ assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") },
132+
{ assert snapshot( stable_name_multiqc*.name ).match("multiqc") },
133+
134+
// Versions: only the existence of the software versions file is checked, not its contents.
135+
{ assert new File("$outputDir/pipeline_info/nf_core_eager_software_mqc_versions.yml").exists() },
136+
137+
)
138+
}
139+
}
140+
}

0 commit comments

Comments
 (0)