Skip to content

Commit 824f340

Browse files
authored
Merge branch 'dev' into nf-test-microbial
2 parents 68d6732 + 50b039f commit 824f340

2 files changed

Lines changed: 918 additions & 0 deletions

File tree

tests/test_humanpopgen.nf.test

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
nextflow_pipeline {
2+
3+
name "Test pipeline: NFCORE_EAGER"
4+
script "main.nf"
5+
tag "pipeline"
6+
tag "nfcore_eager"
7+
tag "test_humanpopgen"
8+
profile "test_humanpopgen"
9+
10+
test("Test `test_humanpopgen` profile:") {
11+
12+
when {
13+
params {
14+
outdir = "$outputDir"
15+
}
16+
}
17+
18+
then {
19+
20+
///////////////////
21+
// DOCUMENTATION //
22+
///////////////////
23+
24+
// The contents of each top level results directory should be tested with individually named snapshots.
25+
// Within each snapshot, there should be two to three distinct variables, that contain the files to be tested.
26+
// - stable_name_<dir> is for files with variable md5sums (i.e. content) so only names will be compared
27+
// - stable_content_<dir> is for files with stable md5sums (i.e. content) so md5sums will be compared
28+
// - bams_<dir> is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable)
29+
// If a directory is fully stable, you can drop `stable_name_*`
30+
// If a directory contains no BAMs, you can drop `bams_*`
31+
32+
// Generate with: nf-test test --profile +docker --tag test_humanpopgen --update-snapshot
33+
// Test with: nf-test test --profile +docker --tag test_humanpopgen
34+
// NOTE: BAMs are always only stable in name, because:
35+
// a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112)
36+
// b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order)
37+
// point b) also causes BAIs to be unstable.
38+
// c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes)
39+
40+
//////////////////////
41+
// DEFINE VARIABLES //
42+
//////////////////////
43+
44+
// Define exclusion patterns for files with unstable contents
45+
// NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here
46+
// This is particularly important if the patterns excluded in the stable content section should be included in the stable name section
47+
def unstable_patterns_auth = [
48+
'**/mapped_reads_gc-content_distribution.txt',
49+
'**/mapped_reads_nucleotide_content.txt',
50+
'**/genome_gc_content_per_window.png',
51+
'**/*.{svg,pdf,html,png}',
52+
'**/DamageProfiler.log',
53+
'**/3p_freq_misincorporations.txt',
54+
'**/DNA_comp_genome.txt',
55+
'**/DNA_composition_sample.txt',
56+
'**/genome_results.txt',
57+
'**/Runtime_log.txt',
58+
'**/3pGtoA_freq.txt',
59+
'**/5pCtoT_freq.txt',
60+
'**/dnacomp.txt',
61+
'**/lgdistribution.txt',
62+
'**/misincorporation.txt',
63+
'**/*sexdeterrmine.tsv',
64+
'**/*command.log',
65+
]
66+
67+
// Check that no files are missing/added
68+
// Command legend: Result directory to index , includeDir: include dirs? , ignore: exclude patterns , ignoreFile: exclude pattern list , include: include patterns
69+
def stable_name_all = getAllFilesFromDir("$outputDir/" , includeDir: false , ignore: ['pipeline_info/*'] , ignoreFile: null , include: ['*', '**/*'] )
70+
71+
// Authentication
72+
def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] )
73+
def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth)
74+
75+
// Damage manipulation
76+
def stable_content_damage_manipulation = getAllFilesFromDir("$outputDir/damage_manipulation" , includeDir: false , ignore: ['**/*.{bam,bai}'] , ignoreFile: null , include: ['*', '**/*'] )
77+
def stable_name_damage_manipulation = getAllFilesFromDir("$outputDir/damage_manipulation" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
78+
79+
// Deduplication
80+
def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
81+
def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
82+
83+
// Final_bams
84+
def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
85+
def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
86+
87+
// Genotyping
88+
def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{geno,ind,snp,tsv}'] , ignoreFile: null , include: ['**/*'] )
89+
def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{geno,ind,snp,tsv}'] )
90+
91+
// Mapping (incl. bam_input flagstat)
92+
def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
93+
def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
94+
95+
// Metagenomics
96+
def stable_content_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: ['**/*.biom', '**/*table.tsv'], ignoreFile: null , include: ['**/*'] )
97+
def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.biom', '**/*table.tsv'] )
98+
99+
// Preprocessing
100+
// NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp. To keep the expression simpler I removed both from checksum testing.
101+
def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'] , ignoreFile: null , include: ['**/*'] )
102+
def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] )
103+
104+
// Read filtering
105+
def stable_content_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
106+
def stable_name_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
107+
108+
// MultiQC
109+
def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] )
110+
111+
///////////////////////
112+
// DEFINE ASSERTIONS //
113+
///////////////////////
114+
115+
assertAll(
116+
{ assert workflow.success },
117+
// This checks that there are no missing or additional output files.
118+
// Also a good starting point to look at all the files in the output folder that need to be checked in subsequent sections.
119+
{ assert snapshot( stable_name_all*.name ).match("all_files") },
120+
121+
// Checking changes to contents of each section
122+
// NOTE: Keep the order of the sections in the alphanumeric order of the output directories.
123+
// Each section should first check stable_content, stable_name second (if applicable).
124+
{ assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") },
125+
{ assert snapshot( stable_content_damage_manipulation , stable_name_damage_manipulation*.name ).match("damage_manipulation") },
126+
{ assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") },
127+
{ assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") },
128+
{ assert snapshot( stable_content_genotyping , stable_name_genotyping*.name ).match("genotyping") },
129+
// NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
130+
{ assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
131+
{ assert snapshot( stable_content_metagenomics , stable_name_metagenomics*.name ).match("metagenomics") },
132+
{ assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") },
133+
{ assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") },
134+
{ assert snapshot( stable_name_multiqc*.name ).match("multiqc") },
135+
136+
// Versions
137+
{ assert new File("$outputDir/pipeline_info/nf_core_eager_software_mqc_versions.yml").exists() },
138+
139+
)
140+
}
141+
}
142+
}

0 commit comments

Comments
 (0)