Skip to content

Commit 52da7b4

Browse files
authored
Merge pull request #1151 from nf-core/nf-test-microbial
DSL2: Add nf-test microbial
2 parents 50b039f + 824f340 commit 52da7b4

4 files changed

Lines changed: 909 additions & 4 deletions

File tree

conf/modules.config

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1698,9 +1698,9 @@ process {
16981698
[
16991699
ref_meta.ploidy ? "-p ${ref_meta.ploidy}" : '',
17001700
"-C ${params.genotyping_freebayes_min_alternate_count}",
1701-
{ params.genotyping_freebayes_skip_coverage == 0 ? "" : "-g ${params.genotyping_freebayes_skip_coverage}" },
1702-
]
1703-
}.join(' ').trim()
1701+
params.genotyping_freebayes_skip_coverage == 0 ? "" : "-g ${params.genotyping_freebayes_skip_coverage}",
1702+
].join(' ').trim()
1703+
}
17041704
}
17051705
ext.prefix = { "${meta.sample_id}_${meta.reference}" }
17061706
publishDir = [

tests/default.nf.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ nextflow_pipeline {
6767
def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] )
6868
def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth)
6969

70-
// Deduplication - TODO -> snapshot both lists are empty!?
70+
// Deduplication
7171
def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
7272
def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
7373

tests/test_microbial.nf.test

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
nextflow_pipeline {
2+
3+
name "Test pipeline: NFCORE_EAGER"
4+
script "main.nf"
5+
tag "pipeline"
6+
tag "nfcore_eager"
7+
tag "test_microbial"
8+
profile "test_microbial"
9+
10+
test("Test `test_microbial` profile:") {
11+
12+
when {
13+
params {
14+
outdir = "$outputDir"
15+
}
16+
}
17+
18+
then {
19+
20+
///////////////////
21+
// DOCUMENTATION //
22+
///////////////////
23+
24+
// The contents of each top level results directory should be tested with individually named snapshots.
25+
// Within each snapshot, there should be two to three distinct variables, that contain the files to be tested.
26+
// - stable_name_<dir> is for files with variable md5sums (i.e. content) so only names will be compared
27+
// - stable_content_<dir> is for files with stable md5sums (i.e. content) so md5sums will be compared
28+
// - bams_<dir> is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable)
29+
// If a directory is fully stable, you can drop `stable_name_*`
30+
// If a directory contains no BAMs, you can drop `bams_*`
31+
32+
// Generate with: nf-test test --profile +docker --tag test_microbial --update-snapshot
33+
// Test with: nf-test test --profile +docker --tag test_microbial
34+
// NOTE: BAMs are always only stable in name, because:
35+
// a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112)
36+
// b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order)
37+
// point b) also causes BAIs to be unstable.
38+
// c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes)
39+
40+
//////////////////////
41+
// DEFINE VARIABLES //
42+
//////////////////////
43+
44+
// Define exclusion patterns for files with unstable contents
45+
// NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here
46+
// This is particularly important if the patterns excluded in the stable content section should be included in the stable name section
47+
def unstable_patterns_auth = [
48+
'**/mapped_reads_gc-content_distribution.txt',
49+
'**/mapped_reads_nucleotide_content.txt',
50+
'**/genome_gc_content_per_window.png',
51+
'**/*.{svg,pdf,html,png}',
52+
'**/DamageProfiler.log',
53+
'**/3p_freq_misincorporations.txt',
54+
'**/5p_freq_misincorporations.txt',
55+
'**/DNA_comp_genome.txt',
56+
'**/DNA_composition_sample.txt',
57+
'**/misincorporation.txt',
58+
'**/genome_results.txt',
59+
'**/*command.log',
60+
]
61+
62+
// Check that no files are missing/added
63+
// Command legend: Result directory to index , includeDir: include dirs?, ignore: exclude patterns , ignoreFile: exclude pattern list , include: include patterns
64+
def stable_name_all = getAllFilesFromDir("$outputDir/" , includeDir: false , ignore: ['pipeline_info/*'] , ignoreFile: null , include: ['*', '**/*'] )
65+
66+
// Authentication
67+
def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] )
68+
def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth)
69+
70+
// Deduplication
71+
def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
72+
def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
73+
74+
// Final_bams
75+
def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
76+
def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
77+
78+
// Mapping (incl. bam_input flagstat)
79+
def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
80+
def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
81+
82+
// Preprocessing
83+
// NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp. To keep the expression simpler I removed both from checksum testing.
84+
def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'], ignoreFile: null , include: ['**/*'] )
85+
def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] )
86+
87+
// Read filtering
88+
def stable_content_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
89+
def stable_name_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
90+
91+
// Genotyping
92+
def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{tbi,vcf.gz}'] , ignoreFile: null , include: ['**/*'] )
93+
def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.tbi'] )
94+
// We need to collect the vcfs separately to run more specific md5sum checks on the header (contents are unstable due to the same reasons as BAMs, explained above).
95+
def genotyping_vcfs = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.vcf.gz'] )
96+
97+
// Metagenomics
98+
// This section has no stable content, because the kraken report files contain a timestamp, and the taxpasta results change ever so slightly because of inconsistent BAM files.
99+
def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*'] )
100+
101+
// MultiQC
102+
def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] )
103+
104+
///////////////////////
105+
// DEFINE ASSERTIONS //
106+
///////////////////////
107+
108+
assertAll(
109+
{ assert workflow.success },
110+
// This checks that there are no missing or additional output files.
111+
// Also a good starting point to look at all the files in the output folder that need to be checked in subsequent sections.
112+
{ assert snapshot( stable_name_all*.name ).match("all_files") },
113+
114+
// Checking changes to contents of each section
115+
// NOTE: Keep the order of the sections in the alphanumeric order of the output directories.
116+
// Each section should first check stable_content, stable_name second (if applicable).
117+
{ assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") },
118+
{ assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") },
119+
{ assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") },
120+
// NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
121+
{ assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
122+
{ assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") },
123+
{ assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") },
124+
{ assert snapshot( stable_content_genotyping , stable_name_genotyping*.name ).match("genotyping") },
125+
// Additional checks on the genotyping VCFs for content. Specifically the md5sums of the header FORMAT, INFO, FILTER, CONTIG lines, and sample names
126+
{ assert snapshot(
127+
genotyping_vcfs.collect {
128+
file ->
129+
def vcf_head = path(file.toString()).vcf.header
130+
// The header contains lines in the "OTHER" category, which contain a timestamp and/or work dir paths, so we need to filter those out, then calculate md5sums.
131+
def header_md5 = [
132+
vcf_head.getFormatHeaderLines().toString(),
133+
vcf_head.getInfoHeaderLines().toString(),
134+
vcf_head.getFilterLines().toString(),
135+
vcf_head.getIDHeaderLines().toString(),
136+
vcf_head.getGenotypeSamples().toString(),
137+
vcf_head.getContigLines().toString(),
138+
].join(' ').md5()
139+
file.getName() + ":header_md5," + header_md5
140+
}
141+
).match("genotyping_vcfs")},
142+
{ assert snapshot( stable_name_metagenomics*.name ).match("metagenomics") },
143+
{ assert snapshot( stable_name_multiqc*.name ).match("multiqc") },
144+
145+
// Versions
146+
{ assert new File("$outputDir/pipeline_info/nf_core_eager_software_mqc_versions.yml").exists() },
147+
148+
)
149+
}
150+
}
151+
}

0 commit comments

Comments
 (0)