-
Notifications
You must be signed in to change notification settings - Fork 88
Expand file tree
/
Copy pathtest_humanpopgen.nf.test
More file actions
142 lines (117 loc) · 11.1 KB
/
test_humanpopgen.nf.test
File metadata and controls
142 lines (117 loc) · 11.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Pipeline-level nf-test for the `test_humanpopgen` profile.
// Runs `main.nf` with that profile, then snapshots the files found in each top-level results directory
// (by content where the content is stable, by file name only where it is not).
nextflow_pipeline {
name "Test pipeline: NFCORE_EAGER"
script "main.nf"
tag "pipeline"
tag "nfcore_eager"
tag "test_humanpopgen"
profile "test_humanpopgen"
test("Test `test_humanpopgen` profile:") {
when {
params {
// Send all pipeline results to the output directory provided by nf-test for this test.
outdir = "$outputDir"
}
}
then {
///////////////////
// DOCUMENTATION //
///////////////////
// The contents of each top-level results directory should be tested with individually named snapshots.
// Within each snapshot, there should be two to three distinct variables that contain the files to be tested.
// - stable_name_<dir> is for files with variable md5sums (i.e. content), so only names will be compared
// - stable_content_<dir> is for files with stable md5sums (i.e. content), so md5sums will be compared
// - bams_<dir> is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable)
// If a directory is fully stable, you can drop `stable_name_*`
// If a directory contains no BAMs, you can drop `bams_*`
// Generate with: nf-test test --profile +docker --tag test_humanpopgen --update-snapshot
// Test with: nf-test test --profile +docker --tag test_humanpopgen
// NOTE: BAMs are always only stable in name, because:
// a) sharding breaks the header, since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112)
// b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order)
// point b) also causes BAIs to be unstable.
// c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes)
// Consequently this test defines no `bams_*` variables: BAM/BAI files are listed in the `stable_name_*` variables only.
//////////////////////
// DEFINE VARIABLES //
//////////////////////
// Define exclusion patterns for files with unstable contents
// NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here
// This is particularly important if the patterns excluded in the stable content section should be included in the stable name section
// Used twice below: as `ignore` for the authentication content check and as `include` for the authentication name check.
def unstable_patterns_auth = [
'**/mapped_reads_gc-content_distribution.txt',
'**/mapped_reads_nucleotide_content.txt',
'**/genome_gc_content_per_window.png',
'**/*.{svg,pdf,html,png}',
'**/DamageProfiler.log',
'**/3p_freq_misincorporations.txt',
'**/DNA_comp_genome.txt',
'**/DNA_composition_sample.txt',
'**/genome_results.txt',
'**/Runtime_log.txt',
'**/3pGtoA_freq.txt',
'**/5pCtoT_freq.txt',
'**/dnacomp.txt',
'**/lgdistribution.txt',
'**/misincorporation.txt',
'**/*sexdeterrmine.tsv',
'**/*command.log',
]
// Check that no files are missing/added
// Command legend: Result directory to index , includeDir: include dirs? , ignore: exclude patterns , ignoreFile: exclude pattern list , include: include patterns
// Everything under the output directory except the entries matched by 'pipeline_info/*'.
def stable_name_all = getAllFilesFromDir("$outputDir/" , includeDir: false , ignore: ['pipeline_info/*'] , ignoreFile: null , include: ['*', '**/*'] )
// Authentication
def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] )
def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth)
// Damage manipulation
def stable_content_damage_manipulation = getAllFilesFromDir("$outputDir/damage_manipulation" , includeDir: false , ignore: ['**/*.{bam,bai}'] , ignoreFile: null , include: ['*', '**/*'] )
def stable_name_damage_manipulation = getAllFilesFromDir("$outputDir/damage_manipulation" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
// Deduplication
def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
// Final_bams
def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
// Genotyping
// NOTE(review): this section (like metagenomics and preprocessing below) includes only '**/*' for the content check,
// whereas other sections use ['*', '**/*']. If '**/*' does not match files located directly in the directory,
// such files are not content-checked here — confirm this is intentional.
def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{geno,ind,snp,tsv}'] , ignoreFile: null , include: ['**/*'] )
def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{geno,ind,snp,tsv}'] )
// Mapping (incl. bam_input flagstat)
def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
// Metagenomics
def stable_content_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: ['**/*.biom', '**/*table.tsv'], ignoreFile: null , include: ['**/*'] )
def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.biom', '**/*table.tsv'] )
// Preprocessing
// NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp. To keep the expression simpler I removed both from checksum testing.
def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'] , ignoreFile: null , include: ['**/*'] )
def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] )
// Read filtering
def stable_content_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
def stable_name_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
// MultiQC
// Only a name list is collected for MultiQC; no content variable is defined for this directory.
def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] )
///////////////////////
// DEFINE ASSERTIONS //
///////////////////////
// In the assertions below, `stable_name_*` lists are passed as `*.name`, i.e. only the `name` of each entry goes into the snapshot.
assertAll(
{ assert workflow.success },
// This checks that there are no missing or additional output files.
// Also a good starting point to look at all the files in the output folder that need to be checked in subsequent sections.
{ assert snapshot( stable_name_all*.name ).match("all_files") },
// Checking changes to contents of each section
// NOTE: Keep the order of the sections in the alphanumeric order of the output directories.
// Each section should first check stable_content, stable_name second (if applicable).
{ assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") },
{ assert snapshot( stable_content_damage_manipulation , stable_name_damage_manipulation*.name ).match("damage_manipulation") },
{ assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") },
{ assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") },
{ assert snapshot( stable_content_genotyping , stable_name_genotyping*.name ).match("genotyping") },
// NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
{ assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
{ assert snapshot( stable_content_metagenomics , stable_name_metagenomics*.name ).match("metagenomics") },
{ assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") },
{ assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") },
{ assert snapshot( stable_name_multiqc*.name ).match("multiqc") },
// Versions
// Only the existence of the versions file is asserted; it is not part of any snapshot
// (the pipeline_info entries are excluded from `stable_name_all` above).
{ assert new File("$outputDir/pipeline_info/nf_core_eager_software_mqc_versions.yml").exists() },
)
}
}
}