@@ -47,13 +47,23 @@ nextflow_pipeline {
4747 '**/mapped_reads_gc-content_distribution.txt',
4848 '**/mapped_reads_nucleotide_content.txt',
4949 '**/genome_gc_content_per_window.png',
50- '**/*.{svg,pdf,html,png}',
50+ '**/*.{svg,pdf,html,png,json }',
5151 '**/DamageProfiler.log',
52- '**/3p_freq_misincorporations .txt',
52+ '**/{3,5}p_freq_misincorporations .txt',
5353 '**/DNA_comp_genome.txt',
5454 '**/DNA_composition_sample.txt',
5555 '**/misincorporation.txt',
5656 '**/genome_results.txt',
57+ '**/3pGtoA_freq.txt',
58+ '**/5pCtoT_freq.txt',
59+ '**/lgdistribution.txt',
60+ '**/*c_curve.txt',
61+ "**/coverage_across_reference.txt",
62+ "**/coverage_histogram.txt",
63+ "**/duplication_rate_histogram.txt",
64+ "**/genome_fraction_coverage.txt",
65+ "**/mapping_quality_across_reference.txt",
66+ "**/mapping_quality_histogram.txt",
5767 ]
5868
5969 // Check that no files are missing/added
@@ -64,13 +74,13 @@ nextflow_pipeline {
6474 def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] )
6575 def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth)
6676
67- // Deduplication - TODO -> snapshot both lists are empty!?
68- def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
69- def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai} '] )
77+ // Deduplication
78+ // NOTE: even the flagstats are unstable, so we only check the names
79+ def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*'] )
7080
7181 // Final_bams
72- def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
73- def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai} '] )
82+ // NOTE: BAMs are unstable, since upstream BAMs are unstable.
83+ def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*'] )
7484
7585 // Mapping (incl. bam_input flagstat)
7686 def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
@@ -82,10 +92,8 @@ nextflow_pipeline {
8292 def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] )
8393
8494 // Genotyping
85- def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{tbi,vcf.gz}'] , ignoreFile: null , include: ['**/*'] )
86- def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.tbi'] )
87- // We need to collect the vcfs separately to run more specific md5sum checks on the header (contnts are unstable due to same reasons as BAMs, explained above).
88- def genotyping_vcfs = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.vcf.gz'] )
95+ // NOTE: single file is created, with unstable content due to unstable BAM input.
96+ def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.glf.gz'] )
8997
9098 // Metagenomics
9199 def stable_content_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: ['**/*.biom', '**/*table.tsv'] , ignoreFile: null , include: ['**/*'] )
@@ -108,30 +116,13 @@ nextflow_pipeline {
108116 // NOTE: Keep the order of the sections in the alphanumeric order of the output directories.
109117 // Each section should first check stable_content, stable_name second (if applicable).
110118 { assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") },
111- { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") },
112- { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") },
119+ { assert snapshot( stable_name_deduplication*.name ).match("deduplication") },
120+ { assert snapshot( stable_name_final_bams*.name ).match("final_bams") },
113121 // NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
114122 { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
115123 { assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") },
116- { assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") },
117- { assert snapshot( stable_content_genotyping , stable_name_genotyping*.name ).match("genotyping") },
118- // Additional checks on the genotyping VCFs for content. Specifically the md5sums of the header FORMAT and INFO lines
119- { assert snapshot(
120- genotyping_vcfs.collect {
121- file ->
122- def vcf_head = path(file.toString()).vcf.header
123- // The header contains lines in the "OTHER" category, which contain a timestamp, so we need to filter those out, then calculate md5sums.
124- def header_md5 = [
125- vcf_head.getFormatHeaderLines().toString(),
126- vcf_head.getInfoHeaderLines().toString(),
127- vcf_head.getFilterLines().toString(),
128- vcf_head.getIDHeaderLines().toString(),
129- vcf_head.getGenotypeSamples().toString(),
130- vcf_head.getContigLines().toString(),
131- ].join(' ').md5()
132- file.getName() + ":header_md5," + header_md5
133- }
134- ).match("genotyping_vcfs")},
124+ // { assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") },
125+ { assert snapshot( stable_name_genotyping*.name ).match("genotyping") },
135126 { assert snapshot( stable_content_metagenomics , stable_name_metagenomics*.name ).match("metagenomics") },
136127 { assert snapshot( stable_name_multiqc*.name ).match("multiqc") },
137128
0 commit comments