diff --git a/conf/modules.config b/conf/modules.config index 92847ce9..069ae125 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1698,9 +1698,9 @@ process { [ ref_meta.ploidy ? "-p ${ref_meta.ploidy}" : '', "-C ${params.genotyping_freebayes_min_alternate_count}", - { params.genotyping_freebayes_skip_coverage == 0 ? "" : "-g ${params.genotyping_freebayes_skip_coverage}" }, - ] - }.join(' ').trim() + params.genotyping_freebayes_skip_coverage == 0 ? "" : "-g ${params.genotyping_freebayes_skip_coverage}", + ].join(' ').trim() + } } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ diff --git a/tests/default.nf.test b/tests/default.nf.test index 88474726..6d29b2da 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -67,7 +67,7 @@ nextflow_pipeline { def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] ) def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth) - // Deduplication - TODO -> snapshot both lists are empty!? 
+ // Deduplication def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) diff --git a/tests/test_microbial.nf.test b/tests/test_microbial.nf.test new file mode 100644 index 00000000..9fd4d104 --- /dev/null +++ b/tests/test_microbial.nf.test @@ -0,0 +1,151 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_EAGER" + script "main.nf" + tag "pipeline" + tag "nfcore_eager" + tag "test_microbial" + profile "test_microbial" + + test("Test `test_microbial` profile:") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + + /////////////////// + // DOCUMENTATION // + /////////////////// + + // The contents of each top level results directory should be tested with individually named snapshots. + // Within each snapshot, there should be two to three distinct variables, that contain the files to be tested. + // - stable_name_ is for files with variable md5sums (i.e. content) so only names will be compared + // - stable_content_ is for files with stable md5sums (i.e. 
content) so md5sums will be compared + // - bams_ is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable) + // If a directory is fully stable, you can drop `stable_name_*` + // If a directory contains no BAMs, you can drop `bams_*` + + // Generate with: nf-test test --profile +docker --tag test_microbial --update-snapshot + // Test with: nf-test test --profile +docker --tag test_microbial + // NOTE: BAMs are always only stable in name, because: + // a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112) + // b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order) + // point b) also causes BAIs to be unstable. + // c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes) + + ////////////////////// + // DEFINE VARIABLES // + ////////////////////// + + // Define exclusion patterns for files with unstable contents + // NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here + // This is particularly important if the patterns excluded in the stable content section should be included in the stable name section + def unstable_patterns_auth = [ + '**/mapped_reads_gc-content_distribution.txt', + '**/mapped_reads_nucleotide_content.txt', + '**/genome_gc_content_per_window.png', + '**/*.{svg,pdf,html,png}', + '**/DamageProfiler.log', + '**/3p_freq_misincorporations.txt', + '**/5p_freq_misincorporations.txt', + '**/DNA_comp_genome.txt', + '**/DNA_composition_sample.txt', + '**/misincorporation.txt', + '**/genome_results.txt', + '**/*command.log', + ] + + // Check that no files are missing/added + // Command legend: Result directory to index , includeDir: include dirs?, ignore: exclude patterns , ignoreFile: exclude pattern list , include: include 
patterns + def stable_name_all = getAllFilesFromDir("$outputDir/" , includeDir: false , ignore: ['pipeline_info/*'] , ignoreFile: null , include: ['*', '**/*'] ) + + // Authentication + def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] ) + def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth) + + // Deduplication + def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // Final_bams + def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // Mapping (incl. bam_input flagstat) + def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // Preprocessing + // NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp. To keep the expression simpler I removed both from checksum testing. 
+ def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'], ignoreFile: null , include: ['**/*'] ) + def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] ) + + // Read filtering + def stable_content_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + def stable_name_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // Genotyping + def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{tbi,vcf.gz}'] , ignoreFile: null , include: ['**/*'] ) + def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.tbi'] ) + // We need to collect the vcfs separately to run more specific md5sum checks on the header (contents are unstable due to the same reasons as BAMs, explained above). + def genotyping_vcfs = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.vcf.gz'] ) + + // Metagenomics + // This section has no stable content, because the kraken report files contain a timestamp, and the taxpasta results change ever so slightly because of inconsistent BAM files. 
+ def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*'] ) + + // MultiQC + def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] ) + + /////////////////////// + // DEFINE ASSERTIONS // + /////////////////////// + + assertAll( + { assert workflow.success }, + // This checks that there are no missing or additional output files. + // Also a good starting point to look at all the files in the output folder that need to be checked in subsequent sections. + { assert snapshot( stable_name_all*.name ).match("all_files") }, + + // Checking changes to contents of each section + // NOTE: Keep the order of the sections in the alphanumeric order of the output directories. + // Each section should first check stable_content, stable_name second (if applicable). + { assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") }, + { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") }, + { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") }, + // NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279 + { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") }, + { assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") }, + { assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") }, + { assert snapshot( stable_content_genotyping , stable_name_genotyping*.name ).match("genotyping") }, + // Additional checks on the genotyping VCFs for content. 
Specifically the md5sums of the header FORMAT, INFO, FILTER, CONTIG lines, and sample names + { assert snapshot( + genotyping_vcfs.collect { + file -> + def vcf_head = path(file.toString()).vcf.header + // The header contains lines in the "OTHER" category, which contain a timestamp and/or work dir paths, so we need to filter those out, then calculate md5sums. + def header_md5 = [ + vcf_head.getFormatHeaderLines().toString(), + vcf_head.getInfoHeaderLines().toString(), + vcf_head.getFilterLines().toString(), + vcf_head.getIDHeaderLines().toString(), + vcf_head.getGenotypeSamples().toString(), + vcf_head.getContigLines().toString(), + ].join(' ').md5() + file.getName() + ":header_md5," + header_md5 + } + ).match("genotyping_vcfs")}, + { assert snapshot( stable_name_metagenomics*.name ).match("metagenomics") }, + { assert snapshot( stable_name_multiqc*.name ).match("multiqc") }, + + // Versions + { assert new File("$outputDir/pipeline_info/nf_core_eager_software_mqc_versions.yml").exists() }, + + ) + } + } +} diff --git a/tests/test_microbial.nf.test.snap b/tests/test_microbial.nf.test.snap new file mode 100644 index 00000000..d89eafc7 --- /dev/null +++ b/tests/test_microbial.nf.test.snap @@ -0,0 +1,754 @@ +{ + "all_files": { + "content": [ + [ + "JK2782_Mammoth_MT_Krause_depth.bed", + "3pGtoA_freq.txt", + "3p_freq_misincorporations.txt", + "5pCtoT_freq.txt", + "5p_freq_misincorporations.txt", + "DNA_comp_genome.txt", + "DNA_composition_sample.txt", + "DamagePlot.pdf", + "DamagePlot_five_prime.svg", + "DamagePlot_three_prime.svg", + "DamageProfiler.log", + "Length_plot.pdf", + "Length_plot_combined_data.svg", + "Length_plot_forward_reverse_separated.svg", + "dmgprof.json", + "editDistance.txt", + "edit_distance.pdf", + "edit_distance.svg", + "lgdistribution.txt", + "misincorporation.txt", + "3pGtoA_freq.txt", + "3p_freq_misincorporations.txt", + "5pCtoT_freq.txt", + "5p_freq_misincorporations.txt", + "DNA_comp_genome.txt", + "DNA_composition_sample.txt", + 
"DamagePlot.pdf", + "DamagePlot_five_prime.svg", + "DamagePlot_three_prime.svg", + "DamageProfiler.log", + "Length_plot.pdf", + "Length_plot_combined_data.svg", + "Length_plot_forward_reverse_separated.svg", + "dmgprof.json", + "editDistance.txt", + "edit_distance.pdf", + "edit_distance.svg", + "lgdistribution.txt", + "misincorporation.txt", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_percent_on_target_mqc.json", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_percent_on_target_mqc.json", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.c_curve.txt", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.command.log", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.c_curve.txt", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.command.log", + "agogo.css", + "ajax-loader.gif", + "basic.css", + "bgfooter.png", + "bgtop.png", + "comment-bright.png", + "comment-close.png", + "comment.png", + "doctools.js", + "down-pressed.png", + "down.png", + "file.png", + "jquery.js", + "minus.png", + "plus.png", + "pygments.css", + "qualimap_logo_small.png", + "report.css", + "searchtools.js", + "underscore.js", + "up-pressed.png", + "up.png", + "websupport.js", + "genome_results.txt", + "genome_coverage_0to50_histogram.png", + "genome_coverage_across_reference.png", + "genome_coverage_histogram.png", + "genome_coverage_quotes.png", + "genome_gc_content_per_window.png", + "genome_homopolymer_indels.png", + "genome_mapping_quality_across_reference.png", + "genome_mapping_quality_histogram.png", + "genome_reads_clipping_profile.png", + "genome_reads_content_per_read_position.png", + "genome_uniq_read_starts_histogram.png", + "qualimapReport.html", + "coverage_across_reference.txt", + "coverage_histogram.txt", + "duplication_rate_histogram.txt", + "genome_fraction_coverage.txt", + "homopolymer_indels.txt", + "mapped_reads_clipping_profile.txt", + "mapped_reads_gc-content_distribution.txt", + "mapped_reads_nucleotide_content.txt", + "mapping_quality_across_reference.txt", + 
"mapping_quality_histogram.txt", + "agogo.css", + "ajax-loader.gif", + "basic.css", + "bgfooter.png", + "bgtop.png", + "comment-bright.png", + "comment-close.png", + "comment.png", + "doctools.js", + "down-pressed.png", + "down.png", + "file.png", + "jquery.js", + "minus.png", + "plus.png", + "pygments.css", + "qualimap_logo_small.png", + "report.css", + "searchtools.js", + "underscore.js", + "up-pressed.png", + "up.png", + "websupport.js", + "genome_results.txt", + "qualimapReport.html", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.bam", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.bam", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.flagstat", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.flagstat", + "JK2782_Mammoth_MT_Krause.bam", + "JK2782_Mammoth_MT_Krause.bam.bai", + "JK2782_hs37d5_chr21-MT.bam", + "JK2782_hs37d5_chr21-MT.bam.bai", + "JK2782_Mammoth_MT_Krause.flagstat", + "JK2782_hs37d5_chr21-MT.flagstat", + "JK2782_Mammoth_MT_Krause.bcftools_stats.txt", + "JK2782_hs37d5_chr21-MT.bcftools_stats.txt", + "JK2782_Mammoth_MT_Krause.vcf.gz", + "JK2782_Mammoth_MT_Krause.vcf.gz.tbi", + "JK2782_hs37d5_chr21-MT.vcf.gz", + "JK2782_hs37d5_chr21-MT.vcf.gz.tbi", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.bam", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.bam", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.flagstat", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.flagstat", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_metagenomics_fastq_unmapped_other.krakenuniq.report.txt", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_metagenomics_fastq_unmapped_other.krakenuniq.report.txt", + "krakenuniq_taxpasta_table.tsv", 
+ "BETA-multiqc.parquet", + "bcftools-stats-subtypes.txt", + "bcftools_stats_indel-lengths.txt", + "bcftools_stats_variant_depths.txt", + "bcftools_stats_vqc_Count_Indels.txt", + "bcftools_stats_vqc_Count_SNP.txt", + "bcftools_stats_vqc_Count_Transitions.txt", + "bcftools_stats_vqc_Count_Transversions.txt", + "fastp-insert-size-plot.txt", + "fastp-seq-content-gc-plot_Merged_and_filtered.txt", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt", + "fastp-seq-content-n-plot_Merged_and_filtered.txt", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt", + "fastp-seq-content-n-plot_Read_2_Before_filtering.txt", + "fastp-seq-quality-plot_Merged_and_filtered.txt", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt", + "fastp-seq-quality-plot_Read_2_Before_filtering.txt", + "fastp_filtered_reads_plot.txt", + "fiveprime_misinc_plot.txt", + "length-distribution-Forward.txt", + "length-distribution-Reverse.txt", + "mapdamage-fiveprime_misinc_plot.txt", + "mapdamage-length-distribution-Forward.txt", + "mapdamage-length-distribution-Reverse.txt", + "mapdamage-threeprime_misinc_plot.txt", + "multiqc.log", + "multiqc_bcftools_stats.txt", + "multiqc_citations.txt", + "multiqc_damageprofiler_metrics.txt", + "multiqc_data.json", + "multiqc_fastp.txt", + "multiqc_general_stats.txt", + "multiqc_qualimap_bamqc_genome_results.txt", + "multiqc_samtools_flagstat.txt", + "multiqc_software_versions.txt", + "multiqc_sources.txt", + "preseq.txt", + "preseq_complexity_plot_molecules.txt", + "qualimap_coverage_histogram.txt", + "qualimap_gc_content.txt", + "qualimap_genome_fraction.txt", + "samtools-flagstat-pct-table.txt", + "samtools-flagstat-table.txt", + "threeprime_misinc_plot.txt", + "bcftools-stats-subtypes-cnt.pdf", + "bcftools-stats-subtypes-pct.pdf", + "bcftools_stats_indel-lengths-cnt.pdf", + "bcftools_stats_indel-lengths-log.pdf", + "bcftools_stats_variant_depths.pdf", + "bcftools_stats_vqc_Count_Indels.pdf", + 
"bcftools_stats_vqc_Count_SNP.pdf", + "bcftools_stats_vqc_Count_Transitions.pdf", + "bcftools_stats_vqc_Count_Transversions.pdf", + "fastp-insert-size-plot.pdf", + "fastp-seq-content-gc-plot_Merged_and_filtered.pdf", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf", + "fastp-seq-content-n-plot_Merged_and_filtered.pdf", + "fastp-seq-content-n-plot_Read_1_Before_filtering.pdf", + "fastp-seq-content-n-plot_Read_2_Before_filtering.pdf", + "fastp-seq-quality-plot_Merged_and_filtered.pdf", + "fastp-seq-quality-plot_Read_1_Before_filtering.pdf", + "fastp-seq-quality-plot_Read_2_Before_filtering.pdf", + "fastp_filtered_reads_plot-cnt.pdf", + "fastp_filtered_reads_plot-pct.pdf", + "fiveprime_misinc_plot.pdf", + "length-distribution-Forward.pdf", + "length-distribution-Reverse.pdf", + "mapdamage-fiveprime_misinc_plot.pdf", + "mapdamage-length-distribution-Forward.pdf", + "mapdamage-length-distribution-Reverse.pdf", + "mapdamage-threeprime_misinc_plot.pdf", + "preseq_complexity_plot_molecules.pdf", + "qualimap_coverage_histogram.pdf", + "qualimap_gc_content.pdf", + "qualimap_genome_fraction.pdf", + "samtools-flagstat-pct-table.pdf", + "samtools-flagstat-table.pdf", + "threeprime_misinc_plot.pdf", + "bcftools-stats-subtypes-cnt.png", + "bcftools-stats-subtypes-pct.png", + "bcftools_stats_indel-lengths-cnt.png", + "bcftools_stats_indel-lengths-log.png", + "bcftools_stats_variant_depths.png", + "bcftools_stats_vqc_Count_Indels.png", + "bcftools_stats_vqc_Count_SNP.png", + "bcftools_stats_vqc_Count_Transitions.png", + "bcftools_stats_vqc_Count_Transversions.png", + "fastp-insert-size-plot.png", + "fastp-seq-content-gc-plot_Merged_and_filtered.png", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.png", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.png", + "fastp-seq-content-n-plot_Merged_and_filtered.png", + "fastp-seq-content-n-plot_Read_1_Before_filtering.png", + 
"fastp-seq-content-n-plot_Read_2_Before_filtering.png", + "fastp-seq-quality-plot_Merged_and_filtered.png", + "fastp-seq-quality-plot_Read_1_Before_filtering.png", + "fastp-seq-quality-plot_Read_2_Before_filtering.png", + "fastp_filtered_reads_plot-cnt.png", + "fastp_filtered_reads_plot-pct.png", + "fiveprime_misinc_plot.png", + "length-distribution-Forward.png", + "length-distribution-Reverse.png", + "mapdamage-fiveprime_misinc_plot.png", + "mapdamage-length-distribution-Forward.png", + "mapdamage-length-distribution-Reverse.png", + "mapdamage-threeprime_misinc_plot.png", + "preseq_complexity_plot_molecules.png", + "qualimap_coverage_histogram.png", + "qualimap_gc_content.png", + "qualimap_genome_fraction.png", + "samtools-flagstat-pct-table.png", + "samtools-flagstat-table.png", + "threeprime_misinc_plot.png", + "bcftools-stats-subtypes-cnt.svg", + "bcftools-stats-subtypes-pct.svg", + "bcftools_stats_indel-lengths-cnt.svg", + "bcftools_stats_indel-lengths-log.svg", + "bcftools_stats_variant_depths.svg", + "bcftools_stats_vqc_Count_Indels.svg", + "bcftools_stats_vqc_Count_SNP.svg", + "bcftools_stats_vqc_Count_Transitions.svg", + "bcftools_stats_vqc_Count_Transversions.svg", + "fastp-insert-size-plot.svg", + "fastp-seq-content-gc-plot_Merged_and_filtered.svg", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.svg", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.svg", + "fastp-seq-content-n-plot_Merged_and_filtered.svg", + "fastp-seq-content-n-plot_Read_1_Before_filtering.svg", + "fastp-seq-content-n-plot_Read_2_Before_filtering.svg", + "fastp-seq-quality-plot_Merged_and_filtered.svg", + "fastp-seq-quality-plot_Read_1_Before_filtering.svg", + "fastp-seq-quality-plot_Read_2_Before_filtering.svg", + "fastp_filtered_reads_plot-cnt.svg", + "fastp_filtered_reads_plot-pct.svg", + "fiveprime_misinc_plot.svg", + "length-distribution-Forward.svg", + "length-distribution-Reverse.svg", + "mapdamage-fiveprime_misinc_plot.svg", + 
"mapdamage-length-distribution-Forward.svg", + "mapdamage-length-distribution-Reverse.svg", + "mapdamage-threeprime_misinc_plot.svg", + "preseq_complexity_plot_molecules.svg", + "qualimap_coverage_histogram.svg", + "qualimap_gc_content.svg", + "qualimap_genome_fraction.svg", + "samtools-flagstat-pct-table.svg", + "samtools-flagstat-table.svg", + "threeprime_misinc_plot.svg", + "multiqc_report.html", + "JK2782_JK2782_TGGCCGATCAACGA_L1_data.txt", + "JK2782_JK2782_TGGCCGATCAACGA_L1_report.html", + "JK2782_JK2782_TGGCCGATCAACGA_L1_summary.txt", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_fastqc_data.txt", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_fastqc_report.html", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_summary.txt", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_fastqc_data.txt", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_fastqc_report.html", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_summary.txt", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.html", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.json", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.log", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_filtered.flagstat", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_filtered.flagstat" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-16T13:00:10.310232" + }, + "read_filtering": { + "content": [ + [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_filtered.flagstat:md5,e020b9f057207812f1d7d4c2dc2775c7", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_filtered.flagstat:md5,995f2c36894ef7c9954c924f125a7fb1" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.581877" + }, + "genotyping_vcfs": { + "content": [ + [ + "JK2782_Mammoth_MT_Krause.vcf.gz:header_md5,f028f9b24ba56d5d10a08e033fd10dc4", + 
"JK2782_hs37d5_chr21-MT.vcf.gz:header_md5,0d967f517a2027bce43b08f0b8ca1e58" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.891115" + }, + "preprocessing": { + "content": [ + [ + "JK2782_JK2782_TGGCCGATCAACGA_L1_data.txt:md5,59a2b8a41a68e90cf1ce8490973eb55a", + "JK2782_JK2782_TGGCCGATCAACGA_L1_summary.txt:md5,32133b6c95c4307ec05287b2626fe962", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_fastqc_data.txt:md5,81e758dc2ae45da2522e723d61534ed8", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_summary.txt:md5,14ac35d492f82bea791844ebc85dac4b", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_fastqc_data.txt:md5,e4a62b746b9ba6889a1a31cde7831b3c", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_summary.txt:md5,9463c4fbc53f6c588cb1652d59566402", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.json:md5,33ac3cf9cb1351d4a72c910c84db9983" + ], + [ + "JK2782_JK2782_TGGCCGATCAACGA_L1_report.html", + "JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz_fastqc_report.html", + "JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz_fastqc_report.html", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.html", + "JK2782_JK2782_TGGCCGATCAACGA_L1.fastp.log" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.4904" + }, + "metagenomics": { + "content": [ + [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_metagenomics_fastq_unmapped_other.krakenuniq.report.txt", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_metagenomics_fastq_unmapped_other.krakenuniq.report.txt", + "krakenuniq_taxpasta_table.tsv" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-07-18T11:45:53.966751" + }, + "genotyping": { + "content": [ + [ + "JK2782_Mammoth_MT_Krause.bcftools_stats.txt:md5,61ea79d61d3db9758e2ef15b5e895580", + "JK2782_hs37d5_chr21-MT.bcftools_stats.txt:md5,f8570bcd90546d6543ee4e8aa3363c28" + ], 
+ [ + "JK2782_Mammoth_MT_Krause.vcf.gz.tbi", + "JK2782_hs37d5_chr21-MT.vcf.gz.tbi" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.632792" + }, + "final_bams": { + "content": [ + [ + "JK2782_Mammoth_MT_Krause.flagstat:md5,dc1a84b8181885ff72a221576d916326", + "JK2782_hs37d5_chr21-MT.flagstat:md5,995f2c36894ef7c9954c924f125a7fb1" + ], + [ + "JK2782_Mammoth_MT_Krause.bam", + "JK2782_Mammoth_MT_Krause.bam.bai", + "JK2782_hs37d5_chr21-MT.bam", + "JK2782_hs37d5_chr21-MT.bam.bai" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.339986" + }, + "deduplication": { + "content": [ + [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.flagstat:md5,dc1a84b8181885ff72a221576d916326", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.flagstat:md5,995f2c36894ef7c9954c924f125a7fb1" + ], + [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.bam", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_dedupped.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.bam", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_dedupped.bam.bai" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.251664" + }, + "multiqc": { + "content": [ + [ + "BETA-multiqc.parquet", + "bcftools-stats-subtypes.txt", + "bcftools_stats_indel-lengths.txt", + "bcftools_stats_variant_depths.txt", + "bcftools_stats_vqc_Count_Indels.txt", + "bcftools_stats_vqc_Count_SNP.txt", + "bcftools_stats_vqc_Count_Transitions.txt", + "bcftools_stats_vqc_Count_Transversions.txt", + "fastp-insert-size-plot.txt", + "fastp-seq-content-gc-plot_Merged_and_filtered.txt", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt", + "fastp-seq-content-n-plot_Merged_and_filtered.txt", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt", + 
"fastp-seq-content-n-plot_Read_2_Before_filtering.txt", + "fastp-seq-quality-plot_Merged_and_filtered.txt", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt", + "fastp-seq-quality-plot_Read_2_Before_filtering.txt", + "fastp_filtered_reads_plot.txt", + "fiveprime_misinc_plot.txt", + "length-distribution-Forward.txt", + "length-distribution-Reverse.txt", + "mapdamage-fiveprime_misinc_plot.txt", + "mapdamage-length-distribution-Forward.txt", + "mapdamage-length-distribution-Reverse.txt", + "mapdamage-threeprime_misinc_plot.txt", + "multiqc.log", + "multiqc_bcftools_stats.txt", + "multiqc_citations.txt", + "multiqc_damageprofiler_metrics.txt", + "multiqc_data.json", + "multiqc_fastp.txt", + "multiqc_general_stats.txt", + "multiqc_qualimap_bamqc_genome_results.txt", + "multiqc_samtools_flagstat.txt", + "multiqc_software_versions.txt", + "multiqc_sources.txt", + "preseq.txt", + "preseq_complexity_plot_molecules.txt", + "qualimap_coverage_histogram.txt", + "qualimap_gc_content.txt", + "qualimap_genome_fraction.txt", + "samtools-flagstat-pct-table.txt", + "samtools-flagstat-table.txt", + "threeprime_misinc_plot.txt", + "bcftools-stats-subtypes-cnt.pdf", + "bcftools-stats-subtypes-pct.pdf", + "bcftools_stats_indel-lengths-cnt.pdf", + "bcftools_stats_indel-lengths-log.pdf", + "bcftools_stats_variant_depths.pdf", + "bcftools_stats_vqc_Count_Indels.pdf", + "bcftools_stats_vqc_Count_SNP.pdf", + "bcftools_stats_vqc_Count_Transitions.pdf", + "bcftools_stats_vqc_Count_Transversions.pdf", + "fastp-insert-size-plot.pdf", + "fastp-seq-content-gc-plot_Merged_and_filtered.pdf", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf", + "fastp-seq-content-n-plot_Merged_and_filtered.pdf", + "fastp-seq-content-n-plot_Read_1_Before_filtering.pdf", + "fastp-seq-content-n-plot_Read_2_Before_filtering.pdf", + "fastp-seq-quality-plot_Merged_and_filtered.pdf", + "fastp-seq-quality-plot_Read_1_Before_filtering.pdf", + 
"fastp-seq-quality-plot_Read_2_Before_filtering.pdf", + "fastp_filtered_reads_plot-cnt.pdf", + "fastp_filtered_reads_plot-pct.pdf", + "fiveprime_misinc_plot.pdf", + "length-distribution-Forward.pdf", + "length-distribution-Reverse.pdf", + "mapdamage-fiveprime_misinc_plot.pdf", + "mapdamage-length-distribution-Forward.pdf", + "mapdamage-length-distribution-Reverse.pdf", + "mapdamage-threeprime_misinc_plot.pdf", + "preseq_complexity_plot_molecules.pdf", + "qualimap_coverage_histogram.pdf", + "qualimap_gc_content.pdf", + "qualimap_genome_fraction.pdf", + "samtools-flagstat-pct-table.pdf", + "samtools-flagstat-table.pdf", + "threeprime_misinc_plot.pdf", + "bcftools-stats-subtypes-cnt.png", + "bcftools-stats-subtypes-pct.png", + "bcftools_stats_indel-lengths-cnt.png", + "bcftools_stats_indel-lengths-log.png", + "bcftools_stats_variant_depths.png", + "bcftools_stats_vqc_Count_Indels.png", + "bcftools_stats_vqc_Count_SNP.png", + "bcftools_stats_vqc_Count_Transitions.png", + "bcftools_stats_vqc_Count_Transversions.png", + "fastp-insert-size-plot.png", + "fastp-seq-content-gc-plot_Merged_and_filtered.png", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.png", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.png", + "fastp-seq-content-n-plot_Merged_and_filtered.png", + "fastp-seq-content-n-plot_Read_1_Before_filtering.png", + "fastp-seq-content-n-plot_Read_2_Before_filtering.png", + "fastp-seq-quality-plot_Merged_and_filtered.png", + "fastp-seq-quality-plot_Read_1_Before_filtering.png", + "fastp-seq-quality-plot_Read_2_Before_filtering.png", + "fastp_filtered_reads_plot-cnt.png", + "fastp_filtered_reads_plot-pct.png", + "fiveprime_misinc_plot.png", + "length-distribution-Forward.png", + "length-distribution-Reverse.png", + "mapdamage-fiveprime_misinc_plot.png", + "mapdamage-length-distribution-Forward.png", + "mapdamage-length-distribution-Reverse.png", + "mapdamage-threeprime_misinc_plot.png", + "preseq_complexity_plot_molecules.png", + 
"qualimap_coverage_histogram.png", + "qualimap_gc_content.png", + "qualimap_genome_fraction.png", + "samtools-flagstat-pct-table.png", + "samtools-flagstat-table.png", + "threeprime_misinc_plot.png", + "bcftools-stats-subtypes-cnt.svg", + "bcftools-stats-subtypes-pct.svg", + "bcftools_stats_indel-lengths-cnt.svg", + "bcftools_stats_indel-lengths-log.svg", + "bcftools_stats_variant_depths.svg", + "bcftools_stats_vqc_Count_Indels.svg", + "bcftools_stats_vqc_Count_SNP.svg", + "bcftools_stats_vqc_Count_Transitions.svg", + "bcftools_stats_vqc_Count_Transversions.svg", + "fastp-insert-size-plot.svg", + "fastp-seq-content-gc-plot_Merged_and_filtered.svg", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.svg", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.svg", + "fastp-seq-content-n-plot_Merged_and_filtered.svg", + "fastp-seq-content-n-plot_Read_1_Before_filtering.svg", + "fastp-seq-content-n-plot_Read_2_Before_filtering.svg", + "fastp-seq-quality-plot_Merged_and_filtered.svg", + "fastp-seq-quality-plot_Read_1_Before_filtering.svg", + "fastp-seq-quality-plot_Read_2_Before_filtering.svg", + "fastp_filtered_reads_plot-cnt.svg", + "fastp_filtered_reads_plot-pct.svg", + "fiveprime_misinc_plot.svg", + "length-distribution-Forward.svg", + "length-distribution-Reverse.svg", + "mapdamage-fiveprime_misinc_plot.svg", + "mapdamage-length-distribution-Forward.svg", + "mapdamage-length-distribution-Reverse.svg", + "mapdamage-threeprime_misinc_plot.svg", + "preseq_complexity_plot_molecules.svg", + "qualimap_coverage_histogram.svg", + "qualimap_gc_content.svg", + "qualimap_genome_fraction.svg", + "samtools-flagstat-pct-table.svg", + "samtools-flagstat-table.svg", + "threeprime_misinc_plot.svg", + "multiqc_report.html" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-16T13:00:10.794583" + }, + "mapping_output": { + "content": [ + [ + 
"JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.flagstat:md5,1b7e5d27190dc712f107f4e274d43378", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.flagstat:md5,f5b65baf228adddd68359f5dad5162a6" + ], + [ + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.bam", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_sorted.bam.bai", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.bam", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_sorted.bam.bai" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-06-20T11:36:35.419002" + }, + "authentication": { + "content": [ + [ + "JK2782_Mammoth_MT_Krause_depth.bed:md5,fb8622fe4f5c61d492dccaf4aada12f8", + "3pGtoA_freq.txt:md5,25a4caf935e2f9e515b0ae3296eaaefa", + "5pCtoT_freq.txt:md5,96e6e1f7a092c2d74c8c1b6d92107b4f", + "dmgprof.json:md5,3217dc500f1e092744dcde51f0cfd136", + "editDistance.txt:md5,8153fa52a92ec3159c3cb9541b473ce4", + "lgdistribution.txt:md5,079157ae272bceb736ffeebea78ac895", + "3pGtoA_freq.txt:md5,1f420f7930f6966d868b386dd3289ff9", + "5pCtoT_freq.txt:md5,381bd0d3782f745b48d20a4024e88d0d", + "dmgprof.json:md5,9220a6e588b97dc37d643b4c5b11361d", + "editDistance.txt:md5,97f16676725302ebe8ceeede42a8d7fd", + "lgdistribution.txt:md5,173232f3c31bc70869f0069b9694e6e8", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause_percent_on_target_mqc.json:md5,a0ba9e85a612bc78341a40d7c37913d7", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT_percent_on_target_mqc.json:md5,cc397422fe920aae9a50c6f176320fd3", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.c_curve.txt:md5,07edc21d807f0ba05e1ebc25dbf1a6f5", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.c_curve.txt:md5,cf4743abdd355595d6ec1fb3f38e66e5", + "agogo.css:md5,bd757b1a7ce6fdc0288ba148680f4583", + "ajax-loader.gif:md5,ae6667053ad118020b8e68ccf307b519", + "basic.css:md5,25b2823342c0604924a2870eeb4e7e94", + "doctools.js:md5,5ff571aa60e63f69c1890283e240ff8d", + "jquery.js:md5,10092eee563dec2dca82b77d2cf5a1ae", + 
"pygments.css:md5,d625a0adb949f181bd0d3f1432b0fa7f", + "report.css:md5,7a5f09eaf7c176f966f4e8854168b812", + "searchtools.js:md5,d550841adeedc8ed47c40ee607620937", + "underscore.js:md5,db5ba047a66617d4cd3e8c5099cc51db", + "websupport.js:md5,9e61e1e8a7433c56bd7e5a615affcf85", + "coverage_across_reference.txt:md5,b001e266a182e478d28e431bdaed32db", + "coverage_histogram.txt:md5,2ce0583063044147b0d9d96d6a0635c3", + "duplication_rate_histogram.txt:md5,e6c9e9b9fb0bc1afa5f86a88d50edb3b", + "genome_fraction_coverage.txt:md5,a1ced5dc2e0f55b9b5147822c31994bb", + "homopolymer_indels.txt:md5,2cc3cb5d9105a4d5feca2b3dbe32494e", + "mapped_reads_clipping_profile.txt:md5,402f69e636aa0fca915459c0c1a00a9f", + "mapping_quality_across_reference.txt:md5,89eba94fe07faa8ddcb52f264e3dc5ab", + "mapping_quality_histogram.txt:md5,d7fb55f12bf19b4ee41facb02262f590", + "agogo.css:md5,bd757b1a7ce6fdc0288ba148680f4583", + "ajax-loader.gif:md5,ae6667053ad118020b8e68ccf307b519", + "basic.css:md5,25b2823342c0604924a2870eeb4e7e94", + "doctools.js:md5,5ff571aa60e63f69c1890283e240ff8d", + "jquery.js:md5,10092eee563dec2dca82b77d2cf5a1ae", + "pygments.css:md5,d625a0adb949f181bd0d3f1432b0fa7f", + "report.css:md5,7a5f09eaf7c176f966f4e8854168b812", + "searchtools.js:md5,d550841adeedc8ed47c40ee607620937", + "underscore.js:md5,db5ba047a66617d4cd3e8c5099cc51db", + "websupport.js:md5,9e61e1e8a7433c56bd7e5a615affcf85" + ], + [ + "3p_freq_misincorporations.txt", + "5p_freq_misincorporations.txt", + "DNA_comp_genome.txt", + "DNA_composition_sample.txt", + "DamagePlot.pdf", + "DamagePlot_five_prime.svg", + "DamagePlot_three_prime.svg", + "DamageProfiler.log", + "Length_plot.pdf", + "Length_plot_combined_data.svg", + "Length_plot_forward_reverse_separated.svg", + "edit_distance.pdf", + "edit_distance.svg", + "misincorporation.txt", + "3p_freq_misincorporations.txt", + "5p_freq_misincorporations.txt", + "DNA_comp_genome.txt", + "DNA_composition_sample.txt", + "DamagePlot.pdf", + "DamagePlot_five_prime.svg", + 
"DamagePlot_three_prime.svg", + "DamageProfiler.log", + "Length_plot.pdf", + "Length_plot_combined_data.svg", + "Length_plot_forward_reverse_separated.svg", + "edit_distance.pdf", + "edit_distance.svg", + "misincorporation.txt", + "JK2782_JK2782_TGGCCGATCAACGA_Mammoth_MT_Krause.command.log", + "JK2782_JK2782_TGGCCGATCAACGA_hs37d5_chr21-MT.command.log", + "bgfooter.png", + "bgtop.png", + "comment-bright.png", + "comment-close.png", + "comment.png", + "down-pressed.png", + "down.png", + "file.png", + "minus.png", + "plus.png", + "qualimap_logo_small.png", + "up-pressed.png", + "up.png", + "genome_results.txt", + "genome_coverage_0to50_histogram.png", + "genome_coverage_across_reference.png", + "genome_coverage_histogram.png", + "genome_coverage_quotes.png", + "genome_gc_content_per_window.png", + "genome_homopolymer_indels.png", + "genome_mapping_quality_across_reference.png", + "genome_mapping_quality_histogram.png", + "genome_reads_clipping_profile.png", + "genome_reads_content_per_read_position.png", + "genome_uniq_read_starts_histogram.png", + "qualimapReport.html", + "mapped_reads_gc-content_distribution.txt", + "mapped_reads_nucleotide_content.txt", + "bgfooter.png", + "bgtop.png", + "comment-bright.png", + "comment-close.png", + "comment.png", + "down-pressed.png", + "down.png", + "file.png", + "minus.png", + "plus.png", + "qualimap_logo_small.png", + "up-pressed.png", + "up.png", + "genome_results.txt", + "qualimapReport.html" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-16T13:00:10.465714" + } +} \ No newline at end of file