diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 861caa48a..364f52f5e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,7 +64,7 @@ jobs: nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --save_reference - name: REFERENCE Basic workflow, with supplied indices run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --bwa_index 'results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta' --fasta_index 'https://github.com/nf-core/test-datasets/blob/eager/reference/Mammoth/Mammoth_MT_Krause.fasta.fai' + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --bwa_index 'results/reference_genome/bwa_index/BWAIndex/' --fasta_index 'https://github.com/nf-core/test-datasets/blob/eager/reference/Mammoth/Mammoth_MT_Krause.fasta.fai' - name: REFERENCE Run the basic pipeline with FastA reference with `fna` extension run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_fna,docker @@ -103,7 +103,7 @@ jobs: nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --mapper 'bowtie2' --bt2_alignmode 'local' --bt2_sensitivity 'sensitive' --bt2n 1 --bt2l 16 --bt2_trim5 1 --bt2_trim3 1 - name: STRIP_FASTQ Run the basic pipeline with output unmapped reads as fastq run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --strip_input_fastq + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --strip_input_fastq - name: BAM_FILTERING Run basic mapping pipeline with mapping quality filtering, and unmapped export run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_mapping_quality_threshold 37 --bam_unmapped_type 'fastq' diff --git a/assets/dummy.txt b/assets/dummy.txt deleted file mode 100644 index 3d370b26b..000000000 --- a/assets/dummy.txt +++ /dev/null @@ -1 +0,0 @@ -Empty dummy file for processes with optional input but channels still need to be filled. 
\ No newline at end of file diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 958e06c37..dee97e4ce 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -161,6 +161,7 @@ table_columns_visible: 1_x_pc: True 5_x_pc: True percentage_aligned: False + median_insert_size: False MultiVCFAnalyzer: Heterozygous SNP alleles (percent): True endorSpy: @@ -204,11 +205,11 @@ table_columns_placement: flagstat_total: 551 mapped_passed: 552 Samtools Flagstat (post-samtools filter): - flagstat_total: 553 - mapped_passed: 554 + flagstat_total: 600 + mapped_passed: 620 endorSpy: - endogenous_dna: 600 - endogenous_dna_post: 610 + endogenous_dna: 610 + endogenous_dna_post: 640 nuclear_contamination: Num_SNPs: 1100 Method1_MOM_estimate: 1110 diff --git a/assets/nf-core_eager_dummy.txt b/assets/nf-core_eager_dummy.txt new file mode 100644 index 000000000..e0517908b --- /dev/null +++ b/assets/nf-core_eager_dummy.txt @@ -0,0 +1 @@ +This is a dummy file for when we need a 'fake' file to satisfy all nextflow channel inputs being filled, even if we actually only use one. \ No newline at end of file diff --git a/assets/nf-core_eager_dummy2.txt b/assets/nf-core_eager_dummy2.txt new file mode 100644 index 000000000..3eb0a1670 --- /dev/null +++ b/assets/nf-core_eager_dummy2.txt @@ -0,0 +1 @@ +This is a second dummy file for when we need a 'fake' file to satisfy all nextflow channel inputs being filled, even if we actually only use one. 
\ No newline at end of file diff --git a/conf/benchmarking_vikingfish.config b/conf/benchmarking_vikingfish.config index 060604639..b0c456c61 100644 --- a/conf/benchmarking_vikingfish.config +++ b/conf/benchmarking_vikingfish.config @@ -12,7 +12,7 @@ params { config_profile_description = "A 'fullsized' benchmarking profile for deepish sequencing aDNA data" //Input data - input = 'https://raw.githubusercontent.com/jfy133/test-datasets/eager/testdata/Benchmarking/benchmarking_vikingfish.tsv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Benchmarking/benchmarking_vikingfish.tsv' // Genome reference fasta = 'https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_other/Gadus_morhua/representative/GCF_902167405.1_gadMor3.0/GCF_902167405.1_gadMor3.0_genomic.fna.gz' diff --git a/conf/benchmarking_vikingfish_single.config b/conf/benchmarking_vikingfish_single.config new file mode 100644 index 000000000..41bc8f8cd --- /dev/null +++ b/conf/benchmarking_vikingfish_single.config @@ -0,0 +1,56 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. 
Use as follows: + * nextflow run nf-core/eager -profile test, docker (or singularity, or conda) + */ + +params { + config_profile_name = 'nf-core/eager benchmarking - Viking Fish profile' + config_profile_description = "A 'fullsized' benchmarking profile for deepish sequencing aDNA data" + + //Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Benchmarking/benchmarking_vikingfish_single.tsv' + // Genome reference + fasta = 'https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_other/Gadus_morhua/representative/GCF_902167405.1_gadMor3.0/GCF_902167405.1_gadMor3.0_genomic.fna.gz' + + bwaalnn = 0.04 + bwaalnl = 1024 + + run_bam_filtering = true + bam_discard_unmapped = true + bam_unmapped_type = 'discard' + bam_mapping_quality_threshold = 25 + + run_genotyping = true + genotyping_tool = 'hc' + genotyping_source = 'raw' + gatk_ploidy = 2 + +} + +process { + withName:'adapter_removal'{ + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 2.h * task.attempt, 'time' ) } + } + withName:'bwa'{ + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withName:'dedup'{ + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withName:'genotyping_hc'{ + cpus = { check_max( 8, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + +} diff --git a/docs/output.md b/docs/output.md index d51c2e301..40d29a513 100644 --- a/docs/output.md +++ b/docs/output.md @@ -98,15 +98,16 @@ The possible columns displayed by default are as follows: - **Mappability** This is from MALT. It reports the percentage of the off-target reads (from mapping), that could map to your MALT metagenomic database. 
This can often be low for aDNA due to short reads and database bias. - **% Unclassified** This is from Kraken. It reports the percentage of reads that could not be aligned and taxonomically assigned against your Kraken metagenomic database. This can often be high for aDNA due to short reads and database bias. - **Reads Mapped** This is from Samtools. This is the raw number of preprocessed reads mapped to your reference genome _prior_ map quality filtering and deduplication. -- **Reads Mapped** This is from Samtools. This is the raw number of preprocessed reads mapped to your reference genome _after_ map quality filtering and deduplication (note the column name does not distinguish itself from prior-map quality filtering, but the post-filter column is always second) - **Endogenous DNA (%)** This is from the endorS.py tool. It displays a percentage of mapped reads over total reads that went into mapped (i.e. the percentage DNA content of the library that matches the reference). Assuming a perfect ancient sample with no modern contamination, this would be the amount of true ancient DNA in the sample. However this value _most likely_ include contamination and will not entirely be the true 'endogenous' content. +- **Reads Mapped** This is from Samtools. This is the raw number of preprocessed reads mapped to your reference genome _after_ map quality filtering and deduplication (note the column name does not distinguish itself from prior-map quality filtering, but the post-filter column is always second) - **Endogenous DNA Post (%)** This is from the endorS.py tool. It displays a percentage of mapped reads _after_ BAM filtering (e.g. for mapping quality) over total reads that went into mapped (i.e. the percentage DNA content of the library that matches the reference). This column will only be displayed if BAM filtering is turned on and is based on the original mapping for total reads, and mapped reads as calculated from the post-filtering BAM. 
- **ClusterFactor** This is from DeDup. This is a value representing the how many duplicates in the library exist for each unique read. A cluster factor close to one replicates a highly complex library and could be sequenced further. Generally with a value of more than 2 you will not be gaining much more information by sequencing deeper. - **Dups** This is from Picard's markDuplicates. It represents the percentage of reads in your library that were exact duplicates of other reads in your database. The lower the better, as high duplication rate means lots of sequencing of the same information (and therefore is not time or cost effective). - **X Prime Y>Z N base** These columns are from DamageProfiler. The prime numbers represent which end of the reads the damage is referring to. The Y>Z is the type of substitution (C>T is the true damage, G>A is the complementary). You should see for no- and half- UDG treatment a decrease in frequency from the 1st to 2nd base. - **Mean Read Length** This is from DamageProfiler. This is the mean length of all de-duplicated mapped reads. Ancient DNA normally will have a mean between 30-75, however this can vary. - **Median Read Length** This is from DamageProfiler. This is the median length of all de-duplicated mapped reads. Ancient DNA normally will have a mean between 30-75, however this can vary. -- **Coverage** This is from Qualimap. This is the median number of times a base on your reference genome was covered by a read (i.e. depth coverage).. This average includes bases with 0 reads covering that position. +- **Aligned** This is from Qualimap. This is the total number of _deduplicated_ reads that mapped to your reference genome. +- **Mean/Median Coverage** This is from Qualimap. This is the mean/median number of times a base on your reference genome was covered by a read (i.e. depth coverage). This average includes bases with 0 reads covering that position. +- **>= 1X** to **>= 5X** These are from Qualimap. 
This is the percentage of the genome covered at that particular depth coverage. - **% GC** This is the mean GC content in percent of all mapped reads post-deduplication. This should normally be close to the GC content of your reference genome. - **MT to Nuclear Ratio** This from MTtoNucRatio. This reports the number of reads aligned to a mitochondrial entry in your reference FASTA to all other entries. This will typically be high but will vary depending on tissue type. diff --git a/docs/usage.md b/docs/usage.md index 0aed90ad2..1c15121c4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -192,6 +192,7 @@ If you have multiple files in different directories, you can use additional wild 4. When using the pipeline with **paired end data**, the path must use `{1,2}` notation to specify read pairs. 5. Files names must be unique, having files with the same name, but in different directories is _not_ sufficient - This can happen when a library has been sequenced across two sequencers on the same lane. Either rename the file, try a symlink with a unique name, or merge the two FASTQ files prior input. +6. Due to limitations of downstream tools (e.g. FastQC), sample IDs may be truncated after the first `.` in the name. Ensure file names are unique prior to this! ##### TSV Input Method @@ -362,7 +363,7 @@ Use this if you do not have pre-made reference FASTA indices for `bwa`, `samtool #### `--bwa_index` -If you want to use pre-existing `bwa index` indices, please supply the path **and file** to the FASTA you also specified in `--fasta` (see above). EAGER2 will automagically detect the index files by searching for the FASTA filename with the corresponding `bwa` index file suffixes. +If you want to use pre-existing `bwa index` indices, please supply the **directory** to the FASTA you also specified in `--fasta` (see above). EAGER2 will automagically detect the index files by searching for the FASTA filename with the corresponding `bwa` index file suffixes. 
For example: @@ -371,7 +372,7 @@ nextflow run nf-core/eager \ -profile test,docker \ --input '*{R1,R2}*.fq.gz' --fasta 'results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta' \ ---bwa_index 'results/reference_genome/bwa_index/BWAIndex/Mammoth_MT_Krause.fasta' +--bwa_index 'results/reference_genome/bwa_index/BWAIndex/' ``` > `bwa index` does not give you an option to supply alternative suffixes/names for these indices. Thus, the file names generated by this command _must not_ be changed, otherwise EAGER2 will not be able to find them. @@ -734,6 +735,8 @@ Sets DeDup to treat all reads as merged reads. This is useful if reads are for e ### Library Complexity Estimation Parameters +nf-core/eager uses Preseq on mapped reads as one method to calculate library complexity. If DeDup is used, Preseq uses the histogram output of DeDup, otherwise the sorted non-duplicated BAM file is supplied. Furthermore, if paired-end read collapsing is not performed, the `-P` flag is used. + #### `--preseq_step_size` Can be used to configure the step size of Preseqs `c_curve` method. Can be useful when only few and thus shallow sequencing results are used for extrapolation. diff --git a/main.nf b/main.nf index 7a8b743f0..2d5d8ee43 100644 --- a/main.nf +++ b/main.nf @@ -253,42 +253,45 @@ println "" //////////////////////////////////////////////////// /* -- VALIDATE INPUTS -- */ //////////////////////////////////////////////////// +/**FASTA input handling +**/ -// Validate reference inputs -if ( params.fasta.isEmpty () ){ - exit 1, "[nf-core/eager] error: please specify --fasta with the path to your reference" -} else if("${params.fasta}".endsWith(".gz")){ -//Put the zip into a channel, then unzip it and forward to downstream processes. 
DONT unzip in all steps, this is inefficient as NXF links the files anyways from work to work dir - zipped_fasta = file("${params.fasta}") +if (params.fasta) { + file(params.fasta, checkIfExists: true) - rm_gz = params.fasta - '.gz' - lastPath = rm_gz.lastIndexOf(File.separator) - bwa_base = rm_gz.substring(lastPath+1) + lastPath = params.fasta.lastIndexOf(File.separator) + lastExt = params.fasta.lastIndexOf(".") + fasta_base = params.fasta.substring(lastPath+1) + index_base = params.fasta.substring(lastPath+1,lastExt) + if (params.fasta.endsWith('.gz')) { + fasta_base = params.fasta.substring(lastPath+1,lastExt) + index_base = fasta_base.substring(0,fasta_base.lastIndexOf(".")) + } +} else { + exit 1, "[nf-core/eager] error: please specify --fasta with the path to your reference" +} +// Validate reference inputs +if("${params.fasta}".endsWith(".gz")){ process unzip_reference{ tag "${zipped_fasta}" input: - file zipped_fasta + path zipped_fasta from params.fasta output: - file "*.{fa,fn,fna,fasta}" into ch_fasta_for_bwaindex,ch_fasta_for_bt2index,ch_fasta_for_faidx,ch_fasta_for_seqdict,ch_fasta_for_circulargenerator,ch_fasta_for_circularmapper,ch_fasta_for_damageprofiler,ch_fasta_for_qualimap,ch_fasta_for_pmdtools,ch_fasta_for_genotyping_ug,ch_fasta_for_genotyping_hc,ch_fasta_for_genotyping_freebayes,ch_fasta_for_genotyping_pileupcaller,ch_fasta_for_vcf2genome,ch_fasta_for_multivcfanalyzer,ch_fasta_for_genotyping_angsd + path "$unzip" into ch_fasta_for_bwaindex,ch_fasta_for_bt2index,ch_fasta_for_faidx,ch_fasta_for_seqdict,ch_fasta_for_circulargenerator,ch_fasta_for_circularmapper,ch_fasta_for_damageprofiler,ch_fasta_for_qualimap,ch_fasta_for_pmdtools,ch_fasta_for_genotyping_ug,ch_fasta_for_genotyping_hc,ch_fasta_for_genotyping_freebayes,ch_fasta_for_genotyping_pileupcaller,ch_fasta_for_vcf2genome,ch_fasta_for_multivcfanalyzer,ch_fasta_for_genotyping_angsd script: - rm_zip = zipped_fasta - '.gz' + unzip = zipped_fasta.toString() - '.gz' """ pigz -f -d 
-p ${task.cpus} $zipped_fasta """ } - } else { fasta_for_indexing = Channel .fromPath("${params.fasta}", checkIfExists: true) .into{ ch_fasta_for_bwaindex; ch_fasta_for_bt2index; ch_fasta_for_faidx; ch_fasta_for_seqdict; ch_fasta_for_circulargenerator; ch_fasta_for_circularmapper; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_genotyping_ug; ch_fasta__for_genotyping_hc; ch_fasta_for_genotyping_hc; ch_fasta_for_genotyping_freebayes; ch_fasta_for_genotyping_pileupcaller; ch_fasta_for_vcf2genome; ch_fasta_for_multivcfanalyzer;ch_fasta_for_genotyping_angsd } - - lastPath = params.fasta.lastIndexOf(File.separator) - bwa_base = params.fasta.substring(lastPath+1) - bt2_base = params.fasta.substring(lastPath+1) } // Check that fasta index file path ends in '.fai' @@ -321,16 +324,12 @@ if (params.bt2n != 0 && params.bt2n != 1) { // Index files provided? Then check whether they are correct and complete if( params.bwa_index != '' && (params.mapper == 'bwaaln' | params.mapper == 'bwamem')){ - lastPath = params.bwa_index.lastIndexOf(File.separator) - bwa_dir = params.bwa_index.substring(0,lastPath+1) - bwa_base = params.bwa_index.substring(lastPath+1) - Channel - .fromPath(bwa_dir, checkIfExists: true) - .ifEmpty { exit 1, "[nf-core/eager] error: bwa indicies not found in: ${bwa_dir}." } + .fromPath(params.bwa_index, checkIfExists: true) + .ifEmpty { exit 1, "[nf-core/eager] error: bwa indices not found in: ${index_base}." } .into {bwa_index; bwa_index_bwamem} - bt2_index = '' + bt2_index = Channel.empty() } if( params.bt2_index != '' && params.mapper == 'bowtie2' ){ @@ -339,11 +338,11 @@ if( params.bt2_index != '' && params.mapper == 'bowtie2' ){ bt2_base = params.bt2_index.substring(lastPath+1) Channel - .fromPath(bt2_dir, checkIfExists: true) - .ifEmpty { exit 1, "[nf-core/eager] error: bowtie2 indicies not found in: ${bt2_dir}." 
} + .fromPath(params.bt2_index, checkIfExists: true) + .ifEmpty { exit 1, "[nf-core/eager] error: bowtie2 indices not found in: ${bt2_dir}." } .into {bt2_index; bt2_index_bwamem} - bwa_index = '' + bwa_index = Channel.empty() } // Validate BAM input isn't set to paired_end @@ -768,12 +767,12 @@ if( params.bwa_index == '' && !params.fasta.isEmpty() && (params.mapper == 'bwaa } input: - file fasta from ch_fasta_for_bwaindex - file where_are_my_files + path fasta from ch_fasta_for_bwaindex + path where_are_my_files output: - file "BWAIndex" into (bwa_index, bwa_index_bwamem) - file "where_are_my_files.txt" + path "BWAIndex" into (bwa_index, bwa_index_bwamem) + path "where_are_my_files.txt" script: """ @@ -781,7 +780,7 @@ if( params.bwa_index == '' && !params.fasta.isEmpty() && (params.mapper == 'bwaa mkdir BWAIndex && mv ${fasta}* BWAIndex """ } - bt2_index = 'none' + bt2_index = Channel.empty() } // bowtie2 Index @@ -796,12 +795,12 @@ if(params.bt2_index == '' && !params.fasta.isEmpty() && params.mapper == "bowtie } input: - file fasta from ch_fasta_for_bt2index - file where_are_my_files + path fasta from ch_fasta_for_bt2index + path where_are_my_files output: - file "BT2Index" into (bt2_index) - file "where_are_my_files.txt" + path "BT2Index" into (bt2_index) + path "where_are_my_files.txt" script: """ @@ -810,8 +809,8 @@ if(params.bt2_index == '' && !params.fasta.isEmpty() && params.mapper == "bowtie """ } - bwa_index = 'none' - bwa_index_bwamem = 'none' + bwa_index = Channel.empty() + bwa_index_bwamem = Channel.empty() } @@ -838,12 +837,12 @@ process makeFastaIndex { when: params.fasta_index == '' && !params.fasta.isEmpty() && ( params.mapper == 'bwaaln' || params.mapper == 'bwamem' || params.mapper == 'circularmapper') input: - file fasta from ch_fasta_for_faidx - file where_are_my_files + path fasta from ch_fasta_for_faidx + path where_are_my_files output: - file "*.fai" into ch_fasta_faidx_index - file "where_are_my_files.txt" + path "*.fai" into 
ch_fasta_faidx_index + path "where_are_my_files.txt" script: """ @@ -878,12 +877,12 @@ process makeSeqDict { when: params.seq_dict == '' && !params.fasta.isEmpty() input: - file fasta from ch_fasta_for_seqdict - file where_are_my_files + path fasta from ch_fasta_for_seqdict + path where_are_my_files output: - file "*.dict" into ch_seq_dict - file "where_are_my_files.txt" + path "*.dict" into ch_seq_dict + path "where_are_my_files.txt" script: """ @@ -907,10 +906,10 @@ process convertBam { params.run_convertinputbam input: - tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, file(bam) from ch_input_for_convertbam + tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, path(bam) from ch_input_for_convertbam output: - tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, file("*fastq.gz"), val('NA') into ch_output_from_convertbam + tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, path("*fastq.gz"), val('NA') into ch_output_from_convertbam script: base = "${bam.baseName}" @@ -928,10 +927,10 @@ process indexinputbam { bam != 'NA' && !params.run_convertinputbam input: - tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, file(bam) from ch_input_for_indexbam + tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, path(bam) from ch_input_for_indexbam output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file("*.{bai,csi}") into ch_indexbam_for_filtering + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), file("*.{bai,csi}") into ch_indexbam_for_filtering script: def size = params.large_ref ? '-c' : '' @@ -956,14 +955,14 @@ process fastqc { publishDir "${params.outdir}/FastQC/input_fastq", mode: 'copy', saveAs: {filename -> filename.indexOf(".zip") > 0 ? 
"zips/$filename" : "$filename"} - when: - !params.skip_fastqc - input: tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_convertbam_for_fastqc output: - file "*_fastqc.{zip,html}" into ch_prefastqc_for_multiqc + path "*_fastqc.{zip,html}" into ch_prefastqc_for_multiqc + + when: + !params.skip_fastqc script: if ( seqtype == 'PE' ) { @@ -1009,8 +1008,8 @@ process fastp { tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_input_for_fastp.twocol output: - tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, file("*.pG.fq.gz") into ch_output_from_fastp - file("*.json") into ch_fastp_for_multiqc + tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, path("*.pG.fq.gz") into ch_output_from_fastp + path("*.json") into ch_fastp_for_multiqc script: if( seqtype == 'SE' ){ @@ -1035,7 +1034,7 @@ ch_input_for_fastp.fourcol def strandedness = it[6] def udg = it[7] def r1 = it[8] - def r2 = seqtype == 'PE' ? it[9] : 'NA' + def r2 = seqtype == "PE" ? it[9] : file("$baseDir/assets/nf-core_eager_dummy.txt") [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] @@ -1051,8 +1050,8 @@ ch_output_from_fastp def organism = it[5] def strandedness = it[6] def udg = it[7] - def r1 = it[8].getClass() == ArrayList ? it[8].sort()[0] : it[8] - def r2 = seqtype == 'PE' ? it[8].sort()[1] : 'NA' + def r1 = it[8] instanceof ArrayList ? it[8].sort()[0] : it[8] + def r2 = seqtype == "PE" ? 
it[8].sort()[1] : file("$baseDir/assets/nf-core_eager_dummy.txt") [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] @@ -1073,9 +1072,9 @@ process adapter_removal { tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_fastp_for_adapterremoval output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("output/*{combined.fq,.se.truncated,pair1.truncated}.gz") into ch_output_from_adapterremoval_r1 - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("output/*pair2.truncated.gz") optional true into ch_output_from_adapterremoval_r2 - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("output/*.settings") into ch_adapterremoval_logs + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*{combined.fq,.se.truncated,pair1.truncated}.gz") into ch_output_from_adapterremoval_r1 + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*pair2.truncated.gz") optional true into ch_output_from_adapterremoval_r2 + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("output/*.settings") into ch_adapterremoval_logs when: !params.skip_adapterremoval @@ -1155,7 +1154,7 @@ if ( params.skip_collapse ){ def strandedness = it[5] def udg = it[6] def r1 = file(it[7].sort()[0]) - def r2 = seqtype == 'PE' ? file(it[7].sort()[1]) : 'NA' + def r2 = seqtype == "PE" ? 
file(it[7].sort()[1]) : file("$baseDir/assets/nf-core_eager_dummy.txt") [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] @@ -1173,7 +1172,7 @@ if ( params.skip_collapse ){ def strandedness = it[5] def udg = it[6] def r1 = file(it[7]) - def r2 = 'NA' + def r2 = file("$baseDir/assets/nf-core_eager_dummy.txt") [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] } @@ -1184,6 +1183,7 @@ if ( params.skip_collapse ){ if (!params.skip_adapterremoval) { ch_output_from_adapterremoval.mix(ch_fastp_for_skipadapterremoval) .filter { it =~/.*combined.fq.gz|.*truncated.gz/ } + .dump(tag: "AR Bypass") .into { ch_adapterremoval_for_fastqc_after_clipping; ch_adapterremoval_for_lanemerge; } } else { ch_fastp_for_skipadapterremoval @@ -1208,21 +1208,60 @@ ch_branched_for_lanemerge = ch_adapterremoval_for_lanemerge [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] } + .dump(tag: "LaneMerge Bypass") .branch { skip_merge: it[7].size() == 1 // Can skip merging if only single lanes merge_me: it[7].size() > 1 } +ch_branched_for_lanemerge_skipme = ch_branched_for_lanemerge.skip_merge + .map{ + it -> + def samplename = it[0] + def libraryid = it[1] + def lane = it[2] + def seqtype = it[3] + def organism = it[4] + def strandedness = it[5] + def udg = it[6] + def r1 = it[7][0] + def r2 = it[8][0] + + [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] + } + .dump(tag: "LaneMerge Reconfigure") + + +ch_branched_for_lanemerge_ready = ch_branched_for_lanemerge.merge_me + .map{ + it -> + def samplename = it[0] + def libraryid = it[1] + def lane = it[2] + def seqtype = it[3] + def organism = it[4] + def strandedness = it[5] + def udg = it[6] + def r1 = it[7] + + // find and remove duplicate dummies to prevent file collision error + def r2 = it[8]*.toString() + r2.removeAll{ it == "$baseDir/assets/nf-core_eager_dummy.txt" } + + [ samplename, libraryid, lane, seqtype, organism, strandedness, 
udg, r1, r2 ] + } + process lanemerge { label 'sc_tiny' tag "${libraryid}" publishDir "${params.outdir}/lanemerging", mode: 'copy' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_branched_for_lanemerge.merge_me + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(r1), path(r2) from ch_branched_for_lanemerge_ready output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.fq.gz") into ch_lanemerge_for_mapping + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*_R1_lanemerged.fq.gz") into ch_lanemerge_for_mapping_r1 + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*_R2_lanemerged.fq.gz") optional true into ch_lanemerge_for_mapping_r2 script: if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){ @@ -1233,32 +1272,54 @@ process lanemerge { """ } else { """ - cat ${r1} > "${libraryid}"_lanemerged.fq.gz + cat ${r1} > "${libraryid}"_R1_lanemerged.fq.gz """ } } -ch_lanemerge_for_mapping - .map { - def samplename = it[0] - def libraryid = it[1] - def lane = it[2] - def seqtype = it[3] - def organism = it[4] - def strandedness = it[5] - def udg = it[6] - def reads = arrayify(it[7]) - def r1 = it[7].getClass() == ArrayList ? reads[0] : it[7] - def r2 = reads[1] ? reads[1] : "NA" - +// Ensuring always valid R2 file even if doesn't exist for AWS +if ( ( params.skip_collapse || params.skip_adapterremoval ) ) { + ch_lanemerge_for_mapping_r1 + .dump(tag: "Post LaneMerge Reconfigure") + .mix(ch_lanemerge_for_mapping_r2) + .groupTuple(by: [0,1,2,3,4,5,6]) + .map{ + it -> + def samplename = it[0] + def libraryid = it[1] + def lane = it[2] + def seqtype = it[3] + def organism = it[4] + def strandedness = it[5] + def udg = it[6] + def r1 = file(it[7].sort()[0]) + def r2 = seqtype == "PE" ? 
file(it[7].sort()[1]) : file("$baseDir/assets/nf-core_eager_dummy.txt") + [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] - [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] + } + .mix(ch_branched_for_lanemerge_skipme) + .into { ch_lanemerge_for_skipmap; ch_lanemerge_for_bwa; ch_lanemerge_for_cm; ch_lanemerge_for_bwamem; ch_lanemerge_for_bt2 } +} else { + ch_lanemerge_for_mapping_r1 + .map{ + it -> + def samplename = it[0] + def libraryid = it[1] + def lane = it[2] + def seqtype = it[3] + def organism = it[4] + def strandedness = it[5] + def udg = it[6] + def r1 = file(it[7]) + def r2 = file("$baseDir/assets/nf-core_eager_dummy.txt") - } - .mix(ch_branched_for_lanemerge.skip_merge) - .into { ch_lanemerge_for_skipmap; ch_lanemerge_for_bwa; ch_lanemerge_for_cm; ch_lanemerge_for_bwamem; ch_lanemerge_for_bt2 } + [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] + } + .mix(ch_branched_for_lanemerge_skipme) + .into { ch_lanemerge_for_skipmap; ch_lanemerge_for_bwa; ch_lanemerge_for_cm; ch_lanemerge_for_bwamem; ch_lanemerge_for_bt2 } +} // ENA upload doesn't do separate lanes, so merge raw FASTQs for mapped-reads stripping @@ -1274,7 +1335,7 @@ process lanemerge_stripfastq { tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_input_for_lanemerge_stripfastq.groupTuple(by: [0,1,3,4,5,6,7]) output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.fq.gz") into ch_fastqlanemerge_for_stripfastq + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.fq.gz") into ch_fastqlanemerge_for_stripfastq script: if ( seqtype == 'PE' ){ @@ -1305,7 +1366,7 @@ process fastqc_after_clipping { tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_adapterremoval_for_fastqc_after_clipping output: - file("*_fastqc.{zip,html}") into 
ch_fastqc_after_clipping + path("*_fastqc.{zip,html}") into ch_fastqc_after_clipping script: if ( params.skip_collapse && seqtype == 'PE' ) { @@ -1332,18 +1393,18 @@ process bwa { publishDir "${params.outdir}/mapping/bwa", mode: 'copy' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_lanemerge_for_bwa - file index from bwa_index.collect() + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(r1), path(r2) from ch_lanemerge_for_bwa.dump(tag: "input_tuple") + path index from bwa_index.collect().dump(tag: "input_index") output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_bwa + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.mapped.bam"), path("*.{bai,csi}") into ch_output_from_bwa when: params.mapper == 'bwaaln' script: def size = params.large_ref ? '-c' : '' - def fasta = "${index}/${bwa_base}" + def fasta = "${index}/${fasta_base}" //PE data without merging, PE data without any AR applied if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){ @@ -1373,16 +1434,16 @@ process bwamem { input: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_lanemerge_for_bwamem - file index from bwa_index_bwamem.collect() + path index from bwa_index_bwamem.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_bwamem + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.mapped.bam"), path("*.{bai,csi}") into ch_output_from_bwamem when: params.mapper == 'bwamem' script: - def fasta = "${index}/${bwa_base}" + def fasta = "${index}/${fasta_base}" def size = params.large_ref ? 
'-c' : '' if (!params.single_end && params.skip_collapse){ @@ -1415,6 +1476,7 @@ process circulargenerator{ output: file "${prefix}.{amb,ann,bwt,sa,pac}" into ch_circularmapper_indices + file "*_elongated" into ch_circularmapper_elongatedfasta when: params.mapper == 'circularmapper' @@ -1437,6 +1499,7 @@ process circularmapper{ tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_lanemerge_for_cm file index from ch_circularmapper_indices.collect() file fasta from ch_fasta_for_circularmapper.collect() + file elongated from ch_circularmapper_elongatedfasta.collect() output: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_cm @@ -1477,18 +1540,18 @@ process bowtie2 { input: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_lanemerge_for_bt2 - file index from bt2_index.collect() + path index from bt2_index.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mapped.bam"), file("*.{bai,csi}") into ch_output_from_bt2 - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*_bt2.log") into ch_bt2_for_multiqc + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.mapped.bam"), path("*.{bai,csi}") into ch_output_from_bt2 + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*_bt2.log") into ch_bt2_for_multiqc when: params.mapper == 'bowtie2' script: def size = params.large_ref ? '-c' : '' - def fasta = "${index}/${bt2_base}" + def fasta = "${index}/${fasta_base}" def trim5 = params.bt2_trim5 != 0 ? "--trim5 ${params.bt2_trim5}" : "" def trim3 = params.bt2_trim3 != 0 ? "--trim3 ${params.bt2_trim3}" : "" def bt2n = params.bt2n != 0 ? 
"-N ${params.bt2n}" : "" @@ -1557,9 +1620,8 @@ ch_fastqlanemerge_for_stripfastq def organism = it[4] def strandedness = it[5] def udg = it[6] - def reads = arrayify(it[7]) - def r1 = it[7].getClass() == ArrayList ? reads[0] : it[7] - def r2 = it[7].getClass() == ArrayList ? reads[1] : "NA" + def r1 = seqtype == "PE" ? file(it[7].sort()[0]) : file(it[7]) + def r2 = seqtype == "PE" ? file(it[7].sort()[1]) : file("$baseDir/assets/nf-core_eager_dummy.txt") [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ] @@ -1583,6 +1645,7 @@ ch_fastqlanemerge_for_stripfastq } .filter{ it[8] != null } + .dump(tag: "StripFastq Input") .set { ch_synced_for_stripfastq } // Remove mapped reads from original (lane merged) input FASTQ e.g. for sensitive host data when running metagenomic data @@ -1596,7 +1659,7 @@ process strip_input_fastq { params.strip_input_fastq input: - tuple samplename, libraryid, seqtype, organism, strandedness, udg, file(r1), file(r2), file(bam), file(bai) from ch_synced_for_stripfastq + tuple samplename, libraryid, seqtype, organism, strandedness, udg, path(r1), path(r2), file(bam), file(bai) from ch_synced_for_stripfastq output: tuple samplename, libraryid, seqtype, organism, strandedness, udg, file("*.fq.gz") into ch_output_from_stripfastq @@ -1647,6 +1710,7 @@ ch_branched_for_seqtypemerge = ch_mapping_for_seqtype_merging [ samplename, libraryid, lane, seqtype_new, organism, strandedness, udg, r1, r2 ] } + .dump(tag: "Seqtype") .branch { skip_merge: it[7].size() == 1 // Can skip merging if only single lanes merge_me: it[7].size() > 1 @@ -1688,8 +1752,9 @@ process samtools_flagstat { input: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_seqtypemerged_for_samtools_flagstat + output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*stats") into ch_flagstat_for_multiqc,ch_flagstat_for_endorspy + tuple samplename, libraryid, lane, seqtype, organism, 
strandedness, udg, path("*stats") into ch_flagstat_for_multiqc,ch_flagstat_for_endorspy script: """ @@ -1816,11 +1881,11 @@ process samtools_filter { if (params.run_bam_filtering) { ch_seqtypemerged_for_skipfiltering.mix(ch_output_from_filtering) .filter { it =~/.*filtered.bam/ } - .into { ch_filtering_for_skiprmdup; ch_filtering_for_dedup; ch_filtering_for_markdup; ch_filtering_for_flagstat; ch_skiprmdup_for_libeval } + .into { ch_filtering_for_skiprmdup; ch_filtering_for_dedup; ch_filtering_for_markdup; ch_filtering_for_flagstat; ch_skiprmdup_for_libeval; ch_mapped_for_preseq } } else { ch_seqtypemerged_for_skipfiltering - .into { ch_filtering_for_skiprmdup; ch_filtering_for_dedup; ch_filtering_for_markdup; ch_filtering_for_flagstat; ch_skiprmdup_for_libeval } + .into { ch_filtering_for_skiprmdup; ch_filtering_for_dedup; ch_filtering_for_markdup; ch_filtering_for_flagstat; ch_skiprmdup_for_libeval; ch_mapped_for_preseq } } @@ -1835,10 +1900,10 @@ process samtools_flagstat_after_filter { params.run_bam_filtering input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_filtering_for_flagstat + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_filtering_for_flagstat output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.stats") into ch_bam_filtered_flagstat_for_multiqc, ch_bam_filtered_flagstat_for_endorspy + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.stats") into ch_bam_filtered_flagstat_for_multiqc, ch_bam_filtered_flagstat_for_endorspy script: """ @@ -1849,7 +1914,6 @@ process samtools_flagstat_after_filter { if (params.run_bam_filtering) { ch_flagstat_for_endorspy .join(ch_bam_filtered_flagstat_for_endorspy, by: [0,1,2,3,4,5,6]) - .dump(tag: "Joined") .set{ ch_allflagstats_for_endorspy } } else { @@ -1864,7 +1928,7 @@ if (params.run_bam_filtering) { def strandedness = it[5] def udg = 
it[6] def stats = file(it[7]) - def poststats = file("$baseDir/assets/dummy.txt") + def poststats = file("$baseDir/assets/nf-core_eager_dummy.txt") [samplename, libraryid, lane, seqtype, organism, strandedness, udg, stats, poststats ] } .set{ ch_allflagstats_for_endorspy } @@ -1878,10 +1942,10 @@ process endorSpy { publishDir "${params.outdir}/endorSpy", mode: 'copy' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(stats), file(poststats) from ch_allflagstats_for_endorspy + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(stats), path(poststats) from ch_allflagstats_for_endorspy output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.json") into ch_endorspy_for_multiqc + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.json") into ch_endorspy_for_multiqc script: if (params.run_bam_filtering) { @@ -1907,12 +1971,12 @@ process dedup{ !params.skip_deduplication && params.dedupper == 'dedup' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_filtering_for_dedup + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_filtering_for_dedup output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.hist") into ch_hist_for_preseq - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.json") into ch_dedup_results_for_multiqc - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("${libraryid}_rmdup.bam"), file("*.{bai,csi}") into ch_output_from_dedup, ch_dedup_for_libeval + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.hist") into ch_hist_for_preseq + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.json") into ch_dedup_results_for_multiqc + tuple samplename, libraryid, lane, seqtype, 
organism, strandedness, udg, path("${libraryid}_rmdup.bam"), path("*.{bai,csi}") into ch_output_from_dedup, ch_dedup_for_libeval script: def outname = "${bam.baseName}" @@ -1934,7 +1998,7 @@ process dedup{ process markduplicates{ label 'mc_small' - tag "${outname}" + tag "${libraryid}" publishDir "${params.outdir}/deduplication/", mode: 'copy', saveAs: {filename -> "${libraryid}/$filename"} @@ -1942,17 +2006,22 @@ process markduplicates{ !params.skip_deduplication && params.dedupper == 'markduplicates' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_filtering_for_markdup + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_filtering_for_markdup output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.metrics") into ch_markdup_results_for_multiqc - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("${libraryid}_rmdup.bam"), file("*.{bai,csi}") into ch_output_from_markdup, ch_markdup_for_libeval + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.metrics") into ch_markdup_results_for_multiqc + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("${libraryid}_rmdup.bam"), path("*.{bai,csi}") into ch_output_from_markdup, ch_markdup_for_libeval script: def outname = "${bam.baseName}" def size = params.large_ref ? 
'-c' : '' """ - picard -Xmx${task.memory.toMega()}M -Xms${task.memory.toMega()}M MarkDuplicates INPUT=$bam OUTPUT=${libraryid}_rmdup.bam REMOVE_DUPLICATES=TRUE AS=TRUE METRICS_FILE="${libraryid}_rmdup.metrics" VALIDATION_STRINGENCY=SILENT + ## To make sure direct BAMs have a clean name + if [[ "${bam}" != "${libraryid}.bam" ]]; then + mv ${bam} ${libraryid}.bam + fi + + picard -Xmx${task.memory.toMega()}M -Xms${task.memory.toMega()}M MarkDuplicates INPUT=${libraryid}.bam OUTPUT=${libraryid}_rmdup.bam REMOVE_DUPLICATES=TRUE AS=TRUE METRICS_FILE="${libraryid}_rmdup.metrics" VALIDATION_STRINGENCY=SILENT samtools index ${libraryid}_rmdup.bam ${size} """ } @@ -1967,7 +2036,9 @@ if ( params.skip_deduplication ) { .into{ ch_rmdup_for_preseq; ch_rmdup_for_damageprofiler; ch_for_nuclear_contamination; ch_rmdup_formtnucratio } } -// Merge independent libraries sequenced but with same treatment (often done to improve complexity). Different strand/UDG libs not merged because bamtrim/pmdtools needs UDG info +// Merge independent libraries sequenced but with same treatment (often done to +// improve complexity) with the same _sample_ name. Different strand/UDG libs +// not merged because bamtrim/pmdtools/genotyping needs that info. // Step one: work out which are single libraries (from skipping rmdup and both dedups) that do not need merging and pass to a skipping if ( params.skip_deduplication ) { @@ -1989,25 +2060,39 @@ if ( params.skip_deduplication ) { // For non-merging libraries, fix group libraryIDs into single values. // This is a bit hacky as theoretically could have different, but this should // rarely be the case. 
- ch_input_for_librarymerging.clean_libraryid .map{ it -> def libraryid = it[1][0] - [it[0], libraryid, it[2], it[3], it[4], it[5], it[6], it[7], it[8] ] + def bam = it[7].flatten() + def bai = it[8].flatten() + + [it[0], libraryid, it[2], it[3], it[4], it[5], it[6], bam, bai ] } .set { ch_input_for_skiplibrarymerging } +ch_input_for_librarymerging.merge_me + .map{ + it -> + def libraryid = it[1][0] + def seqtype = "merged" + def bam = it[7].flatten() + def bai = it[8].flatten() + + [it[0], libraryid, it[2], seqtype, it[4], it[5], it[6], bam, bai ] + } + .set { ch_fixedinput_for_librarymerging } + process library_merge { label 'sc_tiny' tag "${samplename}" publishDir "${params.outdir}/merged_bams/initial", mode: 'copy' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_input_for_librarymerging.merge_me + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_fixedinput_for_librarymerging.dump(tag: "Input Tuple Library Merge") output: - tuple samplename, val("${samplename}_libmerged"), lane, seqtype, organism, strandedness, udg, file("*_libmerged_rg_rmdup.bam"), file("*_libmerged_rg_rmdup.bam.{bai,csi}") into ch_output_from_librarymerging + tuple samplename, val("${samplename}_libmerged"), lane, seqtype, organism, strandedness, udg, path("*_libmerged_rg_rmdup.bam"), path("*_libmerged_rg_rmdup.bam.{bai,csi}") into ch_output_from_librarymerging script: def size = params.large_ref ? '-c' : '' @@ -2035,6 +2120,16 @@ if (!params.skip_deduplication) { ////////////////////////////////////////////////// // Library complexity calculation from mapped reads - could a user cost-effectively sequence deeper for more unique information? 
+if ( params.skip_deduplication ) { + ch_input_for_preseq = ch_rmdup_for_preseq.map{ it[0,1,2,3,4,5,6,7] } + +} else if ( !params.skip_deduplication && params.dedupper == "markduplicates" ) { + ch_input_for_preseq = ch_mapped_for_preseq.map{ it[0,1,2,3,4,5,6,7] } + +} else if ( !params.skip_deduplication && params.dedupper == "dedup" ) { + ch_input_for_preseq = ch_hist_for_preseq + +} process preseq { label 'sc_tiny' @@ -2045,20 +2140,24 @@ process preseq { !params.skip_preseq input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(input) from (params.skip_deduplication ? ch_rmdup_for_preseq.map{ it[0,1,2,3,4,5,6,7] } : ch_hist_for_preseq ) + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(input) from ch_input_for_preseq output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("${input.baseName}.ccurve") into ch_preseq_for_multiqc + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("${input.baseName}.ccurve") into ch_preseq_for_multiqc script: - if(!params.skip_deduplication){ + pe_mode = params.skip_collapse && seqtype == "PE" ? 
'-P' : '' + if(!params.skip_deduplication && params.dedupper == "dedup"){ """ - preseq c_curve -s ${params.preseq_step_size} -o ${input.baseName}.ccurve -H $input + preseq c_curve -s ${params.preseq_step_size} -o ${input.baseName}.ccurve -H ${input} """ - - } else { + } else if( !params.skip_deduplication && params.dedupper == "markduplicates"){ """ - preseq c_curve -s ${params.preseq_step_size} -o ${input.baseName}.ccurve -B $input + preseq c_curve -s ${params.preseq_step_size} -o ${input.baseName}.ccurve -B ${input} ${pe_mode} + """ + } else if ( params.skip_deduplication ) { + """ + preseq c_curve -s ${params.preseq_step_size} -o ${input.baseName}.ccurve -B ${input} ${pe_mode} """ } } @@ -2082,11 +2181,11 @@ process bedtools { params.run_bedtools_coverage input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_rmdup_for_bedtools + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_rmdup_for_bedtools file anno_file from ch_anno_for_bedtools.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*") script: """ @@ -2111,14 +2210,14 @@ process damageprofiler { !params.skip_damage_calculation input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_rmdup_for_damageprofiler + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_rmdup_for_damageprofiler file fasta from ch_fasta_for_damageprofiler.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("${base}/*.txt") optional true - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("${base}/*.log") - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("${base}/*.pdf") optional true - tuple 
samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("${base}/*.json") optional true into ch_damageprofiler_results + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("${base}/*.txt") optional true + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("${base}/*.log") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("${base}/*.pdf") optional true + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("${base}/*.json") optional true into ch_damageprofiler_results script: base = "${bam.baseName}" @@ -2137,11 +2236,11 @@ process pmdtools { when: params.run_pmdtools input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_rmdup_for_pmdtools + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_rmdup_for_pmdtools file fasta from ch_fasta_for_pmdtools.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.bam"), file("*.{bai,csi}") into ch_output_from_pmdtools + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.bam"), path("*.{bai,csi}") into ch_output_from_pmdtools file "*.cpg.range*.txt" script: @@ -2192,7 +2291,7 @@ process bam_trim { when: params.run_trim_bam input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_bamutils_decision.totrim + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_bamutils_decision.totrim output: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.trimmed.bam"), file("*.trimmed.bam.{bai,csi}") into ch_trimmed_from_bamutils @@ -2209,7 +2308,10 @@ process bam_trim { """ } -// Post trimming merging, because we will presume that if trimming is turned on, 'lab-removed' libraries can be combined with merged 
with 'in-silico damage removed' libraries to improve genotyping +// Post trimming merging of libraries to single samples, except for SS/DS +// libraries as they should be genotyped separately, because we will assume +// that if trimming is turned on, 'lab-removed' libraries can be combined with +// merged with 'in-silico damage removed' libraries to improve genotyping ch_trimmed_formerge = ch_bamutils_decision.notrim .mix(ch_trimmed_from_bamutils) @@ -2242,10 +2344,10 @@ process additional_library_merge { publishDir "${params.outdir}/merged_bams/additional", mode: 'copy' input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_trimmed_formerge.merge_me + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_trimmed_formerge.merge_me output: - tuple samplename, val("${samplename}_libmerged"), lane, seqtype, organism, strandedness, udg, file("*_libmerged_rg_add.bam"), file("*_libmerged_rg_add.bam.{bai,csi}") into ch_output_from_trimmerge + tuple samplename, val("${samplename}_libmerged"), lane, seqtype, organism, strandedness, udg, path("*_libmerged_rg_add.bam"), path("*_libmerged_rg_add.bam.{bai,csi}") into ch_output_from_trimmerge script: def size = params.large_ref ? '-c' : '' @@ -2271,11 +2373,11 @@ process qualimap { !params.skip_qualimap input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_addlibmerge_for_qualimap + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_addlibmerge_for_qualimap file fasta from ch_fasta_for_qualimap.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*") into ch_qualimap_results + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*") into ch_qualimap_results script: def snpcap = params.snpcapture ? 
"-gff ${params.bedfile}" : '' @@ -2300,7 +2402,7 @@ if ( params.run_genotyping && params.genotyping_source == 'raw' ) { ch_output_from_bamutils .into { ch_damagemanipulation_for_skipgenotyping; ch_damagemanipulation_for_genotyping_ug; ch_damagemanipulation_for_genotyping_hc; ch_damagemanipulation_for_genotyping_freebayes; ch_damagemanipulation_for_genotyping_pileupcaller; ch_damagemanipulation_for_genotyping_angsd } -} else if ( params.run_genotyping && params.genotyping_source == "pmd" && !params.run_run_pmdtools ) { +} else if ( params.run_genotyping && params.genotyping_source == "pmd" && !params.run_pmdtools ) { exit 1, "[nf-core/eager] error: Cannot run genotyping with 'pmd' source without running pmtools (--run_pmdtools)! Please check input parameters." } else if ( params.run_genotyping && params.genotyping_source == "pmd" && params.run_pmdtools ) { @@ -2336,7 +2438,7 @@ if ( params.gatk_ug_jar != '' ) { process genotyping_ug { label 'mc_small' tag "${samplename}" - publishDir "${params.outdir}/genotyping", mode: 'copy' + publishDir "${params.outdir}/genotyping", mode: 'copy', pattern: '*{.vcf.gz,.realign.bam,realign.bai}' when: params.run_genotyping && params.genotyping_tool == 'ug' @@ -2350,12 +2452,11 @@ if ( params.gatk_ug_jar != '' ) { output: tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*vcf.gz") into ch_ug_for_multivcfanalyzer,ch_ug_for_vcf2genome - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*realign.{bam,bai}") optional true + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.realign.{bam,bai}") optional true script: def defaultbasequalities = params.gatk_ug_defaultbasequalities == '' ? '' : " --defaultBaseQualities ${params.gatk_ug_defaultbasequalities}" - def keep_realign = params.gatk_ug_keep_realign_bam ? "" : "rm ${samplename}.realign.bam" - def index_realign = params.gatk_ug_keep_realign_bam ? 
"samtools index ${samplename}.realign.bam" : "" + def keep_realign = params.gatk_ug_keep_realign_bam ? "samtools index ${samplename}.realign.bam" : "rm ${samplename}.realign.{bam,bai}" if (params.gatk_dbsnp == '') """ samtools index -b ${bam} @@ -2364,7 +2465,6 @@ if ( params.gatk_ug_jar != '' ) { java -Xmx${task.memory.toGiga()}g -jar ${jar} -T UnifiedGenotyper -R ${fasta} -I ${samplename}.realign.bam -o ${samplename}.unifiedgenotyper.vcf -nt ${task.cpus} --genotype_likelihoods_model ${params.gatk_ug_genotype_model} -stand_call_conf ${params.gatk_call_conf} --sample_ploidy ${params.gatk_ploidy} -dcov ${params.gatk_downsample} --output_mode ${params.gatk_ug_out_mode} ${defaultbasequalities} $keep_realign - $index_realign pigz -p ${task.cpus} ${samplename}.unifiedgenotyper.vcf """ @@ -2376,7 +2476,6 @@ if ( params.gatk_ug_jar != '' ) { java -jar ${jar} -T UnifiedGenotyper -R ${fasta} -I ${samplename}.realign.bam -o ${samplename}.unifiedgenotyper.vcf -nt ${task.cpus} --dbsnp ${params.gatk_dbsnp} --genotype_likelihoods_model ${params.gatk_ug_genotype_model} -stand_call_conf ${params.gatk_call_conf} --sample_ploidy ${params.gatk_ploidy} -dcov ${params.gatk_downsample} --output_mode ${params.gatk_ug_out_mode} ${defaultbasequalities} $keep_realign - $index_realign pigz -p ${task.cpus} ${samplename}.unifiedgenotyper.vcf """ @@ -2399,7 +2498,7 @@ if ( params.gatk_ug_jar != '' ) { file dict from ch_dict_for_hc.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*vcf.gz") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*vcf.gz") script: if (params.gatk_dbsnp == '') @@ -2432,7 +2531,7 @@ if ( params.gatk_ug_jar != '' ) { file dict from ch_dict_for_freebayes.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*vcf.gz") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*vcf.gz") script: def skip_coverage = 
"${params.freebayes_g}" == 0 ? "" : "-g ${params.freebayes_g}" @@ -2445,13 +2544,13 @@ if ( params.gatk_ug_jar != '' ) { // pileupCaller for 'random sampling' genotyping if (params.pileupcaller_bedfile.isEmpty()) { - ch_bed_for_pileupcaller = 'NO_FILE_BED' + ch_bed_for_pileupcaller = Channel.fromPath("$baseDir/assets/nf-core_eager_dummy.txt") } else { ch_bed_for_pileupcaller = Channel.fromPath(params.pileupcaller_bedfile) } if (params.pileupcaller_snpfile.isEmpty ()) { - ch_snp_for_pileupcaller = 'NO_FILE' + ch_snp_for_pileupcaller = Channel.fromPath("$baseDir/assets/nf-core_eager_dummy2.txt") } else { ch_snp_for_pileupcaller = Channel.fromPath(params.pileupcaller_snpfile) } @@ -2486,20 +2585,23 @@ if (params.pileupcaller_snpfile.isEmpty ()) { file fasta from ch_fasta_for_genotyping_pileupcaller.collect() file fai from ch_fai_for_pileupcaller.collect() file dict from ch_dict_for_pileupcaller.collect() - file bed from ch_bed_for_pileupcaller.collect() - file snp from ch_snp_for_pileupcaller.collect() + path(bed) from ch_bed_for_pileupcaller.collect() + path(snp) from ch_snp_for_pileupcaller.collect().dump(tag: "Pileupcaller SNP file") output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("pileupcaller.${strandedness}.*") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("pileupcaller.${strandedness}.*") script: + def use_bed = bed.getName() != 'nf-core_eager_dummy.txt' ? "-l ${bed}" : '' + def use_snp = snp.getName() != 'nf-core_eager_dummy2.txt' ? "-f ${snp}" : '' + def transitions_mode = strandedness == "single" ? "" : "${params.pileupcaller_transitions_mode}" == 'SkipTransitions' ? "--skipTransitions" : "${params.pileupcaller_transitions_mode}" == 'TransitionsMissing' ? "--transitionsMissing" : "" def caller = "--${params.pileupcaller_method}" def ssmode = strandedness == "single" ? 
"--singleStrandMode" : "" def bam_list = bam.flatten().join(" ") def sample_names = samplename.flatten().join(",") """ - samtools mpileup -B -q 30 -Q 30 -l ${bed} -f ${fasta} ${bam_list} | pileupCaller ${caller} ${ssmode} ${transitions_mode} --sampleNames ${sample_names} -f ${snp} -e pileupcaller.${strandedness} + samtools mpileup -B -q 30 -Q 30 ${use_bed} -f ${fasta} ${bam_list} | pileupCaller ${caller} ${ssmode} ${transitions_mode} --sampleNames ${sample_names} ${use_snp} -e pileupcaller.${strandedness} """ } @@ -2566,11 +2668,11 @@ process vcf2genome { params.run_vcf2genome input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(vcf) from ch_ug_for_vcf2genome + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(vcf) from ch_ug_for_vcf2genome file fasta from ch_fasta_for_vcf2genome.collect() output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.fasta.gz") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.fasta.gz") script: def out = "${params.vcf2genome_outfile}" == '' ? 
"${samplename}.fasta" : "${params.vcf2genome_outfile}" @@ -2640,11 +2742,11 @@ if (params.additional_vcf_files == '') { params.run_mtnucratio input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(bam), file(bai) from ch_rmdup_formtnucratio + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_rmdup_formtnucratio output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.mtnucratio") - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.json") into ch_mtnucratio_for_multiqc + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.mtnucratio") + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.json") into ch_mtnucratio_for_multiqc script: """ @@ -2655,7 +2757,7 @@ if (params.additional_vcf_files == '') { // Human biological sex estimation if (params.sexdeterrmine_bedfile == '') { - ch_bed_for_sexdeterrmine = file('NO_FILE') + ch_bed_for_sexdeterrmine = Channel.fromPath("$baseDir/assets/nf-core_eager_dummy.txt") } else { ch_bed_for_sexdeterrmine = Channel.fromPath(params.sexdeterrmine_bedfile) } @@ -2666,8 +2768,8 @@ process sex_deterrmine { publishDir "${params.outdir}/sex_determination", mode:"copy" input: - file bam from ch_for_sexdeterrmine.map { it[7] }.collect() - file bed from ch_bed_for_sexdeterrmine + path bam from ch_for_sexdeterrmine.map { it[7] }.collect() + path(bed) from ch_bed_for_sexdeterrmine output: file "SexDet.txt" @@ -2677,7 +2779,7 @@ process sex_deterrmine { params.run_sexdeterrmine script: - def filter = bed.name != 'NO_FILE' ? "-b $bed" : '' + def filter = bed.getName() != 'nf-core_eager_dummy.txt' ? 
"-b $bed" : '' """ for i in *.bam; do @@ -2699,10 +2801,10 @@ process sex_deterrmine { params.run_nuclear_contamination input: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file(input), file(bai) from ch_for_nuclear_contamination + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(input), path(bai) from ch_for_nuclear_contamination output: - tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file('*.X.contamination.out') into ch_from_nuclear_contamination + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path('*.X.contamination.out') into ch_from_nuclear_contamination script: """ @@ -2872,10 +2974,10 @@ if (params.run_metagenomic_screening && params.database.endsWith(".tar.gz") && p process decomp_kraken { input: - file(ckdb) from comp_kraken + path(ckdb) from comp_kraken output: - file(dbname) into ch_krakendb + path(dbname) into ch_krakendb script: dbname = params.database.tokenize("/")[-1].tokenize(".")[0] @@ -2885,7 +2987,7 @@ if (params.run_metagenomic_screening && params.database.endsWith(".tar.gz") && p } } else if (! 
params.database.endsWith(".tar.gz") && params.run_metagenomic_screening && params.metagenomic_tool == 'kraken') { - ch_krakendb = path(params.database) + ch_krakendb = file(params.database) } else { ch_krakendb = Channel.empty() } @@ -2899,12 +3001,12 @@ process kraken { when: params.run_metagenomic_screening && params.run_bam_filtering && params.bam_unmapped_type == 'fastq' && params.metagenomic_tool == 'kraken' input: - file(fastq) from ch_bam_filtering_for_metagenomic_kraken.map { it[7] } - file(krakendb) from ch_krakendb + path(fastq) from ch_bam_filtering_for_metagenomic_kraken.map { it[7] } + path(krakendb) from ch_krakendb output: file "*.kraken.out" into ch_kraken_out - tuple prefix, file("*.kreport") into ch_kraken_report, ch_kraken_for_multiqc + tuple prefix, path("*.kreport") into ch_kraken_report, ch_kraken_for_multiqc script: prefix = fastq.toString().tokenize('.')[0] @@ -2921,10 +3023,10 @@ process kraken_parse { errorStrategy 'ignore' input: - tuple val(name), file(kraken_r) from ch_kraken_report + tuple val(name), path(kraken_r) from ch_kraken_report output: - tuple val(name), file('*.kraken_parsed.csv') into ch_kraken_parsed + tuple val(name), path('*.kraken_parsed.csv') into ch_kraken_parsed script: out = name+".kraken_parsed.csv" @@ -2937,10 +3039,10 @@ process kraken_merge { publishDir "${params.outdir}/metagenomic_classification/kraken", mode:"copy" input: - file csv_count from ch_kraken_parsed.map{ it[1] }.collect().dump() + file csv_count from ch_kraken_parsed.map{ it[1] }.collect() output: - file('kraken_count_table.csv') + path('kraken_count_table.csv') script: out = "kraken_count_table.csv" @@ -3029,9 +3131,10 @@ process multiqc { input: file multiqc_config from ch_multiqc_config file (mqc_custom_config) from ch_multiqc_custom_config.collect().ifEmpty([]) - file ('fastqc_raw/*') from ch_prefastqc_for_multiqc.collect().ifEmpty([]) - file('fastqc/*') from ch_fastqc_after_clipping.collect().ifEmpty([]) file software_versions_mqc from
software_versions_yaml.collect().ifEmpty([]) + file logo from ch_eager_logo + file ('fastqc_raw/*') from ch_prefastqc_for_multiqc.collect().ifEmpty([]) + path('fastqc/*') from ch_fastqc_after_clipping.collect().ifEmpty([]) file ('adapter_removal/*') from ch_adapterremoval_logs.collect().ifEmpty([]) file ('mapping/bt2/*') from ch_bt2_for_multiqc.collect().ifEmpty([]) file ('flagstat/*') from ch_flagstat_for_multiqc.collect().ifEmpty([]) @@ -3049,7 +3152,7 @@ process multiqc { file ('malt/*') from ch_malt_for_multiqc.collect().ifEmpty([]) file ('kraken/*') from ch_kraken_for_multiqc.collect().ifEmpty([]) file ('hops/*') from ch_hops_for_multiqc.collect().ifEmpty([]) - file logo from ch_eager_logo + file ('nuclear_contamination/*') from ch_nuclear_contamination_for_multiqc.collect().ifEmpty([]) file workflow_summary from ch_workflow_summary.collectFile(name: "workflow_summary_mqc.yaml") @@ -3103,7 +3206,7 @@ workflow.onComplete { try { if (workflow.success) { mqc_report = ch_multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList) { + if (mqc_report instanceof ArrayList) { log.warn "[nf-core/eager] Found multiple reports from process 'multiqc', will use only one" mqc_report = mqc_report[0] } @@ -3294,12 +3397,6 @@ def has_extension(it, extension) { it.toString().toLowerCase().endsWith(extension.toLowerCase()) } -// To convert a string to an array when not an array already -// From: https://stackoverflow.com/a/55453674/11502856 -def arrayify(it) { - [] + it ?: [it] -} - // Extract FastQs from Path // Create a channel of FASTQs from a directory pattern: "my_samples/*/" // All FASTQ files in subdirectories are collected and emitted; diff --git a/nextflow.config b/nextflow.config index 48607045d..0276bbe7e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -303,6 +303,7 @@ profiles { benchmarking_human { includeConfig 'conf/benchmarking_human.config' } benchmarking_pathogenscreening { includeConfig 'conf/benchmarking_pathogenscreening.config' } 
benchmarking_vikingfish { includeConfig 'conf/benchmarking_vikingfish.config' } + benchmarking_vikingfish_single { includeConfig 'conf/benchmarking_vikingfish_single.config' } } // Load igenomes.config if required if (!params.igenomes_ignore) {