nf-core
diff --git a/‎.github/workflows/local_modules.yml‎
Lines changed: 0 additions & 99 deletions b/‎.github/workflows/local_modules.yml‎
Lines changed: 0 additions & 99 deletions
diff --git a/‎conf/test.config‎
Lines changed: 1 addition & 1 deletion b/‎conf/test.config‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎conf/test_full_somatic.config‎
Lines changed: 0 additions & 1 deletion b/‎conf/test_full_somatic.config‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎docs/usage.md‎
Lines changed: 4 additions & 3 deletions b/‎docs/usage.md‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎nextflow.config‎
Lines changed: 46 additions & 46 deletions b/‎nextflow.config‎
Lines changed: 46 additions & 46 deletions
diff --git a/‎nextflow_schema.json‎
Lines changed: 1 addition & 1 deletion b/‎nextflow_schema.json‎
Lines changed: 1 addition & 1 deletion
@@ -177,7 +177,7 @@ profiles {
     }
     umi {
         params.input               = "${projectDir}/tests/csv/3.0/fastq_umi.csv"
-        params.umi_read_structure  = '7M1S+T'
+        params.umi_read_structure  = '+T 7M1S+T'
     }
     use_gatk_spark {
         params.use_gatk_spark      = 'baserecalibrator,markduplicates'
 
@@ -19,7 +19,6 @@ params {
 
     // Other params
     tools = 'strelka,mutect2,freebayes,ascat,manta,cnvkit,tiddit,controlfreec,vep'
-
     split_fastq = 20000000
     intervals   = 's3://nf-core-awsmegatests/sarek/input/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed'
     wes         = true
 
@@ -26,7 +26,7 @@ Note that the pipeline will create the following files in your working directory
 ```console
 work            # Directory containing the nextflow working files
 results         # Finished results (configurable, see below)
-.nextflow_log   # Log file from Nextflow
+.nextflow.log   # Log file from Nextflow
 # Other nextflow hidden files, eg. history of pipeline runs and old logs.
 ```
 
@@ -58,7 +58,7 @@ Multiple CSV files can be specified if the path is enclosed in quotes.
 | `sex`     | **Sex chromosomes of the patient**; i.e. XX, XY..., only used for Copy-Number Variation analysis in a tumor/pair<br /> _Optional, Default: `NA`_                                                                                                                                                                                  |
 | `status`  | **Normal/tumor status of sample**; can be `0` (normal) or `1` (tumor).<br /> _Optional, Default: `0`_                                                                                                                                                                                                                             |
 | `sample`  | **Custom sample ID** for each tumor and normal sample; more than one tumor sample for each subject is possible, i.e. a tumor and a relapse; samples can have multiple lanes for which the _same_ ID must be used to merge them later (see also `lane`). Sample IDs must be unique for unique biological samples <br /> _Required_ |
-| `lane`    | Lane ID, used when the `sample` is multiplexed on several lanes. Must be unique for each lane in the same sample (but does not need to be the original lane name), and must contain at least one character <br /> _Required for `--step_mapping`_                                                                                 |
+| `lane`    | Lane ID, used when the `sample` is multiplexed on several lanes. Must be unique for each lane in the same sample (but does not need to be the original lane name), and must contain at least one character <br /> _Required for `--step mapping`_                                                                                 |
 | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension `.fastq.gz` or `.fq.gz`.                                                                                                                                                                                                        |
 | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension `.fastq.gz` or `.fq.gz`.                                                                                                                                                                                                        |
 | `bam`     | Full path to (u)BAM file                                                                                                                                                                                                                                                                                                          |
@@ -672,7 +672,8 @@ This will enable pre-processing of the reads and UMI consensus reads calling, wh
 ### UMI Read Structure
 
 This parameter is a string, which follows a [convention](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures) to describe the structure of the umi.
-If your reads contain a UMI only on one end, the string should only represent one structure (i.e. "2M11S+T"); should your reads contain a UMI on both ends, the string will contain two structures separated by a blank space (i.e. "2M11S+T 2M11S+T").
+
+As an example: if your reads contain a UMI only on the forward read, the string should contain only one structure (e.g. "2M11S+T"); should your reads contain a UMI on both reads, the string will contain two structures separated by a blank space (e.g. "2M11S+T 2M11S+T"); should your reads contain a UMI only on the reverse read, your structure must represent the template only for the forward read and template plus UMI for the reverse read (e.g. "+T 12M11S+T"). Please refer to the fgbio documentation for more details, as providing the correct structure is essential and specific to the UMI kit used.
 
 ### Limitations and future updates
 
 
@@ -10,43 +10,43 @@ params {
     // Workflow flags:
 
     // Mandatory arguments
-    input = null // No default input
-    step = 'mapping' // Starts with mapping
+    input = null      // No default input
+    step  = 'mapping' // Starts with mapping
 
     // Genome and references options
-    genome = 'GATK.GRCh38'
-    igenomes_base = 's3://ngi-igenomes/igenomes/'
+    genome          = 'GATK.GRCh38'
+    igenomes_base   = 's3://ngi-igenomes/igenomes/'
     igenomes_ignore = false
-    save_reference = false // Built references not saved
+    save_reference  = false // Built references not saved
 
     // Main options
-    no_intervals = false // Intervals will be built from the fasta file
-    nucleotides_per_second = 1000 // Default interval size
-    tools = null // No default Variant_Calling or Annotation tools
-    skip_tools = null // All tools (markduplicates + baserecalibrator + QC) are used by default
+    no_intervals           = false // Intervals will be built from the fasta file
+    nucleotides_per_second = 1000  // Default interval size
+    tools                  = null  // No default Variant_Calling or Annotation tools
+    skip_tools             = null  // All tools (markduplicates + baserecalibrator + QC) are used by default
+    split_fastq            = 0     // FASTQ files will not be split by default by FASTP
 
-    // Modify fastqs (trim/split)
-    trim_fastq = false // No trimming
-    clip_r1 = 0
-    clip_r2 = 0
+    // Modify fastqs (trim/split) with FASTP
+    trim_fastq          = false // No trimming
+    clip_r1             = 0
+    clip_r2             = 0
     three_prime_clip_r1 = 0
     three_prime_clip_r2 = 0
-    trim_nextseq = 0
-    save_trimmed = false
-    split_fastq  = 0 // FASTQ files will not be split by default
-    save_split_fastqs  = false
+    trim_nextseq        = 0
+    save_trimmed        = false
+    save_split_fastqs   = false
 
     // UMI tagged reads
-    umi_read_structure    = null // no UMI
-    group_by_umi_strategy = 'Adjacency' // default strategy when UMI
+    umi_read_structure    = null        // no UMI
+    group_by_umi_strategy = 'Adjacency' // default strategy when running with UMI for GROUPREADSBYUMI
 
     // Preprocessing
-    aligner = 'bwa-mem' // Default is bwa-mem, bwa-mem2 and dragmap can be used too
-    use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default
-    save_bam_mapped = false // Mapped BAMs not saved
-    save_output_as_bam = false //Output files from preprocessing are saved as bam and not as cram files
-    seq_center = null // No sequencing center to be written in read group CN field by aligner
-    seq_platform = 'ILLUMINA' // Default platform written in read group PL field by aligner
+    aligner            = 'bwa-mem'  // Default is bwa-mem, bwa-mem2 and dragmap can be used too
+    use_gatk_spark     = null       // GATK Spark implementation of their tools in local mode not used by default
+    save_bam_mapped    = false      // Mapped BAMs not saved
+    save_output_as_bam = false      // Output files from preprocessing are saved as cram by default; set to true to save them as bam instead
+    seq_center         = null       // No sequencing center to be written in read group CN field by aligner
+    seq_platform       = 'ILLUMINA' // Default platform written in read group PL field by aligner
 
     // Variant Calling
     only_paired_variant_calling = false //if true, skips germline variant calling for normal-paired samples
@@ -62,31 +62,31 @@ params {
     cf_mincov = 0                       // ControlFreec default values
     cf_minqual = 0                      // ControlFreec default values
     cf_window = null                    // by default we are not using this in Control-FREEC
-    ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
-    wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers
+    ignore_soft_clipped_bases = false   // no --dont-use-soft-clipped-bases for GATK Mutect2
+    wes = false                         // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers
 
     // Annotation
-    vep_out_format = 'vcf'
-    vep_dbnsfp = null // dbnsfp plugin disabled within VEP
-    dbnsfp = null // No dbnsfp processed file
-    dbnsfp_tbi = null // No dbnsfp processed file index
-    dbnsfp_consequence = null // No default consequence for dbnsfp plugin
-    dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin
-    vep_loftee = null // loftee plugin disabled within VEP
-    vep_spliceai = null // spliceai plugin disabled within VEP
-    spliceai_snv = null // No spliceai_snv file
-    spliceai_snv_tbi = null // No spliceai_snv file index
-    spliceai_indel = null // No spliceai_indel file
-    spliceai_indel_tbi = null // No spliceai_indel file index
-    vep_spliceregion = null // spliceregion plugin disabled within VEP
-    snpeff_cache = null // No directory for snpEff cache
-    vep_cache = null // No directory for VEP cache
-    vep_include_fasta = false // Don't use fasta file for annotation with VEP
+    vep_out_format      = 'vcf'
+    vep_dbnsfp          = null  // dbnsfp plugin disabled within VEP
+    dbnsfp              = null  // No dbnsfp processed file
+    dbnsfp_tbi          = null  // No dbnsfp processed file index
+    dbnsfp_consequence  = null  // No default consequence for dbnsfp plugin
+    dbnsfp_fields       = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin
+    vep_loftee          = null  // loftee plugin disabled within VEP
+    vep_spliceai        = null  // spliceai plugin disabled within VEP
+    spliceai_snv        = null  // No spliceai_snv file
+    spliceai_snv_tbi    = null  // No spliceai_snv file index
+    spliceai_indel      = null  // No spliceai_indel file
+    spliceai_indel_tbi  = null  // No spliceai_indel file index
+    vep_spliceregion    = null  // spliceregion plugin disabled within VEP
+    snpeff_cache        = null  // No directory for snpEff cache
+    vep_cache           = null  // No directory for VEP cache
+    vep_include_fasta   = false // Don't use fasta file for annotation with VEP
 
     // MultiQC options
-    multiqc_config             = null
-    multiqc_title              = null
-    max_multiqc_email_size     = '25.MB'
+    multiqc_config         = null
+    multiqc_title          = null
+    max_multiqc_email_size = '25.MB'
 
     // Boilerplate options
     outdir                     = 'results'
 
@@ -166,7 +166,7 @@
                     "fa_icon": "fas fa-tape",
                     "description": "Specify UMI read structure",
                     "hidden": true,
-                    "help_text": "One structure if UMI is present on one end (i.e. '2M11S+T'), or two structures separated by a blank space if UMIs a present on both ends (i.e. '2M11S+T 2M11S+T'); please note, this does not handle duplex-UMIs.\n\nIt is recommended to skip duplicate marking and base quality score recalibration. See `--skip_tools`."
+                    "help_text": "One structure if UMI is present on one end (i.e. '+T 2M11S+T'), or two structures separated by a blank space if UMIs are present on both ends (i.e. '2M11S+T 2M11S+T'); please note, this does not handle duplex-UMIs.\n\nFor more info on UMI usage in the pipeline, also check docs [here](./docs/usage.md/#how-to-handle-umis)."
                 },
                 "group_by_umi_strategy": {
                     "type": "string",
Original file line number	Diff line number	Diff line change
`@@ -177,7 +177,7 @@ profiles {`
`177`	`177`	`}`
`178`	`178`	`umi {`
`179`	`179`	`params.input = "${projectDir}/tests/csv/3.0/fastq_umi.csv"`
`180`		`- params.umi_read_structure = '7M1S+T'`
	`180`	`+ params.umi_read_structure = '+T 7M1S+T'`
`181`	`181`	`}`
`182`	`182`	`use_gatk_spark {`
`183`	`183`	`params.use_gatk_spark = 'baserecalibrator,markduplicates'`