nf-core · ewels · Feb 16, 2021 · Feb 12, 2021 · Feb 12, 2021 · Feb 12, 2021
diff --git a/.gitignore b/.gitignore
@@ -21,7 +21,6 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
 lib64/
 parts/
 sdist/

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,7 @@
 
 ### Template
 
+* Added validation of parameters against Schema [[#426]](https://github.com/nf-core/tools/issues/426)
 * Added profiles to support the [Charliecloud](https://hpc.github.io/charliecloud/) and [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) container engines [[#824](https://github.com/nf-core/tools/issues/824)]
 * Fixed typo in nf-core-lint CI that prevented the markdown summary from being automatically posted on PRs as a comment.
 

diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/Checks.groovy b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/Checks.groovy
@@ -0,0 +1,158 @@
+/*
+ * This file holds several functions used to perform standard checks for the nf-core pipeline template.
+ */
+
+class Checks {
+
+    // Validate the AWS Batch specific parameters when running with an 'awsbatch' profile.
+    //
+    // workflow: object whose 'profile' string is searched for 'awsbatch'
+    // params:   object providing 'awsqueue', 'awsregion', 'outdir' and 'tracedir'
+    //
+    // Throws an AssertionError carrying a user-facing message when the queue or the
+    // region is missing, when 'outdir' is not an 's3:' path, or when 'tracedir' is
+    // an 's3:' path. Does nothing when the profile does not contain 'awsbatch'.
+    static void aws_batch(workflow, params) {
+        if (workflow.profile.contains('awsbatch')) {
+            // Both the queue and the region are required
+            assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
+            // Check outdir paths to be S3 buckets if running on AWSBatch
+            // related: https://github.com/nextflow-io/nextflow/issues/813
+            // (safe navigation: an unset outdir fails the check instead of raising a NullPointerException)
+            assert params.outdir?.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!"
+            // Prevent trace files to be stored on S3 since S3 does not support rolling files.
+            assert !params.tracedir?.startsWith('s3:') :  "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles."
+        }
+    }
+
+    // Warn when the local hostname matches an entry of params.hostnames whose
+    // profile is not part of the active '-profile' string.
+    //
+    // params.hostnames is iterated as profile -> hostname fragments and the machine
+    // name is obtained by running the 'hostname' command. Nothing is printed when
+    // params.hostnames is unset or empty.
+    static void hostname(workflow, params, log) {
+        Map colors = Headers.log_colours(params.monochrome_logs)
+        if (!params.hostnames) {
+            return
+        }
+        def hostname = "hostname".execute().text.trim()
+        params.hostnames.each { prof, hnames ->
+            hnames.each { hname ->
+                // Inside a closure 'return' only skips to the next element
+                if (!hostname.contains(hname)) {
+                    return
+                }
+                if (workflow.profile.contains(prof)) {
+                    return
+                }
+                log.info "=${colors.yellow}====================================================${colors.reset}=\n" +
+                          "${colors.yellow}WARN: You are running with `-profile $workflow.profile`\n" +
+                          "      but your machine hostname is ${colors.white}'$hostname'${colors.reset}.\n" +
+                          "      ${colors.yellow_bold}Please use `-profile $prof${colors.reset}`\n" +
+                          "=${colors.yellow}====================================================${colors.reset}="
+            }
+        }
+    }
+
+    // Build the multi-line citation text, naming the pipeline through
+    // workflow.manifest.name (used in the first line and in the CITATIONS.md URL).
+    private static String citation(workflow) {
+        def parts = [
+            "If you use ${workflow.manifest.name} for your analysis please cite:\n\n",
+            "* The pipeline\n",
+            "  https://doi.org/10.5281/zenodo.1400710\n\n",
+            "* The nf-core framework\n",
+            "  https://dx.doi.org/10.1038/s41587-020-0439-x\n",
+            "  https://rdcu.be/b1GjZ\n\n",
+            "* Software dependencies\n",
+            "  https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
+        ]
+        return parts.join('')
+    }
+
+    // Emit a single multi-line log.warn message telling the user to double-check
+    // the samplesheet auto-created from the '--public_data_ids' parameter.
+    static void sra_download(log) {
+        def message = [
+            "=============================================================================\n",
+            "  THIS IS AN EXPERIMENTAL FEATURE!\n\n",
+            "  Please double-check the samplesheet that has been auto-created using the\n",
+            "  public database ids provided via the '--public_data_ids' parameter.\n\n",
+            "  Public databases don't reliably hold information such as experimental group,\n",
+            "  replicate identifiers or strandedness information.\n\n",
+            "  All of the sample metadata obtained from the ENA has been appended\n",
+            "  as additional columns to help you manually curate the samplesheet before\n",
+            "  you run the pipeline.\n",
+            "==================================================================================="
+        ].join('')
+        log.warn(message)
+    }
+
+    // Emit a single multi-line log.warn message explaining that '--genome GRCh38'
+    // refers to the NCBI assembly and that '--skip_biotype_qc' is auto-activated.
+    static void ncbi_genome_warn(log) {
+        def message = [
+            "=============================================================================\n",
+            "  When using '--genome GRCh38' the assembly is from the NCBI and NOT Ensembl.\n",
+            "  Auto-activating '--skip_biotype_qc' parameter to circumvent the issue below:\n",
+            "  https://github.com/nf-core/rnaseq/issues/460.\n\n",
+            "  If you would like to use the soft-masked Ensembl assembly instead please see:\n",
+            "  https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n",
+            "==================================================================================="
+        ].join('')
+        log.warn(message)
+    }
+
+    // Emit a single multi-line log.warn message explaining that UCSC GTF files
+    // lack the 'gene_biotype' field and that '--skip_biotype_qc' is auto-activated.
+    static void ucsc_genome_warn(log) {
+        def message = [
+            "=============================================================================\n",
+            "  When using UCSC assemblies the 'gene_biotype' field is absent from the GTF file.\n",
+            "  Auto-activating '--skip_biotype_qc' parameter to circumvent the issue below:\n",
+            "  https://github.com/nf-core/rnaseq/issues/460.\n\n",
+            "  If you would like to use the soft-masked Ensembl assembly instead please see:\n",
+            "  https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n",
+            "==================================================================================="
+        ].join('')
+        log.warn(message)
+    }
+
+    // Emit a log.warn message stating that both '--gtf' and '--gff' were given
+    // and that the GTF file takes priority.
+    static void gtf_gff_warn(log) {
+        def message = [
+            "=============================================================================\n",
+            "  Both '--gtf' and '--gff' parameters have been provided.\n",
+            "  Using GTF file as priority.\n",
+            "==================================================================================="
+        ].join('')
+        log.warn(message)
+    }
+
+    // Emit a log.warn message stating that '--skip_alignment' was provided and
+    // which stages are skipped as a consequence.
+    static void skip_alignment_warn(log) {
+        def message = [
+            "=============================================================================\n",
+            "  '--skip_alignment' parameter has been provided.\n",
+            "  Skipping alignment, quantification and all downstream QC processes.\n",
+            "==================================================================================="
+        ].join('')
+        log.warn(message)
+    }
+
+    // Emit a log.error message (error level, not a warning) explaining that
+    // '--aligner star_rsem' cannot be combined with '--with_umi'.
+    // Only logs; it does not stop execution itself.
+    static void rsem_umi_error(log) {
+        def message = [
+            "=============================================================================\n",
+            "  When using '--aligner star_rsem', STAR is run by RSEM itself and so it is\n",
+            "  not possible to remove UMIs before the quantification.\n\n",
+            "  If you would like to remove UMI barcodes using the '--with_umi' option\n",
+            "  please use either '--aligner star' or '--aligner hisat2'.\n",
+            "============================================================================="
+        ].join('')
+        log.error(message)
+    }
+
+    // Parse the "Uniquely mapped reads %" value from a STAR log and report
+    // whether it passes the params.min_mapped_reads threshold.
+    //
+    // align_log must support eachLine() and getBaseName(). When several lines
+    // match, the last one wins; when none matches the percentage stays 0.
+    //
+    // Returns [ percent_aligned, pass ] where pass is true only when the
+    // percentage is strictly greater than the threshold. The outcome is also
+    // written to log.info.
+    static ArrayList get_star_percent_mapped(workflow, params, log, align_log) {
+        def pattern = /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/
+        def percent_aligned = 0
+        align_log.eachLine { line ->
+            def found = (line =~ pattern)
+            if (found.find()) {
+                percent_aligned = found.group(1).toFloat()
+            }
+        }
+
+        def logname = align_log.getBaseName() - '.Log.final'
+        Map colors = Headers.log_colours(params.monochrome_logs)
+        def pass = percent_aligned > params.min_mapped_reads.toFloat()
+        if (pass) {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} [PASS] STAR ${params.min_mapped_reads}% mapped threshold: ${percent_aligned}% - $logname${colors.reset}."
+        } else {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} [FAIL] STAR ${params.min_mapped_reads}% mapped threshold. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS: ${percent_aligned}% - $logname${colors.reset}."
+        }
+        return [ percent_aligned, pass ]
+    }
+
+    // Derive the library strandedness from RSeQC infer_experiment.py output.
+    //
+    // inferexperiment_file must support eachLine(). The fractions reported for
+    // the sense, antisense and undetermined categories (single-end and
+    // paired-end spellings) are converted to percentages.
+    //
+    // Returns [ strandedness, sense, antisense, undetermined ] where
+    // strandedness is 'forward' when sense >= 100 - cutoff, otherwise 'reverse'
+    // when antisense >= 100 - cutoff, otherwise 'unstranded'.
+    static ArrayList get_inferexperiment_strandedness(inferexperiment_file, cutoff=30) {
+        // Percentage taken from the captured group of the first match
+        def as_percent = { hit -> hit[0][1].toFloat() * 100 }
+
+        def sense        = 0
+        def antisense    = 0
+        def undetermined = 0
+        inferexperiment_file.eachLine { line ->
+            def failed_hit       = (line =~ /Fraction of reads failed to determine:\s([\d\.]+)/)
+            def single_sense_hit = (line =~ /Fraction of reads explained by "\++,--":\s([\d\.]+)/)
+            def single_anti_hit  = (line =~ /Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/)
+            def paired_sense_hit = (line =~ /Fraction of reads explained by "1\++,1--,2\+-,2-\+":\s([\d\.]+)/)
+            def paired_anti_hit  = (line =~ /Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/)
+            if (failed_hit) {
+                undetermined = as_percent(failed_hit)
+            }
+            if (single_sense_hit) {
+                sense = as_percent(single_sense_hit)
+            }
+            if (single_anti_hit) {
+                antisense = as_percent(single_anti_hit)
+            }
+            if (paired_sense_hit) {
+                sense = as_percent(paired_sense_hit)
+            }
+            if (paired_anti_hit) {
+                antisense = as_percent(paired_anti_hit)
+            }
+        }
+
+        def threshold = 100-cutoff
+        def strandedness
+        if (sense >= threshold) {
+            strandedness = 'forward'
+        } else if (antisense >= threshold) {
+            strandedness = 'reverse'
+        } else {
+            strandedness = 'unstranded'
+        }
+        return [ strandedness, sense, antisense, undetermined ]
+    }
+}
diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/Completion.groovy b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/Completion.groovy
@@ -0,0 +1,155 @@
+/*
+ * Functions to be run on completion of pipeline
+ */
+
+class Completion {
+    // Send the completion e-mail (when an address is configured) and write the
+    // rendered report to '<params.outdir>/pipeline_info/'.
+    //
+    // workflow:            run metadata (manifest, runName, success, start, complete, ...)
+    // params:              pipeline parameters; reads email, email_on_fail, plaintext_email,
+    //                      skip_multiqc, max_multiqc_email_size, min_mapped_reads,
+    //                      monochrome_logs and outdir
+    // summary_params:      map of group -> map of values; all groups are merged into the summary
+    // projectDir:          directory holding 'assets/' with the three e-mail templates
+    // log:                 logger used for info/warn messages
+    // multiqc_report:      object whose getVal() yields the MultiQC report (or a list of them)
+    // fail_percent_mapped: map whose keys are listed as failed samples in the e-mail
+    static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[], fail_percent_mapped=[:]) {
+
+        // Set up the e-mail variables
+        def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
+        if (fail_percent_mapped.size() > 0) {
+            subject = "[$workflow.manifest.name] Partially successful (${fail_percent_mapped.size()} skipped): $workflow.runName"
+        }
+        if (!workflow.success) {
+            subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
+        }
+
+        // Flatten the grouped parameters into a single map
+        def summary = [:]
+        for (group in summary_params.keySet()) {
+            summary << summary_params[group]
+        }
+
+        def misc_fields = [:]
+        misc_fields['Date Started']              = workflow.start
+        misc_fields['Date Completed']            = workflow.complete
+        misc_fields['Pipeline script file path'] = workflow.scriptFile
+        misc_fields['Pipeline script hash ID']   = workflow.scriptId
+        if (workflow.repository) misc_fields['Pipeline repository Git URL']    = workflow.repository
+        if (workflow.commitId)   misc_fields['Pipeline repository Git Commit'] = workflow.commitId
+        if (workflow.revision)   misc_fields['Pipeline Git branch/tag']        = workflow.revision
+        misc_fields['Nextflow Version']           = workflow.nextflow.version
+        misc_fields['Nextflow Build']             = workflow.nextflow.build
+        misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
+
+        def email_fields = [:]
+        email_fields['version']             = workflow.manifest.version
+        email_fields['runName']             = workflow.runName
+        email_fields['success']             = workflow.success
+        email_fields['dateComplete']        = workflow.complete
+        email_fields['duration']            = workflow.duration
+        email_fields['exitStatus']          = workflow.exitStatus
+        email_fields['errorMessage']        = (workflow.errorMessage ?: 'None')
+        email_fields['errorReport']         = (workflow.errorReport ?: 'None')
+        email_fields['commandLine']         = workflow.commandLine
+        email_fields['projectDir']          = workflow.projectDir
+        email_fields['summary']             = summary << misc_fields
+        email_fields['fail_percent_mapped'] = fail_percent_mapped.keySet()
+        email_fields['min_mapped_reads']    = params.min_mapped_reads
+
+        // On success try attach the multiqc report
+        def mqc_report = null
+        try {
+            if (workflow.success && !params.skip_multiqc) {
+                mqc_report = multiqc_report.getVal()
+                if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
+                    if (mqc_report.size() > 1) {
+                        log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
+                    }
+                    mqc_report = mqc_report[0]
+                }
+            }
+        } catch (all) {
+            // Best effort: the e-mail is still sent, just without the attachment
+            log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
+        }
+
+        // Check if we are only sending emails on failure
+        def email_address = params.email
+        if (!params.email && params.email_on_fail && !workflow.success) {
+            email_address = params.email_on_fail
+        }
+
+        // Render the TXT template
+        def engine       = new groovy.text.GStringTemplateEngine()
+        def tf           = new File("$projectDir/assets/email_template.txt")
+        def txt_template = engine.createTemplate(tf).make(email_fields)
+        def email_txt    = txt_template.toString()
+
+        // Render the HTML template
+        def hf            = new File("$projectDir/assets/email_template.html")
+        def html_template = engine.createTemplate(hf).make(email_fields)
+        def email_html    = html_template.toString()
+
+        // Render the sendmail template
+        // NOTE(review): the sendmail template is not visible from here, so the directory is
+        // bound under both 'baseDir' and 'projectDir' to resolve either spelling - confirm
+        // which one the template uses and drop the other.
+        def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit
+        def smail_fields           = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$projectDir", projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize:  max_multiqc_email_size.toBytes()]
+        def sf                     = new File("$projectDir/assets/sendmail_template.txt")
+        def sendmail_template      = engine.createTemplate(sf).make(smail_fields)
+        def sendmail_html          = sendmail_template.toString()
+
+        // Send the HTML e-mail
+        Map colors = Headers.log_colours(params.monochrome_logs)
+        if (email_address) {
+            try {
+                // Deliberately jump to the 'mail' fallback below when plaintext e-mail is requested
+                if (params.plaintext_email) { throw new GroovyException('Send plaintext e-mail, not HTML') }
+                // Try to send HTML e-mail using sendmail
+                [ 'sendmail', '-t' ].execute() << sendmail_html
+                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
+            } catch (all) {
+                // Catch failures and try with plaintext
+                def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
+                // mqc_report stays null for failed runs or when MultiQC was skipped
+                if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
+                    mail_cmd += [ '-A', mqc_report ]
+                }
+                mail_cmd.execute() << email_html
+                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
+            }
+        }
+
+        // Write summary e-mail HTML to a file
+        def output_d = new File("${params.outdir}/pipeline_info/")
+        if (!output_d.exists()) {
+            output_d.mkdirs()
+        }
+        def output_hf = new File(output_d, "pipeline_report.html")
+        output_hf.withWriter { w -> w << email_html }
+        def output_tf = new File(output_d, "pipeline_report.txt")
+        output_tf.withWriter { w -> w << email_txt }
+    }
+
+    // Log the end-of-run summary through log.info.
+    //
+    // Lists up to six samples that passed the STAR mapping threshold (followed
+    // by a pointer to the reports once six have been listed), every sample that
+    // failed it, and finally the overall pipeline status. On failure the
+    // hostname/profile check is run before the status line.
+    //
+    // pass_percent_mapped / fail_percent_mapped: maps of sample -> mapped percentage
+    static void summary(workflow, params, log, fail_percent_mapped=[:], pass_percent_mapped=[:]) {
+        Map colors = Headers.log_colours(params.monochrome_logs)
+
+        if (pass_percent_mapped.size() > 0) {
+            def max_listed = 6
+            def total_aln_count = pass_percent_mapped.size() + fail_percent_mapped.size()
+            def samp_aln = pass_percent_mapped.take(max_listed).collect { samp -> "    ${samp.value}%: ${samp.key}\n" }.join('')
+            if (pass_percent_mapped.size() >= max_listed) {
+                samp_aln += "    ..see pipeline reports for full list\n"
+            }
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} ${pass_percent_mapped.size()}/$total_aln_count samples passed STAR ${params.min_mapped_reads}% mapped threshold:\n${samp_aln}${colors.reset}-"
+        }
+        if (fail_percent_mapped.size() > 0) {
+            def samp_aln = fail_percent_mapped.collect { samp -> "    ${samp.value}%: ${samp.key}\n" }.join('')
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} ${fail_percent_mapped.size()} samples skipped since they failed STAR ${params.min_mapped_reads}% mapped threshold:\n${samp_aln}${colors.reset}-"
+        }
+
+        if (!workflow.success) {
+            Checks.hostname(workflow, params, log)
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
+            return
+        }
+        if (workflow.stats.ignoredCount == 0) {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
+        } else {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
+        }
+    }
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -21,7 +21,6 @@ dist/ @@
     downloads/
     eggs/
     .eggs/
-    lib/
     lib64/
     parts/
     sdist/
@@ Expand Down @@