nf-core · jfy133 · Mar 12, 2021 · Feb 5, 2021 · Feb 5, 2021 · Feb 5, 2021
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -48,9 +48,6 @@ jobs:
         run: |
           wget -qO- get.nextflow.io | bash
           sudo mv nextflow /usr/local/bin/
-      - name: HELPTEXT Run with the help flag
-        run: |
-          nextflow run ${GITHUB_WORKSPACE} --help
       - name: Get test data for cases where we don't use TSV input
         run: |
           git clone --single-branch --branch eager https://github.com/nf-core/test-datasets.git data

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ### `Added`
 
+- [#676](https://github.com/nf-core/eager/issues/676) - Added Lib Checks and automatic help message / summary message formatting
+
 ### `Fixed`
 
 - [#666](https://github.com/nf-core/eager/issues/666) - Fixed input file staging for `print_nuclear_contamination`

diff --git a/lib/Checks.groovy b/lib/Checks.groovy
@@ -0,0 +1,213 @@
+import org.yaml.snakeyaml.Yaml
+
+/*
+ * This file holds several functions used to perform standard checks for the nf-core pipeline template.
+ */
+
+class Checks {
+
+    // Warn when the local Conda configuration does not list the 'conda-forge',
+    // 'bioconda' and 'defaults' channels, in that relative order.
+    //
+    // log: logger used for output (only `warn` is called).
+    //
+    // The check is best-effort. If `conda config --show channels` cannot be run,
+    // or its output does not parse to a map holding a `channels` list, a single
+    // "Could not verify" warning is logged and the method returns.
+    static void check_conda_channels(log) {
+        Yaml parser = new Yaml()
+        def channels = null
+        try {
+            def config = parser.load("conda config --show channels".execute().text)
+            // Empty or unexpected output yields no usable channel list
+            channels = (config instanceof Map) ? config.channels : null
+        } catch(NullPointerException | IOException e) {
+            log.warn "Could not verify conda channel configuration."
+            return
+        }
+
+        // Without a list there is nothing to inspect; calling indexOf on it would crash
+        if (!(channels instanceof List)) {
+            log.warn "Could not verify conda channel configuration."
+            return
+        }
+
+        // Check that all channels are present
+        def required_channels = ['conda-forge', 'bioconda', 'defaults']
+        def conda_check_failed = !required_channels.every { ch -> ch in channels }
+
+        // Check that they are in the right order
+        conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda'))
+        conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults'))
+
+        if (conda_check_failed) {
+            log.warn "=============================================================================\n" +
+                     "  There is a problem with your Conda configuration!\n\n" + 
+                     "  You will need to set-up the conda-forge and bioconda channels correctly.\n" +
+                     "  Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" +
+                     "  NB: The order of the channels matters!\n" +
+                     "==================================================================================="
+        }
+    }
+
+    // Validate the parameters required for AWS Batch execution.
+    // Nothing is checked unless the active profile string contains 'awsbatch'.
+    // Each failed check raises an assertion error carrying the message shown.
+    static void aws_batch(workflow, params) {
+        if (!workflow.profile.contains('awsbatch')) {
+            return
+        }
+
+        // Queue and region must both be set
+        assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
+
+        // The output directory has to be an S3 location when running on AWSBatch
+        // related: https://github.com/nextflow-io/nextflow/issues/813
+        assert params.outdir.startsWith('s3:')       : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!"
+
+        // Trace files must stay off S3 since S3 does not support rolling files.
+        assert !params.tracedir.startsWith('s3:')    :  "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles."
+    }
+
+    // Log a notice when the machine hostname contains a name configured under
+    // params.hostnames while the profile associated with that name is not part
+    // of the active profile string.
+    static void hostname(workflow, params, log) {
+        Map colors = Headers.log_colours(params.monochrome_logs)
+        if (!params.hostnames) {
+            return
+        }
+
+        def hostname = "hostname".execute().text.trim()
+        for (entry in params.hostnames) {
+            def prof = entry.key
+            for (hname in entry.value) {
+                // Only report hosts that match while their profile is not in use
+                if (!hostname.contains(hname) || workflow.profile.contains(prof)) {
+                    continue
+                }
+                def notice = "=${colors.yellow}====================================================${colors.reset}=\n" +
+                             "${colors.yellow}WARN: You are running with `-profile $workflow.profile`\n" +
+                             "      but your machine hostname is ${colors.white}'$hostname'${colors.reset}.\n" +
+                             "      ${colors.yellow_bold}Please use `-profile $prof${colors.reset}`\n" +
+                             "=${colors.yellow}====================================================${colors.reset}="
+                log.info notice
+            }
+        }
+    }
+
+    // Build the citation text, inserting workflow.manifest.name into the
+    // opening sentence and into the CITATIONS.md link.
+    private static String citation(workflow) {
+        def text = new StringBuilder()
+        text << "If you use ${workflow.manifest.name} for your analysis please cite:\n\n"
+        text << "* The pipeline\n"
+        text << "  https://doi.org/10.1101/2020.06.11.145615\n\n"
+        text << "* The nf-core framework\n"
+        text << "  https://dx.doi.org/10.1038/s41587-020-0439-x\n"
+        text << "  https://rdcu.be/b1GjZ\n\n"
+        text << "* Software dependencies\n"
+        text << "  https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
+        return text.toString()
+    }
+
+    // Exit the pipeline if an incorrect --genome key was provided.
+    //
+    // params: must expose `genomes` (map of genome key -> attributes) and `genome`.
+    // log:    logger used to report the problem (only `error` is called).
+    //
+    // Nothing happens when either value is unset or the key is known. Otherwise
+    // the available keys are logged and the JVM exits with a non-zero status so
+    // the failure is visible to the calling shell or scheduler.
+    static void genome_exists(params, log) {
+        if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
+            log.error "=============================================================================\n" +
+                      "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
+                      "  Currently, the available genome keys are:\n" +
+                      "  ${params.genomes.keySet().join(", ")}\n" +
+                      "============================================================================="
+            // This is an error path: status 0 would report success
+            System.exit(1)
+        }
+    }
+
+    // Look up one attribute (e.g. fasta) of the selected genome.
+    // Returns '' when no genome is selected, the genome key is unknown, or the
+    // genome entry has no such attribute.
+    static String get_genome_attribute(params, attribute) {
+        def genome_known = params.genomes && params.genome && params.genomes.containsKey(params.genome)
+        if (!genome_known) {
+            return ''
+        }
+        def genome_entry = params.genomes[ params.genome ]
+        return genome_entry.containsKey(attribute) ? genome_entry[ attribute ] : ''
+    }
+
+    // Log a warning that the auto-created samplesheet comes from an experimental
+    // feature and should be checked manually before running the pipeline.
+    static void sra_download(log) {
+        def banner = [
+            "=============================================================================\n",
+            "  THIS IS AN EXPERIMENTAL FEATURE!\n\n",
+            "  Please double-check the samplesheet that has been auto-created using the\n",
+            "  public database ids provided via the '--public_data_ids' parameter.\n\n",
+            "  Public databases don't reliably hold information such as experimental group,\n",
+            "  replicate identifiers or strandedness information.\n\n",
+            "  All of the sample metadata obtained from the ENA has been appended\n",
+            "  as additional columns to help you manually curate the samplesheet before\n",
+            "  you run the pipeline.\n",
+            "==================================================================================="
+        ].join('')
+        log.warn banner
+    }
+
+    // Log a warning that '--genome GRCh38' selects the NCBI assembly and that
+    // '--skip_biotype_qc' is being auto-activated as a consequence.
+    static void ncbi_genome_warn(log) {
+        def banner = [
+            "=============================================================================\n",
+            "  When using '--genome GRCh38' the assembly is from the NCBI and NOT Ensembl.\n",
+            "  Auto-activating '--skip_biotype_qc' parameter to circumvent the issue below:\n",
+            "  https://github.com/nf-core/rnaseq/issues/460.\n\n",
+            "  If you would like to use the soft-masked Ensembl assembly instead please see:\n",
+            "  https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n",
+            "==================================================================================="
+        ].join('')
+        log.warn banner
+    }
+
+    // Log a warning that UCSC assemblies lack the 'gene_biotype' GTF field and
+    // that '--skip_biotype_qc' is being auto-activated as a consequence.
+    static void ucsc_genome_warn(log) {
+        def banner = [
+            "=============================================================================\n",
+            "  When using UCSC assemblies the 'gene_biotype' field is absent from the GTF file.\n",
+            "  Auto-activating '--skip_biotype_qc' parameter to circumvent the issue below:\n",
+            "  https://github.com/nf-core/rnaseq/issues/460.\n\n",
+            "  If you would like to use the soft-masked Ensembl assembly instead please see:\n",
+            "  https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n",
+            "==================================================================================="
+        ].join('')
+        log.warn banner
+    }
+
+    // Log a warning that '--gtf' and '--gff' were both supplied and that the
+    // GTF file takes priority.
+    static void gtf_gff_warn(log) {
+        def banner = [
+            "=============================================================================\n",
+            "  Both '--gtf' and '--gff' parameters have been provided.\n",
+            "  Using GTF file as priority.\n",
+            "==================================================================================="
+        ].join('')
+        log.warn banner
+    }
+
+    // Log a warning that '--skip_alignment' was supplied, so alignment,
+    // quantification and all downstream QC processes are skipped.
+    static void skip_alignment_warn(log) {
+        def banner = [
+            "=============================================================================\n",
+            "  '--skip_alignment' parameter has been provided.\n",
+            "  Skipping alignment, quantification and all downstream QC processes.\n",
+            "==================================================================================="
+        ].join('')
+        log.warn banner
+    }
+
+    // Log an error and terminate when '--aligner star_rsem' is combined with
+    // '--with_umi'.
+    //
+    // log: logger used to report the problem (only `error` is called).
+    //
+    // This method never returns: after logging it exits the JVM with a non-zero
+    // status so the failure is visible to the calling shell or scheduler.
+    static void rsem_umi_error(log) {
+        log.error "=============================================================================\n" +
+                  "  When using '--aligner star_rsem', STAR is run by RSEM itself and so it is\n" +
+                  "  not possible to remove UMIs before the quantification.\n\n" +
+                  "  If you would like to remove UMI barcodes using the '--with_umi' option\n" + 
+                  "  please use either '--aligner star_salmon' or '--aligner hisat2'.\n" +
+                  "============================================================================="
+        // This is an error path: status 0 would report success
+        System.exit(1)
+    }
+
+    // Read the uniquely-mapped percentage from a STAR log and log whether it
+    // clears the params.min_mapped_reads threshold.
+    //
+    // Returns [ percent_aligned, pass ]. percent_aligned is the value captured
+    // from the last "Uniquely mapped reads %" line (0 when no line matches);
+    // pass is true only when it is strictly greater than the threshold.
+    static ArrayList get_star_percent_mapped(workflow, params, log, align_log) {
+        def percent_aligned = 0
+        align_log.eachLine { line ->
+            def hit = (line =~ /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/)
+            if (hit) {
+                percent_aligned = hit[0][1].toFloat()
+            }
+        }
+
+        def logname = align_log.getBaseName() - '.Log.final'
+        Map colors = Headers.log_colours(params.monochrome_logs)
+
+        // A value equal to the threshold counts as a failure
+        def pass = percent_aligned > params.min_mapped_reads.toFloat()
+        if (pass) {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} [PASS] STAR ${params.min_mapped_reads}% mapped threshold: ${percent_aligned}% - $logname${colors.reset}."
+        } else {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} [FAIL] STAR ${params.min_mapped_reads}% mapped threshold. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS: ${percent_aligned}% - $logname${colors.reset}."
+        }
+        return [ percent_aligned, pass ]
+    }
+
+    // Parse the predicted strandedness from RSeQC infer_experiment.py output.
+    //
+    // inferexperiment_file: any object offering eachLine over the report text.
+    // cutoff:               percentage margin; a direction is called when its
+    //                       share of reads is at least (100 - cutoff). Default 30.
+    //
+    // Returns [ strandedness, sense, antisense, undetermined ] where strandedness
+    // is 'forward', 'reverse' or 'unstranded' and the other three values are
+    // percentages (the reported fractions multiplied by 100, 0 when absent).
+    static ArrayList get_inferexperiment_strandedness(inferexperiment_file, cutoff=30) {
+        // Compiled once instead of per line. Every '+' in a strand label is
+        // escaped individually so the labels are matched literally.
+        def undetermined_pattern = ~/Fraction of reads failed to determine:\s([\d\.]+)/
+        def se_sense_pattern     = ~/Fraction of reads explained by "\+\+,--":\s([\d\.]+)/
+        def se_antisense_pattern = ~/Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/
+        def pe_sense_pattern     = ~/Fraction of reads explained by "1\+\+,1--,2\+-,2-\+":\s([\d\.]+)/
+        def pe_antisense_pattern = ~/Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/
+
+        def sense        = 0
+        def antisense    = 0
+        def undetermined = 0
+        inferexperiment_file.eachLine { line ->
+            def undetermined_matcher = line =~ undetermined_pattern
+            def se_sense_matcher     = line =~ se_sense_pattern
+            def se_antisense_matcher = line =~ se_antisense_pattern
+            def pe_sense_matcher     = line =~ pe_sense_pattern
+            def pe_antisense_matcher = line =~ pe_antisense_pattern
+            if (undetermined_matcher) undetermined = undetermined_matcher[0][1].toFloat() * 100
+            if (se_sense_matcher)     sense        = se_sense_matcher[0][1].toFloat() * 100
+            if (se_antisense_matcher) antisense    = se_antisense_matcher[0][1].toFloat() * 100
+            if (pe_sense_matcher)     sense        = pe_sense_matcher[0][1].toFloat() * 100
+            if (pe_antisense_matcher) antisense    = pe_antisense_matcher[0][1].toFloat() * 100
+        }
+
+        // Sense is tested first, so it wins if both directions reach the threshold
+        def strandedness = 'unstranded'
+        if (sense >= 100-cutoff) {
+            strandedness = 'forward'
+        } else if (antisense >= 100-cutoff) {
+            strandedness = 'reverse'
+        }
+        return [ strandedness, sense, antisense, undetermined ]
+    }
+}
diff --git a/lib/Completion.groovy b/lib/Completion.groovy
@@ -0,0 +1,129 @@
+/*
+ * Functions to be run on completion of pipeline
+ */
+
+class Completion {
+    // Build the completion e-mail, send it if an address is configured, and
+    // write the rendered HTML and text reports to the output directory.
+    //
+    // workflow:       run metadata (success, runName, manifest, timings, ...).
+    // params:         reads email, email_on_fail, plaintext_email,
+    //                 max_multiqc_email_size, monochrome_logs and outdir.
+    // summary_params: map of group name -> map of fields; all groups are merged
+    //                 into one summary map.
+    // projectDir:     directory holding the assets/ e-mail templates.
+    // log:            logger used for info and warn messages.
+    // multiqc_report: object whose getVal() yields the MultiQC report(s); when
+    //                 that call fails a warning is logged and no report is attached.
+    //
+    // pipeline_report.html and pipeline_report.txt are written to
+    // <outdir>/pipeline_info/ whether or not an e-mail address is configured.
+    static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) {
+
+        // Set up the e-mail variables
+        def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
+
+        if (!workflow.success) {
+            subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
+        }
+
+        // Flatten the grouped summary parameters into a single map
+        def summary = [:]
+        for (group in summary_params.keySet()) {
+            summary << summary_params[group]
+        }
+
+        def misc_fields = [:]
+        misc_fields['Date Started']              = workflow.start
+        misc_fields['Date Completed']            = workflow.complete
+        misc_fields['Pipeline script file path'] = workflow.scriptFile
+        misc_fields['Pipeline script hash ID']   = workflow.scriptId
+        if (workflow.repository) misc_fields['Pipeline repository Git URL']    = workflow.repository
+        if (workflow.commitId)   misc_fields['Pipeline repository Git Commit'] = workflow.commitId
+        if (workflow.revision)   misc_fields['Pipeline Git branch/tag']        = workflow.revision
+        misc_fields['Nextflow Version']           = workflow.nextflow.version
+        misc_fields['Nextflow Build']             = workflow.nextflow.build
+        misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
+
+        def email_fields = [:]
+        email_fields['version']             = workflow.manifest.version
+        email_fields['runName']             = workflow.runName
+        email_fields['success']             = workflow.success
+        email_fields['dateComplete']        = workflow.complete
+        email_fields['duration']            = workflow.duration
+        email_fields['exitStatus']          = workflow.exitStatus
+        email_fields['errorMessage']        = (workflow.errorMessage ?: 'None')
+        email_fields['errorReport']         = (workflow.errorReport ?: 'None')
+        email_fields['commandLine']         = workflow.commandLine
+        email_fields['projectDir']          = workflow.projectDir
+        email_fields['summary']             = summary << misc_fields
+
+        // On success try attach the multiqc report
+        def mqc_report = null
+        try {
+            if (workflow.success) {
+                mqc_report = multiqc_report.getVal()
+                if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
+                    if (mqc_report.size() > 1) {
+                        log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
+                    }
+                    mqc_report = mqc_report[0]
+                }
+            }
+        } catch (all) {
+            log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
+        }
+
+        // Check if we are only sending emails on failure
+        def email_address = params.email
+        if (!params.email && params.email_on_fail && !workflow.success) {
+            email_address = params.email_on_fail
+        }
+
+        // Render the TXT template
+        def engine       = new groovy.text.GStringTemplateEngine()
+        def tf           = new File("$projectDir/assets/email_template.txt")
+        def txt_template = engine.createTemplate(tf).make(email_fields)
+        def email_txt    = txt_template.toString()
+
+        // Render the HTML template
+        def hf            = new File("$projectDir/assets/email_template.html")
+        def html_template = engine.createTemplate(hf).make(email_fields)
+        def email_html    = html_template.toString()
+
+        // Render the sendmail template
+        def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit 
+        def smail_fields           = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize:  max_multiqc_email_size.toBytes()]
+        def sf                     = new File("$projectDir/assets/sendmail_template.txt")
+        def sendmail_template      = engine.createTemplate(sf).make(smail_fields)
+        def sendmail_html          = sendmail_template.toString()
+
+        // Send the HTML e-mail
+        Map colors = Headers.log_colours(params.monochrome_logs)
+        if (email_address) {
+            try {
+                // Deliberately jump to the fallback branch when plaintext is requested
+                if (params.plaintext_email) { throw new GroovyException('Send plaintext e-mail, not HTML') }
+                // Try to send HTML e-mail using sendmail
+                [ 'sendmail', '-t' ].execute() << sendmail_html
+                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
+            } catch (all) {
+                // Catch failures and try with plaintext
+                def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
+                // mqc_report stays null for failed runs or when no report was found
+                if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
+                    mail_cmd += [ '-A', mqc_report ]
+                }
+                mail_cmd.execute() << email_html
+                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
+            }
+        }
+
+        // Write summary e-mail HTML to a file
+        def output_d = new File("${params.outdir}/pipeline_info/")
+        if (!output_d.exists()) {
+            output_d.mkdirs()
+        }
+        def output_hf = new File(output_d, "pipeline_report.html")
+        output_hf.withWriter { w -> w << email_html }
+        def output_tf = new File(output_d, "pipeline_report.txt")
+        output_tf.withWriter { w -> w << email_txt }
+    }
+
+    // Log a one-line completion status for the run.
+    // On failure the hostname/profile check is run first. The two
+    // *_percent_mapped arguments are accepted but not used in this body.
+    static void summary(workflow, params, log, fail_percent_mapped=[:], pass_percent_mapped=[:]) {
+        Map colors = Headers.log_colours(params.monochrome_logs)
+
+        if (!workflow.success) {
+            Checks.hostname(workflow, params, log)
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
+            return
+        }
+
+        // A successful run may still contain processes whose errors were ignored
+        if (workflow.stats.ignoredCount == 0) {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
+            return
+        }
+        log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
+    }
+}