/*
 * Warn when the local conda configuration does not list the channels named in
 * `required_channels`, or lists them in a different relative order.
 *
 * Runs `conda config --show channels`, parses its output as YAML and inspects the
 * `channels` entry. Every failure mode handled here only logs a warning and returns.
 *
 * log : logger object providing warn()
 */
static void check_conda_channels(log) {
    Yaml parser = new Yaml()
    def channels = []
    try {
        def config = parser.load("conda config --show channels".execute().text)
        channels = config.channels
    } catch(NullPointerException | IOException e) {
        log.warn "Could not verify conda channel configuration."
        return
    }

    // `config.channels` is null when the parsed output has no `channels` entry. The
    // indexOf() calls below sit outside the try block and would throw on it, so treat
    // anything that is not a list as "cannot verify" as well.
    if (!(channels instanceof List)) {
        log.warn "Could not verify conda channel configuration."
        return
    }

    // Check that all channels are present
    def required_channels = ['conda-forge', 'bioconda', 'defaults']
    def conda_check_failed = !required_channels.every { ch -> ch in channels }

    // Check that they are in the right order
    conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda'))
    conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults'))

    if (conda_check_failed) {
        log.warn "=============================================================================\n" +
            " There is a problem with your Conda configuration!\n\n" +
            " You will need to set-up the conda-forge and bioconda channels correctly.\n" +
            " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" +
            " NB: The order of the channels matters!\n" +
            "==================================================================================="
    }
}
/*
 * Render the completion report for a finished run, e-mail it when an address is
 * configured, and write it to `${params.outdir}/pipeline_info/`.
 *
 * workflow       : Nextflow workflow metadata (manifest, runName, success, ...)
 * params         : pipeline parameters; reads email, email_on_fail, plaintext_email,
 *                  max_multiqc_email_size, monochrome_logs and outdir
 * summary_params : map of group name -> map of parameters, flattened into the summary
 * projectDir     : directory holding assets/email_template.txt, assets/email_template.html
 *                  and assets/sendmail_template.txt
 * log            : logger providing info() and warn()
 * multiqc_report : object whose getVal() yields the MultiQC report, or a list of reports
 */
static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) {

    // Set up the e-mail variables
    def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
    if (!workflow.success) {
        subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
    }

    // Flatten the grouped parameter summary into a single map
    def summary = [:]
    for (group in summary_params.keySet()) {
        summary << summary_params[group]
    }

    def misc_fields = [:]
    misc_fields['Date Started'] = workflow.start
    misc_fields['Date Completed'] = workflow.complete
    misc_fields['Pipeline script file path'] = workflow.scriptFile
    misc_fields['Pipeline script hash ID'] = workflow.scriptId
    if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository
    if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId
    if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision
    misc_fields['Nextflow Version'] = workflow.nextflow.version
    misc_fields['Nextflow Build'] = workflow.nextflow.build
    misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp

    def email_fields = [:]
    email_fields['version'] = workflow.manifest.version
    email_fields['runName'] = workflow.runName
    email_fields['success'] = workflow.success
    email_fields['dateComplete'] = workflow.complete
    email_fields['duration'] = workflow.duration
    email_fields['exitStatus'] = workflow.exitStatus
    email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
    email_fields['errorReport'] = (workflow.errorReport ?: 'None')
    email_fields['commandLine'] = workflow.commandLine
    email_fields['projectDir'] = workflow.projectDir
    email_fields['summary'] = summary << misc_fields

    // On success try attach the multiqc report
    def mqc_report = null
    try {
        if (workflow.success) {
            mqc_report = multiqc_report.getVal()
            if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
                if (mqc_report.size() > 1) {
                    log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
                }
                mqc_report = mqc_report[0]
            }
        }
    } catch (all) {
        log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
    }

    // Check if we are only sending emails on failure
    def email_address = params.email
    if (!params.email && params.email_on_fail && !workflow.success) {
        email_address = params.email_on_fail
    }

    // Render the TXT template
    def engine = new groovy.text.GStringTemplateEngine()
    def tf = new File("$projectDir/assets/email_template.txt")
    def txt_template = engine.createTemplate(tf).make(email_fields)
    def email_txt = txt_template.toString()

    // Render the HTML template
    def hf = new File("$projectDir/assets/email_template.html")
    def html_template = engine.createTemplate(hf).make(email_fields)
    def email_html = html_template.toString()

    // Render the sendmail template
    def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit
    def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()]
    def sf = new File("$projectDir/assets/sendmail_template.txt")
    def sendmail_template = engine.createTemplate(sf).make(smail_fields)
    def sendmail_html = sendmail_template.toString()

    // Send the e-mail: `sendmail` first, unless params.plaintext_email asks to skip it,
    // then fall back to `mail`.
    Map colors = Headers.log_colours(params.monochrome_logs)
    if (email_address) {
        def sent_with_sendmail = false
        if (!params.plaintext_email) {
            try {
                // Try to send HTML e-mail using sendmail
                [ 'sendmail', '-t' ].execute() << sendmail_html
                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
                sent_with_sendmail = true
            } catch (all) {
                // Deliberately ignored: any sendmail failure means "use the `mail` fallback below".
            }
        }
        if (!sent_with_sendmail) {
            def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
            // mqc_report stays null when the run failed or no report could be fetched;
            // only attach a report that exists and is within the size limit.
            if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
                mail_cmd += [ '-A', mqc_report ]
            }
            mail_cmd.execute() << email_html
            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
        }
    }

    // Write summary e-mail HTML to a file
    def output_d = new File("${params.outdir}/pipeline_info/")
    if (!output_d.exists()) {
        output_d.mkdirs()
    }
    def output_hf = new File(output_d, "pipeline_report.html")
    output_hf.withWriter { w -> w << email_html }
    def output_tf = new File(output_d, "pipeline_report.txt")
    output_tf.withWriter { w -> w << email_txt }
}
/*
 * Horizontal rule for log output: a dash, the `dim` colour code, a run of dashes,
 * the `reset` colour code and a closing dash.
 *
 * monochrome_logs : passed straight to log_colours() to obtain the colour codes
 */
static String dashed_line(monochrome_logs) {
    Map palette = log_colours(monochrome_logs)
    String rule = "-${palette.dim}----------------------------------------------------${palette.reset}-"
    return rule
}
/*
 * Check the supplied parameters against the pipeline JSON schema.
 *
 * Three checks are made:
 *   1. core Nextflow options given with two hyphens (e.g. `--resume`) are reported as errors;
 *   2. the cleaned parameters are validated against the schema;
 *   3. parameters that are neither in the schema nor listed in
 *      `params.schema_ignore_params` are reported as a warning.
 * Calls System.exit(1) when check 1 or 2 fails.
 *
 * params     : pipeline parameters (map-like)
 * jsonSchema : path to the JSON schema file
 * log        : logger providing error(), warn() and info()
 * returns    : list of the unexpected parameter names
 */
/* groovylint-disable-next-line UnusedPrivateMethodParameter */
private static ArrayList validateParameters(params, jsonSchema, log) {
    def has_error = false
    //=====================================================================//
    // Check for nextflow core params and unexpected params
    def json = new File(jsonSchema).text
    def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions')
    def specifiedParamKeys = params.keySet()
    def nf_params = [
        // Options for base `nextflow` command
        'bg',
        'c',
        'C',
        'config',
        'd',
        'D',
        'dockerize',
        'h',
        'log',
        'q',
        'quiet',
        'syslog',
        'v',
        'version',

        // Options for `nextflow run` command
        'ansi',
        'ansi-log',
        'bg',
        'bucket-dir',
        'c',
        'cache',
        'config',
        'dsl2',
        'dump-channels',
        'dump-hashes',
        'E',
        'entry',
        'latest',
        'lib',
        'main-script',
        'N',
        'name',
        'offline',
        'params-file',
        'pi',
        'plugins',
        'poll-interval',
        'pool-size',
        'profile',
        'ps',
        'qs',
        'queue-size',
        'r',
        'resume',
        'revision',
        'stdin',
        'stub',
        'stub-run',
        'test',
        'w',
        'with-charliecloud',
        'with-conda',
        'with-dag',
        'with-docker',
        'with-mpi',
        'with-notification',
        'with-podman',
        'with-report',
        'with-singularity',
        'with-timeline',
        'with-tower',
        'with-trace',
        'with-weblog',
        'without-docker',
        'without-podman',
        'work-dir'
    ]
    def unexpectedParams = []

    // Collect expected parameters from the schema
    def expectedParams = []
    for (group in schemaParams) {
        for (p in group.value['properties']) {
            expectedParams.push(p.key)
        }
    }

    // Names that are allowed even though the schema does not declare them. Built once,
    // and tolerant of `schema_ignore_params` being unset.
    def params_ignore = (params.schema_ignore_params ?: '').toString().split(',') + 'schema_ignore_params'

    for (specifiedParam in specifiedParamKeys) {
        // nextflow params
        if (nf_params.contains(specifiedParam)) {
            log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'"
            has_error = true
        }
        // unexpected params
        if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam)) {
            unexpectedParams.push(specifiedParam)
        }
    }

    //=====================================================================//
    // Validate parameters against the schema
    // withInputStream closes the stream once the schema JSON has been read.
    JSONObject rawSchema = new File(jsonSchema).withInputStream { stream ->
        new JSONObject(new JSONTokener(stream))
    }
    Schema schema = SchemaLoader.load(rawSchema)

    // Clean the parameters
    def cleanedParams = cleanParameters(params)

    // Convert to JSONObject
    def jsonParams = new JsonBuilder(cleanedParams)
    JSONObject paramsJSON = new JSONObject(jsonParams.toString())

    // Validate
    try {
        schema.validate(paramsJSON)
    } catch (ValidationException e) {
        println ''
        log.error 'ERROR: Validation of pipeline parameters failed!'
        JSONObject exceptionJSON = e.toJSON()
        printExceptions(exceptionJSON, paramsJSON, log)
        println ''
        has_error = true
    }

    // Check for unexpected parameters
    // Getting this message a lot for parameters that you *do* expect?
    // You can make a csv list of expected params not in the schema with 'params.schema_ignore_params'
    // for example, in your institutional config
    if (unexpectedParams.size() > 0) {
        Map colors = log_colours(params.monochrome_logs)
        println ''
        def warn_msg = 'Found unexpected parameters:'
        for (unexpectedParam in unexpectedParams) {
            // cleanParameters() drops values that evaluate to false, so such a key is absent
            // from paramsJSON and JSONObject.get() would throw; fall back to the raw value.
            def shown_value = paramsJSON.has(unexpectedParam) ? paramsJSON.get(unexpectedParam) : params.get(unexpectedParam)
            warn_msg = warn_msg + "\n* --${unexpectedParam}: ${shown_value.toString()}"
        }
        log.warn warn_msg
        log.info "- ${colors.dim}(Hide this message with 'params.schema_ignore_params')${colors.reset} -"
        println ''
    }

    if (has_error) {
        System.exit(1)
    }

    return unexpectedParams
}
/*
 * Map of colour name -> ANSI escape sequence for log output.
 * When monochrome_logs is truthy every value is the empty string instead.
 */
private static Map log_colours(Boolean monochrome_logs) {
    // Declared in the order the keys must appear in the returned map
    Map ansi_codes = [
        'reset'      : "\033[0m",
        'dim'        : "\033[2m",
        'black'      : "\033[0;30m",
        'green'      : "\033[0;32m",
        'yellow'     : "\033[0;33m",
        'yellow_bold': "\033[1;93m",
        'blue'       : "\033[0;34m",
        'purple'     : "\033[0;35m",
        'cyan'       : "\033[0;36m",
        'white'      : "\033[0;37m",
        'red'        : "\033[1;91m"
    ]
    return ansi_codes.collectEntries { colour_name, escape_code ->
        [(colour_name): monochrome_logs ? '' : escape_code]
    }
}
/*
 * Beautify parameters for --help
 *
 * Builds the help text: the typical command, then one section per schema group listing
 * each parameter as `--name [type] description [default: value]`, then a dashed footer.
 *
 * workflow    : unused here
 * params      : pipeline parameters; only `monochrome_logs` is read
 * json_schema : path to the JSON schema, loaded through params_load()
 * command     : example command line shown at the top
 * returns     : the formatted help text
 */
private static String params_help(workflow, params, json_schema, command) {
    // Parameter names are shown in bold. The bold attribute is switched off again right
    // after the name, and no escape codes are emitted for monochrome logs.
    String bold = params.monochrome_logs ? '' : "\u001B[1m"
    String reset = params.monochrome_logs ? '' : "\u001B[0m"

    String output = ''
    output += 'Typical pipeline command:\n\n'
    output += " ${command}\n\n"
    def params_map = params_load(json_schema)
    def max_chars = params_max_chars(params_map) + 1
    for (group in params_map.keySet()) {
        output += group + '\n'
        def group_params = params_map.get(group) // This gets the parameters of that particular group
        for (param in group_params.keySet()) {
            def schema_entry = group_params.get(param)
            def type = '[' + schema_entry.type + ']'
            // A parameter without a description gets an empty one rather than the text "null"
            def description = schema_entry.description ?: ''
            // Compare against null so that defaults such as `false` or `0` are still shown
            def defaultValue = schema_entry.default != null ? " [default: " + schema_entry.default.toString() + "]" : ''
            output += " " + bold + "--" + param.padRight(max_chars) + reset + type.padRight(10) + description + defaultValue + '\n'
        }
        output += "\n"
    }
    output += dashed_line(params.monochrome_logs)
    output += '\n\n' + dashed_line(params.monochrome_logs)
    return output
}
/*
 * Beautify parameters for summary and return as string
 *
 * Prints every non-empty group returned by params_summary_map() as a heading followed by
 * `name: value` lines, then a note and a dashed footer.
 *
 * workflow    : passed through to params_summary_map()
 * params      : pipeline parameters; `monochrome_logs` is read here
 * json_schema : path to the JSON schema, passed through to params_summary_map()
 * returns     : the formatted summary text
 */
private static String params_summary_log(workflow, params, json_schema) {
    // Parameter names are shown in bold. The bold attribute is switched off before the
    // value, and no escape codes are emitted for monochrome logs.
    String bold = params.monochrome_logs ? '' : "\u001B[1m"
    String reset = params.monochrome_logs ? '' : "\u001B[0m"

    String output = ''
    def params_map = params_summary_map(workflow, params, json_schema)
    def max_chars = params_max_chars(params_map)
    for (group in params_map.keySet()) {
        def group_params = params_map.get(group) // This gets the parameters of that particular group
        if (group_params) {
            output += group + '\n'
            for (param in group_params.keySet()) {
                output += " " + bold + param.padRight(max_chars) + ": " + reset + group_params.get(param) + '\n'
            }
            output += '\n'
        }
    }
    output += "[Only displaying parameters that differ from pipeline default]\n"
    output += dashed_line(params.monochrome_logs)
    output += '\n\n' + dashed_line(params.monochrome_logs)
    return output
}
group + if (group_params) { + summary_section += "

$group

\n" + summary_section += "
\n" + for (param in group_params.keySet()) { + summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" + } + summary_section += "
\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + return yaml_file_text + } + +} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar new file mode 100644 index 000000000..805c8bb5e Binary files /dev/null and b/lib/nfcore_external_java_deps.jar differ diff --git a/main.nf b/main.nf index 727c1c2fb..31f818941 100644 --- a/main.nf +++ b/main.nf @@ -1,262 +1,38 @@ #!/usr/bin/env nextflow /* -============================================================================================================ +------------------------------------------------------------------------------------------------------------ nf-core/eager -============================================================================================================ +------------------------------------------------------------------------------------------------------------ EAGER Analysis Pipeline. 
Started 2018-06-05 #### Homepage / Documentation https://github.com/nf-core/eager #### Authors For a list of authors and contributors, see: https://github.com/nf-core/eager/tree/dev#authors-alphabetical -============================================================================================================ +------------------------------------------------------------------------------------------------------------ */ -def helpMessage() { - log.info nfcoreHeader() - log.info""" - ========================================= - eager v${workflow.manifest.version} - ========================================= - Usage: - - The typical command for running the pipeline is as follows: - - nextflow run nf-core/eager -profile --reads'*_R{1,2}.fastq.gz' --fasta '.fasta' - - Mandatory arguments: - -profile [str] Configuration profile to use. Can use multiple (comma separated). Ask system administrator if unsure. - Available: conda, docker, singularity, test, awsbatch, and more - Input - --input [file] Either paths or URLs to FASTQ/BAM data (must be surrounded with quotes). Indicate multiple files with wildcards (*). For paired end data, the path must use '{1,2}' notation to specify read pairs. - OR - A path to a TSV file (ending .tsv) containing file paths and sequencing/sample metadata. Allows for merging of multiple lanes/libraries/samples. Please see documentation for template. - - --udg_type [str] Specify here if you have UDG treated libraries, Set to 'half' for partial treatment, or 'full' for UDG. If not set, libraries are assumed to have no UDG treatment ('none'). Not required for TSV input. Default: ${params.udg_type} - --single_stranded [bool] Specifies that libraries are single stranded. Only effects MaltExtract and genotyping pileupCaller. Not required for TSV input. - --single_end [bool] Specifies that the input is single end reads. Not required for TSV input. - --colour_chemistry [num] Specifies what Illumina sequencing chemistry was used. 
Used to inform whether to poly-G trim if turned on (see below). Not required for TSV input. Options: 2, 4. Default: ${params.colour_chemistry} - --bam [bool] Specifies that the input is in BAM format. Not required for TSV input. - - - Additional Options: - --snpcapture_bed [file] If library result of SNP capture, path to BED file containing SNPs positions on reference genome. - --run_convertinputbam [bool] Turns on conversion of an input BAM file into FASTQ format to allow re-preprocessing (e.g. AdapterRemoval etc.). - - References - --fasta [file] Path or URL to a FASTA reference file (required if not iGenome reference). File suffixes can be: '.fa', '.fn', '.fna', '.fasta' - --genome [str] Name of iGenomes reference (required if not FASTA reference). - --bwa_index [dir] Path to directory containing pre-made BWA indices (i.e. everything before the endings '.amb' '.ann' '.bwt'. Most likely the same path as --fasta). If not supplied will be made for you. - --bt2_index [dir] Path to directory containing pre-made Bowtie2 indices (i.e. everything before the endings e.g. '.1.bt2', '.2.bt2', '.rev.1.bt2'. Most likely the same value as --fasta). If not supplied will be made for you. - --fasta_index [file] Path to samtools FASTA index (typically ending in '.fai'). - --seq_dict [file] Path to picard sequence dictionary file (typically ending in '.dict'). - --large_ref [bool] Specify to generate more recent '.csi' BAM indices. If your reference genome is larger than 3.5GB, this is recommended due to more efficient data handling with the '.csi' format over the older '.bai'. - --save_reference [bool] Turns on saving reference genome indices for later re-usage. - - Output options: - --outdir [dir] The output directory where the results will be saved. Default: ${params.outdir} - -w [dir] The directory where intermediate files will be stored. Recommended: '/work/' - - Skipping Skip any of the mentioned steps. 
- --skip_fastqc [bool] Skips both pre- and post-Adapter Removal FastQC steps. - --skip_adapterremoval [bool] - --skip_preseq [bool] - --skip_deduplication [bool] - --skip_damage_calculation [bool] - --skip_qualimap [bool] - - Complexity Filtering - --complexity_filter_poly_g [bool] Turn on running poly-G removal on FASTQ files. Will only be performed on 2 colour chemistry machine sequenced libraries. - --complexity_filter_poly_g_min [num] Specify length of poly-g min for clipping to be performed. Default: ${params.complexity_filter_poly_g_min} - - Clipping / Merging - --clip_forward_adaptor [str] Specify adapter sequence to be clipped off (forward strand). Default: '${params.clip_forward_adaptor}' - --clip_reverse_adaptor [str] Specify adapter sequence to be clipped off (reverse strand). Default: '${params.clip_reverse_adaptor}' - --clip_readlength [num] Specify read minimum length to be kept for downstream analysis. Default: ${params.clip_readlength} - --clip_min_read_quality [num] Specify minimum base quality for trimming off bases. Default: ${params.clip_min_read_quality} - --min_adap_overlap [num] Specify minimum adapter overlap: Default: ${params.min_adap_overlap} - --skip_collapse [bool] Skip merging forward and reverse reads together. Only applicable for paired-end libraries. - --skip_trim [bool] Skip adapter and quality trimming - --preserve5p [bool] Skip 5p quality base trimming (n, score, window) of 5 prime end. - --mergedonly [bool] Only use merged reads downstream (un-merged reads and singletons are discarded). - - Mapping - --mapper [str] Specify which mapper to use. Options: 'bwaaln', 'bwamem', 'circularmapper', 'bowtie2'. Default: '${params.mapper}' - --bwaalnn [num] Specify the -n parameter for BWA aln, i.e. amount of allowed mismatches in alignments. Default: ${params.bwaalnn} - --bwaalnk [num] Specify the -k parameter for BWA aln, i.e. maximum edit distance allowed in a seed. 
Default: ${params.bwaalnk} - --bwaalnl [num] Specify the -l parameter for BWA aln, i.e. length of seeds to be used. Set to 1024 for whole read. Default: ${params.bwaalnl} - --circularextension [num] Specify the number of bases to extend reference by (circularmapper only). Default: ${params.circularextension} - --circulartarget [chr] Specify the FASTA header of the target chromosome to extend(circularmapper only). Default: '${params.circulartarget}' - --circularfilter [bool] Turn on to remove reads that did not map to the circularised genome (circularmapper only). - --bt2_alignmode [str] Specify the bowtie2 alignment mode. Options: 'local', 'end-to-end'. Default: '${params.bt2_alignmode}' - --bt2_sensitivity [str] Specify the level of sensitivity for the bowtie2 alignment mode. Options: 'no-preset', 'very-fast', 'fast', 'sensitive', 'very-sensitive'. Default: '${params.bt2_sensitivity}' - --bt2n [num] Specify the -N parameter for bowtie2 (mismatches in seed). This will override defaults from alignmode/sensitivity. Default: ${params.bt2n} - --bt2l [num] Specify the -L parameter for bowtie2 (length of seed substrings). This will override defaults from alignmode/sensitivity. Default: ${params.bt2l} - --bt2_trim5 [num] Specify number of bases to trim off from 5' (left) end of read before alignment. Default: ${params.bt2_trim5} - --bt2_trim3 [num] Specify number of bases to trim off from 3' (right) end of read before alignment. Default: ${params.bt2_trim3} - - Host removal - --hostremoval_input_fastq [bool] Turn on creating pre-Adapter Removal FASTQ files without reads that mapped to reference (e.g. for public upload of privacy sensitive non-host data) - --hostremoval_mode [str] Host DNA Removal mode. Remove mapped reads completely from FASTQ (remove) or just mask mapped reads sequence by N (replace). Default: '${params.hostremoval_mode}' - - BAM Filtering - --run_bam_filtering [bool] Turn on filtering of mapping quality, read lengths, or unmapped reads of BAM files. 
- --bam_mapping_quality_threshold [num] Minimum mapping quality for reads filter. Default: ${params.bam_mapping_quality_threshold} - --bam_filter_minreadlength [num] Specify minimum read length to be kept after mapping. - --bam_unmapped_type [str] Defines whether to discard all unmapped reads, keep both mapped and unmapped together, or save as bam and/or only fastq format Options: 'discard', 'bam', 'keep', 'fastq', 'both'. Default: '${params.bam_unmapped_type}' - - DeDuplication - --dedupper [str] Deduplication method to use. Options: 'markduplicates', 'dedup'. Default: '${params.dedupper}' - --dedup_all_merged [bool] Turn on treating all reads as merged reads. - - Library Complexity Estimation - --preseq_step_size [num] Specify the step size of Preseq. Default: ${params.preseq_step_size} - - (aDNA) Damage Analysis - --damageprofiler_length [num] Specify length filter for DamageProfiler. Default: ${params.damageprofiler_length} - --damageprofiler_threshold [num] Specify number of bases of each read to consider for DamageProfiler calculations. Default: ${params.damageprofiler_threshold} - --damageprofiler_yaxis [float] Specify the maximum misincorporation frequency that should be displayed on damage plot. Set to 0 to 'autoscale'. Default: ${params.damageprofiler_yaxis} - --run_mapdamage_rescaling Turn on damage rescaling of BAM files using mapDamage2 to probabilistically remove damage. - --rescale_length_5p Length of read for mapDamage2 to rescale from 5p end. Default: ${params.rescale_length_5p} - --rescale_length_3p Length of read for mapDamage2 to rescale from 5p end. Default: ${params.rescale_length_3p} - --run_pmdtools [bool] Turn on PMDtools - --pmdtools_range [num] Specify range of bases for PMDTools. Default: ${params.pmdtools_range} - --pmdtools_threshold [num] Specify PMDScore threshold for PMDTools. Default: ${params.pmdtools_threshold} - --pmdtools_reference_mask [file] Specify a path to reference mask for PMDTools. 
- --pmdtools_max_reads [num] Specify the maximum number of reads to consider for metrics generation. Default: ${params.pmdtools_max_reads} - - Annotation Statistics - --run_bedtools_coverage [bool] Turn on ability to calculate no. reads, depth and breadth coverage of features in reference. - --anno_file [file] Path to GFF or BED file containing positions of features in reference file (--fasta). Path should be enclosed in quotes. - - BAM Trimming - --run_trim_bam [bool] Turn on BAM trimming. Will only run on full-UDG or half-UDG libraries. - --bamutils_clip_half_udg_left [num] Specify the number of bases to clip off reads from 'left' end of read for half-UDG libraries. Default: ${params.bamutils_clip_half_udg_left} - --bamutils_clip_half_udg_right [num] Specify the number of bases to clip off reads from 'right' end of read for half-UDG libraries. Default: ${params.bamutils_clip_half_udg_right} - --bamutils_clip_none_udg_left [num] Specify the number of bases to clip off reads from 'left' end of read for non-UDG libraries. Default: ${params.bamutils_clip_none_udg_left} - --bamutils_clip_none_udg_right [num] Specify the number of bases to clip off reads from 'right' end of read for non-UDG libraries. Default: ${params.bamutils_clip_none_udg_right} - --bamutils_softclip [bool] Turn on using softclip instead of hard masking. - - Genotyping - --run_genotyping [bool] Turn on genotyping of BAM files. - --genotyping_tool [str] Specify which genotyper to use either GATK UnifiedGenotyper, GATK HaplotypeCaller, Freebayes, or pileupCaller. Options: 'ug', 'hc', 'freebayes', 'pileupcaller', 'angsd'. - --genotyping_source [str] Specify which input BAM to use for genotyping. Options: 'raw', 'trimmed', 'pmd', 'rescaled'. Default: '${params.genotyping_source}' - --gatk_call_conf [num] Specify GATK phred-scaled confidence threshold. Default: ${params.gatk_call_conf} - --gatk_ploidy [num] Specify GATK organism ploidy. 
Default: ${params.gatk_ploidy} - --gatk_downsample [num] Maximum depth coverage allowed for genotyping before down-sampling is turned on. Default: ${params.gatk_downsample} - --gatk_dbsnp [file] Specify VCF file for output VCF SNP annotation. Optional. Gzip not accepted. - --gatk_hc_out_mode [str] Specify GATK output mode. Options: 'EMIT_VARIANTS_ONLY', 'EMIT_ALL_CONFIDENT_SITES', 'EMIT_ALL_ACTIVE_SITES'. Default: '${params.gatk_hc_out_mode}' - --gatk_hc_emitrefconf [str] Specify HaplotypeCaller mode for emitting reference confidence calls . Options: 'NONE', 'BP_RESOLUTION', 'GVCF'. Default: '${params.gatk_hc_emitrefconf}' - --gatk_ug_out_mode [str] Specify GATK output mode. Options: 'EMIT_VARIANTS_ONLY', 'EMIT_ALL_CONFIDENT_SITES', 'EMIT_ALL_SITES'. Default: '${params.gatk_ug_out_mode}' - --gatk_ug_genotype_model [str] Specify UnifiedGenotyper likelihood model. Options: 'SNP', 'INDEL', 'BOTH', 'GENERALPLOIDYSNP', 'GENERALPLOIDYINDEL'. Default: '${params.gatk_ug_genotype_model}' - --gatk_ug_keep_realign_bam [bool] Specify to keep the BAM output of re-alignment around variants from GATK UnifiedGenotyper. - --gatk_ug_defaultbasequalities [num] Supply a default base quality if a read is missing a base quality score. Setting to -1 turns this off. - --freebayes_C [num] Specify minimum required supporting observations to consider a variant. Default: ${params.freebayes_C} - --freebayes_g [num] Specify to skip over regions of high depth by discarding alignments overlapping positions where total read depth is greater than specified in --freebayes_C. Default: ${params.freebayes_g} - --freebayes_p [num] Specify ploidy of sample in FreeBayes. Default: ${params.freebayes_p} - --pileupcaller_bedfile [file] Specify path to SNP panel in bed format for pileupCaller. - --pileupcaller_snpfile [file] Specify path to SNP panel in EIGENSTRAT format for pileupCaller. - --pileupcaller_method [str] Specify calling method to use. Options: 'randomHaploid', 'randomDiploid', 'majorityCall'. 
Default: '${params.pileupcaller_method}' - --pileupcaller_transitions_mode [str] Specify the calling mode for transitions. Options: 'AllSites', 'TransitionsMissing', 'SkipTransitions'. Default: '${params.pileupcaller_transitions_mode}' - --angsd_glmodel [str] Specify which ANGSD genotyping likelihood model to use. Options: 'samtools', 'gatk', 'soapsnp', 'syk'. Default: '${params.angsd_glmodel}' - --angsd_glformat [str] Specify which output type to output ANGSD genotyping likelihood results: Options: 'text', 'binary', 'binary_three', 'beagle'. Default: '${params.angsd_glformat}' - --angsd_createfasta [bool] Turn on creation of FASTA from ANGSD genotyping likelhoood. - --angsd_fastamethod [str] Specify which genotype type of 'base calling' to use for ANGSD FASTA generation. Options: 'random', 'common'. Default: '${params.angsd_fastamethod}' - - Consensus Sequence Generation - --run_vcf2genome [bool] Turns on ability to create a consensus sequence FASTA file based on a UnifiedGenotyper VCF file and the original reference (only considers SNPs). - --vcf2genome_outfile [str] Specify name of the output FASTA file containing the consensus sequence. Do not include `.vcf` in the file name. Default: '' - --vcf2genome_header [str] Specify the header name of the consensus sequence entry within the FASTA file. Default: '' - --vcf2genome_minc [num] Minimum depth coverage required for a call to be included (else N will be called). Default: ${params.vcf2genome_minc} - --vcf2genome_minq [num] Minimum genotyping quality of a call to be called. Else N will be called. Default: ${params.vcf2genome_minq} - --vcf2genome_minfreq [float] Minimum fraction of reads supporting a call to be included. Else N will be called. Default: ${params.vcf2genome_minfreq} - - SNP Table Generation - --run_multivcfanalyzer [bool] Turn on MultiVCFAnalyzer. Note: This currently only supports diploid GATK UnifiedGenotyper input. 
- --write_allele_frequencies [bool] Turn on writing write allele frequencies in the SNP table. - --min_genotype_quality [num] Specify the minimum genotyping quality threshold for a SNP to be called. Default: ${params.min_genotype_quality} - --min_base_coverage [num] Specify the minimum number of reads a position needs to be covered to be considered for base calling. Default: ${params.min_base_coverage} - --min_allele_freq_hom [float] Specify the minimum allele frequency that a base requires to be considered a 'homozygous' call. Default: ${params.min_allele_freq_hom} - --min_allele_freq_het [float] Specify the minimum allele frequency that a base requires to be considered a 'heterozygous' call. Default: ${params.min_allele_freq_het} - --additional_vcf_files [file] Specify paths to additional pre-made VCF files to be included in the SNP table generation. Use wildcard(s) for multiple files. Optional. - --reference_gff_annotations [file] Specify path to the reference genome annotations in '.gff' format. Optional. - --reference_gff_exclude [file] Specify path to the positions to be excluded in '.gff' format. Optional. - --snp_eff_results [file] Specify path to the output file from SNP effect analysis in '.txt' format. Optional. - - Mitochondrial to Nuclear Ratio - --run_mtnucratio [bool] Turn on mitochondrial to nuclear ratio calculation. - --mtnucratio_header [str] Specify the name of the reference FASTA entry corresponding to the mitochondrial genome (up to the first space). Default: '${params.mtnucratio_header}' - - Sex Determination - --run_sexdeterrmine [bool] Turn on sex determination for human reference genomes. - --sexdeterrmine_bedfile [file] Specify path to SNP panel in bed format for error bar calculation. Optional (see documentation). - - Nuclear Contamination for Human DNA - --run_nuclear_contamination [bool] Turn on nuclear contamination estimation for human reference genomes. 
- --contamination_chrom_name [str] The name of the X chromosome in your bam or FASTA header. 'X' for hs37d5, 'chrX' for HG19. Default: '${params.contamination_chrom_name}' - - Metagenomic Screening - --metagenomic_complexity_filter Turn on removal of low-sequence complexity reads for metagenomic screening with bbduk. - --metagenomic_complexity_entropy Specify the entropy threshold that under which a sequencing read will be complexity filtered out. This should be between 0-1. Default: '${params.metagenomic_complexity_entropy}' - --run_metagenomic_screening [bool] Turn on metagenomic screening module for reference-unmapped reads - --metagenomic_tool [str] Specify which classifier to use. Options: 'malt', 'kraken'. Default: '${params.contamination_chrom_name}' - --database [dir] Specify path to classifer database directory. For Kraken2 this can also be a `.tar.gz` of the directory. - --metagenomic_min_support_reads [num] Specify a minimum number of reads a taxon of sample total is required to have to be retained. Not compatible with . Default: ${params.metagenomic_min_support_reads} - --percent_identity [num] Percent identity value threshold for MALT. Default: ${params.percent_identity} - --malt_mode [str] Specify which alignment method to use for MALT. Options: 'Unknown', 'BlastN', 'BlastP', 'BlastX', 'Classifier'. Default: '${params.malt_mode}' - --malt_alignment_mode [str] Specify alignment method for MALT. Options: 'Local', 'SemiGlobal'. Default: '${params.malt_alignment_mode}' - --malt_top_percent [num] Specify the percent for LCA algorithm for MALT (see MEGAN6 CE manual). Default: ${params.malt_top_percent} - --malt_min_support_mode [str] Specify whether to use percent or raw number of reads for minimum support required for taxon to be retained for MALT. Options: 'percent', 'reads'. 
Default: '${params.malt_min_support_mode}' - --malt_min_support_percent [num] Specify the minimum percentage of reads a taxon of sample total is required to have to be retained for MALT. Default: Default: ${params.malt_min_support_percent} - --malt_max_queries [num] Specify the maximium number of queries a read can have for MALT. Default: ${params.malt_max_queries} - --malt_memory_mode [str] Specify the memory load method. Do not use 'map' with GPFS file systems for MALT as can be very slow. Options: 'load', 'page', 'map'. Default: '${params.malt_memory_mode}' - --malt_sam_output [bool] Specify to also produce SAM alignment files. Note this includes both aligned and unaligned reads, and are gzipped. Note this will result in very large file sizes. - - Metagenomic Authentication - --run_maltextract [bool] Turn on MaltExtract for MALT aDNA characteristics authentication - --maltextract_taxon_list [file] Path to a txt file with taxa of interest (one taxon per row, NCBI taxonomy name format) - --maltextract_ncbifiles [dir] Path to directory containing containing NCBI resource files (ncbi.tre and ncbi.map; avaliable: https://github.com/rhuebler/HOPS/) - --maltextract_filter [str] Specify which MaltExtract filter to use. Options: 'def_anc', 'ancient', 'default', 'crawl', 'scan', 'srna', 'assignment'. Default: '${params.maltextract_filter}' - --maltextract_toppercent [num] Specify percent of top alignments to use. Default: ${params.maltextract_toppercent} - --maltextract_destackingoff [bool] Turn off destacking. - --maltextract_downsamplingoff [bool] Turn off downsampling. - --maltextract_duplicateremovaloff [bool] Turn off duplicate removal. - --maltextract_matches [bool] Turn on exporting alignments of hits in BLAST format. - --maltextract_megansummary [bool] Turn on export of MEGAN summary files. - --maltextract_percentidentity [num] Minimum percent identity alignments are required to have to be reported. Recommended to set same as MALT parameter. 
Default: ${params.maltextract_percentidentity} - --maltextract_topalignment [int] Turn on using top alignments per read after filtering. - - Other options: - -name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. - --max_memory [str] Memory limit for each step of pipeline. Should be in form e.g. --max_memory '8.GB'. Default: '${params.max_memory}' - --max_time [str] Time limit for each step of the pipeline. Should be in form e.g. --max_time '2.h'. Default: '${params.max_time}' - --max_cpus [str] Maximum number of CPUs to use for each step of the pipeline. Should be in form e.g. Default: '${params.max_cpus}' - --publish_dir_mode [str] Mode for publishing results in the output directory. Available: symlink, rellink, link, copy, copyNoFollow, move. Default: '${params.publish_dir_mode}' - --email [email] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - --email_on_fail [email] Same as --email, except only send mail if the workflow is not successful - --plaintext_email [email] Receive plain text emails rather than HTML - --max_multiqc_email_size [str] Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) - - AWSBatch options: - --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion [str] The AWS Region for your AWS Batch job to run on - --awscli [str] Path to the AWS CLI tool - - For a full list and more information of available parameters, consider the documentation (https://github.com/nf-core/eager/). 
- """.stripIndent() -} - -/////////////////////////////////////////////////////////////////////////////// -/* -- SET UP CONFIGURATION VARIABLES -- */ -/////////////////////////////////////////////////////////////////////////////// - // Show help message params.help = false -if (params.help){ - helpMessage() +def json_schema = "$projectDir/nextflow_schema.json" +if (params.help) { + def command = "nextflow run nf-core/eager -profile --reads'*_R{1,2}.fastq.gz' --fasta '.fasta'" + log.info NfcoreSchema.params_help(workflow, params, json_schema, command) exit 0 } +//////////////////////////////////////////////////// +/* -- VALIDATE PARAMETERS -- */ +//////////////////////////////////////////////////// + +def unexpectedParams = [] +if (params.validate_params) { + unexpectedParams = NfcoreSchema.validateParameters(params, json_schema, log) +} + +// Info required for completion email and summary +def multiqc_report = [] + // Small console separator to make it easier to read errors after launch println "" @@ -604,22 +380,10 @@ if ( params.maltextract_ncbifiles == '' ) { // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name -custom_runName = params.name if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { custom_runName = workflow.runName } -// Check AWS batch settings -if (workflow.profile.contains('awsbatch')) { - // AWSBatch sanity checking - if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (params.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." 
-} - //////////////////////////////////////////////////// /* -- CONFIG FILES -- */ //////////////////////////////////////////////////// @@ -724,100 +488,23 @@ ch_fastq_channel /* -- HEADER LOG INFO -- */ /////////////////////////////////////////////////// -log.info nfcoreHeader() -def summary = [:] -summary['Pipeline Name'] = 'nf-core/eager' -summary['Pipeline Version'] = workflow.manifest.version -if (workflow.revision) summary['Pipeline Release'] = workflow.revision -summary['Run Name'] = custom_runName ?: workflow.runName -summary['Input'] = params.input -summary['Convert input BAM?'] = params.run_convertinputbam ? 'Yes' : 'No' -summary['Fasta Ref'] = params.fasta -summary['BAM Index Type'] = (params.large_ref == "") ? 'BAI' : 'CSI' -if(params.bwa_index || params.bt2_index ) summary['BWA Index'] = "Yes" -summary['Skipping FASTQC?'] = params.skip_fastqc ? 'Yes' : 'No' -summary['Skipping AdapterRemoval?'] = params.skip_adapterremoval ? 'Yes' : 'No' -if (!params.skip_adapterremoval) { - summary['Skip Read Merging'] = params.skip_collapse ? 'Yes' : 'No' - summary['Skip Adapter Trimming'] = params.skip_trim ? 'Yes' : 'No' -} -summary['Running BAM filtering'] = params.run_bam_filtering ? 'Yes' : 'No' -if (params.run_bam_filtering) { - summary['Skip Read Merging'] = params.bam_unmapped_type +//Add header +log.info Headers.nf_core(workflow, params.monochrome_logs) + +//Add Summary Parameters +def summary_params = NfcoreSchema.params_summary_map(workflow, params, json_schema) +log.info NfcoreSchema.params_summary_log(workflow, params, json_schema) + +// Check that conda channels are set-up correctly +if (params.enable_conda) { + Checks.check_conda_channels(log) } -summary['Run Fastq Host Removal'] = params.hostremoval_input_fastq ? 'Yes' : 'No' -if (params.hostremoval_input_fastq){ - summary['Host removal mode'] = params.hostremoval_mode -} -summary['Skipping Preseq?'] = params.skip_preseq ? 'Yes' : 'No' -summary['Skipping Deduplication?'] = params.skip_deduplication ? 
'Yes' : 'No' -summary['Skipping DamageProfiler?'] = params.skip_damage_calculation ? 'Yes' : 'No' -summary['Skipping Qualimap?'] = params.skip_qualimap ? 'Yes' : 'No' -summary['Run BAM Trimming?'] = params.run_trim_bam ? 'Yes' : 'No' -summary['Run PMDtools?'] = params.run_pmdtools ? 'Yes' : 'No' -summary['Run Genotyping?'] = params.run_genotyping ? 'Yes' : 'No' -if (params.run_genotyping){ - summary['Genotyping Tool?'] = params.genotyping_tool - summary['Genotyping BAM Input?'] = params.genotyping_source -} -summary['Run MultiVCFAnalyzer'] = params.run_multivcfanalyzer ? 'Yes' : 'No' -summary['Run VCF2Genome'] = params.run_vcf2genome ? 'Yes' : 'No' -summary['Run SexDetErrMine'] = params.run_sexdeterrmine ? 'Yes' : 'No' -summary['Run Nuclear Contamination Estimation'] = params.run_nuclear_contamination ? 'Yes' : 'No' -summary['Run Bedtools Coverage'] = params.run_bedtools_coverage ? 'Yes' : 'No' -summary['Run Metagenomic Binning'] = params.run_metagenomic_screening ? 'Yes' : 'No' -if (params.run_metagenomic_screening) { - summary['Metagenomic Tool'] = params.metagenomic_tool - summary['Run MaltExtract'] = params.run_maltextract ? 
'Yes' : 'No' -} -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -summary['Output Dir'] = params.outdir -summary['Working Dir'] = workflow.workDir -summary['Container Engine'] = workflow.containerEngine -if(workflow.containerEngine) summary['Container'] = workflow.container -summary['Current Home'] = workflow.homeDir -summary['Current User'] = workflow.userName -summary['Working Dir'] = workflow.workDir -summary['Output Dir'] = params.outdir -summary['Script Dir'] = workflow.projectDir -summary['Config Profile'] = workflow.profile -summary['User'] = workflow.userName -if (workflow.profile.contains('awsbatch')) { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue - summary['AWS CLI'] = params.awscli -} -if(params.email) summary['E-mail Address'] = params.email -summary['Config Profile'] = workflow.profile -if (params.config_profile_description) summary['Config Description'] = params.config_profile_description -if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact -if (params.config_profile_url) summary['Config URL'] = params.config_profile_url -if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC maxsize'] = params.max_multiqc_email_size -} -log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") -log.info "-\033[2m--------------------------------------------------\033[0m-" + +// Check AWS batch settings +Checks.aws_batch(workflow, params) // Check the hostnames against configured profiles -checkHostname() - -Channel.from(summary.collect{ [it.key, it.value] }) - .map { k,v -> "
$k
${v ?: 'N/A'}
" } - .reduce { a, b -> return [a, b].join("\n ") } - .map { x -> """ - id: 'nf-core-eager-summary' - description: " - this information is collected when the pipeline is started." - section_name: 'nf-core/eager Workflow Summary' - section_href: 'https://github.com/nf-core/eager' - plot_type: 'html' - data: | -
- $x -
- """.stripIndent() } - .set { ch_workflow_summary } +Checks.hostname(workflow, params, log) log.info "Schaffa, Schaffa, Genome Baua!" @@ -3261,6 +2948,8 @@ process get_software_versions { } // MultiQC file generation for pipeline report +def workflow_summary = NfcoreSchema.params_summary_multiqc(workflow, summary_params) +ch_workflow_summary = Channel.value(workflow_summary) process multiqc { label 'sc_medium' @@ -3294,7 +2983,6 @@ process multiqc { file ('hops/*') from ch_hops_for_multiqc.collect().ifEmpty([]) file ('nuclear_contamination/*') from ch_nuclear_contamination_for_multiqc.collect().ifEmpty([]) file ('genotyping/*') from ch_eigenstrat_snp_cov_for_multiqc.collect().ifEmpty([]) - file workflow_summary from ch_workflow_summary.collectFile(name: "workflow_summary_mqc.yaml") output: @@ -3302,8 +2990,13 @@ process multiqc { file "*_data" script: - def rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - def rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' + rtitle = '' + rfilename = '' + if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { + rtitle = "--title \"${workflow.runName}\"" + rfilename = "--filename " + workflow.runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" + } + def custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : '' """ multiqc -f $rtitle $rfilename $multiqc_config $custom_config_file . 
@@ -3313,171 +3006,21 @@ process multiqc { // Send completion emails if requested, so user knows data is ready workflow.onComplete { + Completion.email(workflow, params, summary_params, projectDir, log, multiqc_report) + Completion.summary(workflow, params, log, fail_percent_mapped, pass_percent_mapped) +} - // Set up the e-mail variables - def subject = "[nf-core/eager] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[nf-core/eager] FAILED: $workflow.runName" - } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = custom_runName ?: workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - // On success try attach the multiqc report - def mqc_report = null - try { - if 
(workflow.success) { - mqc_report = ch_multiqc_report.getVal() - if (mqc_report instanceof ArrayList) { - log.warn "[nf-core/eager] Found multiple reports from process 'multiqc', will use only one" - mqc_report = mqc_report[0] - } - } - } catch (all) { - log.warn "[nf-core/eager] Could not attach MultiQC report to summary email" - } - - // Check if we are only sending emails on failure - email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/eager] Sent summary e-mail to $email_address (sendmail)" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if (mqc_report == NULL) { - log.warn "[nf-core/eager] Could not attach MultiQC report to summary email" 
- } else if ( mqc_report.size() <= params.max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "[nf-core/eager] Sent summary e-mail to $email_address (mail)" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_red = params.monochrome_logs ? '' : "\033[0;31m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; - - if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-" - log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-" - log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}-" - } - - if (workflow.success) { - log.info "-${c_purple}[nf-core/eager]${c_green} Pipeline completed successfully${c_reset}-" - } else { - checkHostname() - log.info "-${c_purple}[nf-core/eager]${c_red} Pipeline completed with errors${c_reset}-" +workflow.onError { + // Print unexpected parameters + for (p in unexpectedParams) { + log.warn "Unexpected parameter: ${p}" } - } ///////////////////////////////////// /* -- AUXILARY FUNCTIONS -- */ ///////////////////////////////////// -def nfcoreHeader() { - // Log colors ANSI codes - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; - c_dim = params.monochrome_logs ? '' : "\033[2m"; - c_green = params.monochrome_logs ? 
'' : "\033[0;32m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_white = params.monochrome_logs ? '' : "\033[0;37m"; - c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; - - return """ -${c_dim}--------------------------------------------------${c_reset}- - ${c_green},--.${c_black}/${c_green},-.${c_reset} - ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} - ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} - ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} - ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/eager v${workflow.manifest.version}${c_reset} - -${c_dim}--------------------------------------------------${c_reset}- - """.stripIndent() -} - - -def checkHostname() { - def c_reset = params.monochrome_logs ? '' : "\033[0m" - def c_white = params.monochrome_logs ? '' : "\033[0;37m" - def c_red = params.monochrome_logs ? '' : "\033[1;91m" - def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" - if (params.hostnames) { - def hostname = "hostname".execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if (hostname.contains(hname) && !workflow.profile.contains(prof)) { - log.error "====================================================\n" + - " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + - " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + - "============================================================" - } - } - } - } -} - // Channelling the TSV file containing FASTQ or BAM def extract_data(tsvFile) { Channel.fromPath(tsvFile) diff --git a/nextflow.config b/nextflow.config index f63152347..5f6509a93 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,6 +12,7 @@ params { single_end = false outdir = './results' publish_dir_mode = 'copy' + config_profile_name = '' // aws awsqueue = '' @@ -19,6 +20,9 @@ params { awscli = '' //Pipeline options + enable_conda = false + validate_params = true + schema_ignore_params = 'genomes' //Input reads input = null @@ -40,8 +44,6 @@ params { seq_dict = '' large_ref = false save_reference = false - saveTrimmed = true - saveAlignedIntermediates = false //Skipping parts of the pipeline for impatient users skip_fastqc = false @@ -224,7 +226,6 @@ params { maltextract_topalignment = false // Boilerplate options - name = false multiqc_config = false email = false email_on_fail = false @@ -271,7 +272,10 @@ try { } profiles { - conda { process.conda = "$projectDir/environment.yml" } + conda { + process.conda = "$projectDir/environment.yml" + params.enable_conda = true + } debug { process.beforeScript = 'echo $HOSTNAME' } docker { docker.enabled = true diff --git a/nextflow_schema.json b/nextflow_schema.json index 1c3006fcd..1d938153b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -206,13 +206,6 @@ "hidden": true, 
"fa_icon": "fas fa-question-circle" }, - "name": { - "type": "string", - "description": "Workflow name of run, for future reference.", - "fa_icon": "fas fa-fingerprint", - "hidden": true, - "help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. Passed through to steps such as MultiQC and used for things like report filenames and titles." - }, "email": { "type": "string", "description": "Email address for completion summary.", @@ -262,6 +255,31 @@ "default": "${params.outdir}/pipeline_info", "fa_icon": "fas fa-cogs", "hidden": true + }, + "enable_conda": { + "type": "boolean", + "hidden": true, + "description": "Parameter used for checking conda channels to be set correctly." + }, + "validate_params": { + "type": "boolean", + "default": "true", + "description": "Boolean whether to validate parameters against the schema at runtime", + "fa_icon": "fab fa-angellist", + "hidden": true + }, + "schema_ignore_params": { + "type": "string", + "fa_icon": "fas fa-not-equal", + "description": "String to specify ignored parameters for parameter validation", + "hidden": true, + "default": "genomes" + }, + "config_profile_name": { + "type": "string", + "description": "String to describe the config profile that is run.", + "fa_icon": "fas fa-id-badge", + "hidden": true } }, "fa_icon": "fas fa-file-import", @@ -640,7 +658,8 @@ "help_text": "Read removal mode. 
Remove mapped reads completely (`'remove'`) or just replace mapped reads sequence by N (`'replace'`)\n\n> Modifies extract_map_reads.py parameter: `-m`", "enum": [ "strip", - "replace" + "replace", + "remove" ] } }, @@ -661,8 +680,8 @@ }, "bam_mapping_quality_threshold": { "type": "integer", - "description": "Minimum mapping quality for reads filter.", "default": 0, + "description": "Minimum mapping quality for reads filter.", "fa_icon": "fas fa-greater-than-equal", "help_text": "Specify a mapping quality threshold for mapped reads to be kept for downstream analysis. By default keeps all reads and is therefore set to `0` (basically doesn't filter anything).\n\n> Modifies samtools view parameter: `-q`" }, @@ -1007,7 +1026,7 @@ ] }, "gatk_ug_keep_realign_bam": { - "type": "string", + "type": "boolean", "description": "Specify to keep the BAM output of re-alignment around variants from GATK UnifiedGenotyper.", "fa_icon": "fas fa-align-left", "help_text": "If provided when running GATK's UnifiedGenotyper, this will put into the output folder the BAMs that have realigned reads (with GATK's (v3) IndelRealigner) around possible variants for improved genotyping.\n\nThese BAMs will be stored in the same folder as the corresponding VCF files." @@ -1346,7 +1365,6 @@ "type": "string", "description": "Specify which classifier to use. Options: 'malt', 'kraken'.", "fa_icon": "fas fa-tools", - "default": "undefined", "help_text": "Specify which taxonomic classifier to use. There are two options available:\n\n- `kraken` for [Kraken2](https://ccb.jhu.edu/software/kraken2)\n- `malt` for [MALT](https://software-ab.informatik.uni-tuebingen.de/download/malt/welcome.html)\n\n:warning: **Important** It is very important to run `nextflow clean -f` on your\nNextflow run directory once completed. RMA6 files are VERY large and are\n_copied_ from a `work/` directory into the results folder. 
You should clean the\nwork directory with the command to ensure non-redundancy and large HDD\nfootprints!" }, "database": { @@ -1620,4 +1638,4 @@ "$ref": "#/definitions/metagenomic_authentication" } ] -} +} \ No newline at end of file