diff --git a/.gitignore b/.gitignore index e11134949a..e981592bef 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,6 @@ dist/ downloads/ eggs/ .eggs/ -lib/ lib64/ parts/ sdist/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f5aa9769e..4c0c9c903b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,17 @@ ### Template +* **Major new feature** - Validation of pipeline parameters [[#426]](https://github.com/nf-core/tools/issues/426) + * The addition runs as soon as the pipeline launches and checks the pipeline input parameters for two main things: + * No parameters are supplied that share a name with core Nextflow options (eg. `--resume` instead of `-resume`) + * Supplied parameters validate against the pipeline JSON schema (eg. correct variable types, required values) + * If either parameter validation fails or the pipeline has errors, a warning is given about any unexpected parameters found which are not described in the pipeline schema. + * This behaviour can be disabled by using `--validate_params false` * Added profiles to support the [Charliecloud](https://hpc.github.io/charliecloud/) and [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) container engines [[#824](https://github.com/nf-core/tools/issues/824)] * Fixed typo in nf-core-lint CI that prevented the markdown summary from being automatically posted on PRs as a comment. +* Changed default for `--input` from `data/*{1,2}.fastq.gz` to `null`, as this is now validated by the schema as a required value. +* Removed support for `--name` parameter for custom run names. + * The same functionality for MultiQC still exists with the core Nextflow `-name` option. 
* Added to template docs about how to identify process name for resource customisation ### Modules diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/NfcoreSchema.groovy b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/NfcoreSchema.groovy new file mode 100644 index 0000000000..174e5c54ac --- /dev/null +++ b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/NfcoreSchema.groovy @@ -0,0 +1,208 @@ +/* + * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. + */ + +import org.everit.json.schema.Schema +import org.everit.json.schema.loader.SchemaLoader +import org.everit.json.schema.ValidationException +import org.json.JSONObject +import org.json.JSONTokener +import org.json.JSONArray +import groovy.json.JsonSlurper +import groovy.json.JsonBuilder + +class NfcoreSchema { + /* + * Function to loop over all parameters defined in schema and check + * whether the given parameters adhere to the specifications; exits with status 1 on any error, otherwise returns the names of the unexpected parameters + */ + /* groovylint-disable-next-line UnusedPrivateMethodParameter */ + private static ArrayList validateParameters(params, jsonSchema, log) { + def has_error = false + //=====================================================================// + // Check for nextflow core params and unexpected params + def json = new File(jsonSchema).text + def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') + def specifiedParamKeys = params.keySet() + def nf_params = [ + // Options for base `nextflow` command + 'bg', + 'c', + 'C', + 'config', + 'd', + 'D', + 'dockerize', + 'h', + 'log', + 'q', + 'quiet', + 'syslog', + 'v', + 'version', + + // Options for `nextflow run` command + 'ansi', + 'ansi-log', + 'bg', + 'bucket-dir', + 'c', + 'cache', + 'config', + 'dsl2', + 'dump-channels', + 'dump-hashes', + 'E', + 'entry', + 'latest', + 'lib', + 'main-script', + 'N', + 'name', + 'offline', + 'params-file', + 'pi', + 'plugins', + 
'poll-interval', + 'pool-size', + 'profile', + 'ps', + 'qs', + 'queue-size', + 'r', + 'resume', + 'revision', + 'stdin', + 'stub', + 'stub-run', + 'test', + 'w', + 'with-charliecloud', + 'with-conda', + 'with-dag', + 'with-docker', + 'with-mpi', + 'with-notification', + 'with-podman', + 'with-report', + 'with-singularity', + 'with-timeline', + 'with-tower', + 'with-trace', + 'with-weblog', + 'without-docker', + 'without-podman', + 'work-dir' + ] + def unexpectedParams = [] + + // Collect expected parameters from the schema + def expectedParams = [] + for (group in schemaParams) { + for (p in group.value['properties']) { + expectedParams.push(p.key) + } + } + + for (specifiedParam in specifiedParamKeys) { + // core Nextflow options supplied as pipeline params (two hyphens) + if (nf_params.contains(specifiedParam)) { + log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'" + has_error = true + } + // unexpected params: in neither the schema nor the ignore list + def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' + if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam)) { + unexpectedParams.push(specifiedParam) + } + } + + //=====================================================================// + // Validate parameters against the schema + InputStream inputStream = new File(jsonSchema).newInputStream() + JSONObject rawSchema = new JSONObject(new JSONTokener(inputStream)) + Schema schema = SchemaLoader.load(rawSchema) + + // Clean the parameters + def cleanedParams = cleanParameters(params) + + // Convert to JSONObject + def jsonParams = new JsonBuilder(cleanedParams) + JSONObject paramsJSON = new JSONObject(jsonParams.toString()) + + // Validate + try { + schema.validate(paramsJSON) + } catch (ValidationException e) { + println "" + log.error 'ERROR: Validation of pipeline parameters failed!' 
+ JSONObject exceptionJSON = e.toJSON() + printExceptions(exceptionJSON, paramsJSON, log) + if (unexpectedParams.size() > 0){ + println "" + def warn_msg = 'Found unexpected parameters:' + for (unexpectedParam in unexpectedParams){ + warn_msg = warn_msg + "\n* --${unexpectedParam}: ${paramsJSON[unexpectedParam].toString()}" + } + log.warn warn_msg + } + println "" + has_error = true + } + + if(has_error){ + System.exit(1) + } + + return unexpectedParams + } + + // Recursively walk nested exceptions and log each one that has no causingExceptions of its own + private static void printExceptions(exJSON, paramsJSON, log) { + def causingExceptions = exJSON['causingExceptions'] + if (causingExceptions.length() == 0) { + def m = exJSON['message'] =~ /required key \[([^\]]+)\] not found/ + // Missing required param + if(m.matches()){ + log.error "* Missing required parameter: --${m[0][1]}" + } + // Other base-level error + else if(exJSON['pointerToViolation'] == '#'){ + log.error "* ${exJSON['message']}" + } + // Error with specific param + else { + def param = exJSON['pointerToViolation'] - ~/^#\// + def param_val = paramsJSON[param].toString() + log.error "* --${param}: ${exJSON['message']} (${param_val})" + } + } + for (ex in causingExceptions) { + printExceptions(ex, paramsJSON, log) + } + } + + private static Map cleanParameters(params) { + def new_params = params.getClass().newInstance(params) + for (p in params) { + // remove anything evaluating to false + if (!p['value']) { + new_params.remove(p.key) + } + // Cast MemoryUnit to String + if (p['value'].getClass() == nextflow.util.MemoryUnit) { + new_params.replace(p.key, p['value'].toString()) + } + // Cast Duration to String + if (p['value'].getClass() == nextflow.util.Duration) { + new_params.replace(p.key, p['value'].toString()) + } + // Cast LinkedHashMap to String + if (p['value'].getClass() == LinkedHashMap) { + new_params.replace(p.key, p['value'].toString()) + } + } + return new_params + } + +} diff --git 
a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/nfcore_external_java_deps.jar b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/nfcore_external_java_deps.jar new file mode 100644 index 0000000000..805c8bb5e4 Binary files /dev/null and b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/lib/nfcore_external_java_deps.jar differ diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/main.nf b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/main.nf index e8f861f054..818e4f8fd1 100644 --- a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/main.nf +++ b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/main.nf @@ -9,9 +9,10 @@ ---------------------------------------------------------------------------------------- */ +log.info nfcoreHeader() + def helpMessage() { // TODO nf-core: Add to this help message with new command line parameters - log.info nfcoreHeader() log.info""" Usage: @@ -53,6 +54,16 @@ if (params.help) { exit 0 } +//////////////////////////////////////////////////// +/* -- VALIDATE PARAMETERS -- */ +////////////////////////////////////////////////////+ +def json_schema = "$baseDir/nextflow_schema.json" +def unexpectedParams = [] +if (params.validate_params) { + unexpectedParams = NfcoreSchema.validateParameters(params, json_schema, log) +} +//////////////////////////////////////////////////// + /* * SET UP CONFIGURATION VARIABLES */ @@ -73,13 +84,6 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false if (params.fasta) { ch_fasta = file(params.fasta, checkIfExists: true) } -// Has the run name been specified by the user? 
-// this has the bonus effect of catching both -name and --name -custom_runName = params.name -if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { - custom_runName = workflow.runName -} - // Check AWS batch settings if (workflow.profile.contains('awsbatch')) { // AWSBatch sanity checking @@ -122,10 +126,9 @@ if (params.input_paths) { } // Header log info -log.info nfcoreHeader() def summary = [:] if (workflow.revision) summary['Pipeline Release'] = workflow.revision -summary['Run Name'] = custom_runName ?: workflow.runName +summary['Run Name'] = workflow.runName // TODO nf-core: Report custom parameters here summary['Input'] = params.input summary['Fasta Ref'] = params.fasta @@ -242,8 +245,12 @@ process multiqc { file "multiqc_plots" script: - rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' + rtitle = '' + rfilename = '' + if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { + rtitle = "--title \"${workflow.runName}\"" + rfilename = "--filename " + workflow.runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" + } custom_config_file = params.multiqc_config ? 
"--config $mqc_custom_config" : '' // TODO nf-core: Specify which MultiQC modules to use with -m for a faster run time """ @@ -282,7 +289,7 @@ workflow.onComplete { } def email_fields = [:] email_fields['version'] = workflow.manifest.version - email_fields['runName'] = custom_runName ?: workflow.runName + email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete email_fields['duration'] = workflow.duration @@ -389,6 +396,12 @@ workflow.onComplete { } +workflow.onError { + // Print unexpected parameters + for (p in unexpectedParams) { + log.warn "Unexpected parameter: ${p}" + } +} def nfcoreHeader() { // Log colors ANSI codes diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow.config b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow.config index b107284482..5f0f2e7c35 100644 --- a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow.config +++ b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow.config @@ -11,13 +11,12 @@ params { // Workflow flags // TODO nf-core: Specify your pipeline's command line flags genome = false - input = "data/*{1,2}.fastq.gz" + input = null single_end = false outdir = './results' publish_dir_mode = 'copy' // Boilerplate options - name = false multiqc_config = false email = false email_on_fail = false @@ -34,6 +33,8 @@ params { config_profile_description = false config_profile_contact = false config_profile_url = false + validate_params = true + schema_ignore_params = 'genomes' // Defaults only, expecting to be overwritten max_memory = 128.GB diff --git a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json index 0a6e83a49e..6035b7acd8 100644 --- a/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json +++ 
b/nf_core/pipeline-template/{{cookiecutter.name_noslash}}/nextflow_schema.json @@ -104,6 +104,11 @@ "move" ] }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true + }, "name": { "type": "string", "description": "Workflow name.", @@ -256,4 +261,4 @@ "$ref": "#/definitions/institutional_config_options" } ] -} +} \ No newline at end of file diff --git a/nf_core/schema.py b/nf_core/schema.py index bb762796b8..e0506b2821 100644 --- a/nf_core/schema.py +++ b/nf_core/schema.py @@ -459,9 +459,11 @@ def add_schema_found_configs(self): Add anything that's found in the Nextflow params that's missing in the pipeline schema """ params_added = [] + params_ignore = self.pipeline_params.get("schema_ignore_params", "").strip("\"'").split(",") + params_ignore.append("schema_ignore_params") for p_key, p_val in self.pipeline_params.items(): # Check if key is in schema parameters - if not p_key in self.schema_params: + if p_key not in self.schema_params and p_key not in params_ignore: if ( self.no_prompts or self.schema_from_scratch