diff --git a/lib/Schema.groovy b/lib/Schema.groovy index e241f0c5..33a4175b 100644 --- a/lib/Schema.groovy +++ b/lib/Schema.groovy @@ -2,9 +2,17 @@ * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. */ +import org.everit.json.schema.Schema as JsonSchema +import org.everit.json.schema.loader.SchemaLoader +import org.everit.json.schema.ValidationException +import org.json.JSONObject +import org.json.JSONTokener +import org.json.JSONArray import groovy.json.JsonSlurper +import groovy.json.JsonBuilder class Schema { + /* * This method tries to read a JSON params file */ @@ -84,24 +92,24 @@ class Schema { * Beautify parameters for --help */ private static String params_help(workflow, params, json_schema, command) { - String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n" - output += "Typical pipeline command:\n\n" + String output = Headers.nf_core(workflow, params.monochrome_logs) + '\n' + output += 'Typical pipeline command:\n\n' output += " ${command}\n\n" def params_map = params_load(json_schema) def max_chars = params_max_chars(params_map) + 1 for (group in params_map.keySet()) { - output += group + "\n" + output += group + '\n' def group_params = params_map.get(group) // This gets the parameters of that particular group for (param in group_params.keySet()) { - def type = "[" + group_params.get(param).type + "]" + def type = '[' + group_params.get(param).type + ']' def description = group_params.get(param).description - output += " \u001B[1m--" + param.padRight(max_chars) + "\u001B[1m" + type.padRight(10) + description + "\n" + output += " \u001B[1m--" + param.padRight(max_chars) + "\u001B[1m" + type.padRight(10) + description + '\n' } - output += "\n" + output += '\n' } output += Headers.dashed_line(params.monochrome_logs) - output += "\n\n" + Checks.citation(workflow) - output += "\n\n" + Headers.dashed_line(params.monochrome_logs) + output += '\n\n' + 
Checks.citation(workflow) + output += '\n\n' + Headers.dashed_line(params.monochrome_logs) return output } @@ -110,7 +118,7 @@ class Schema { */ private static LinkedHashMap params_summary_map(workflow, params, json_schema) { // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] + def Map workflow_summary = [:] if (workflow.revision) { workflow_summary['revision'] = workflow.revision } @@ -127,7 +135,7 @@ class Schema { workflow_summary['userName'] = workflow.userName workflow_summary['profile'] = workflow.profile workflow_summary['configFiles'] = workflow.configFiles.join(', ') - + // Get pipeline parameters defined in JSON Schema def Map params_summary = [:] def blacklist = ['hostnames'] @@ -153,15 +161,15 @@ class Schema { } else { if (param_type == 'string') { if (schema_value.contains('$baseDir') || schema_value.contains('${baseDir}')) { - def sub_string = schema_value.replace('\$baseDir','') - sub_string = sub_string.replace('\${baseDir}','') + def sub_string = schema_value.replace('\$baseDir', '') + sub_string = sub_string.replace('\${baseDir}', '') if (params_value.contains(sub_string)) { schema_value = params_value } } if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir','') - sub_string = sub_string.replace('\${params.outdir}','') + def sub_string = schema_value.replace('\$params.outdir', '') + sub_string = sub_string.replace('\${params.outdir}', '') if ("${params.outdir}${sub_string}" == params_value) { schema_value = params_value } @@ -183,22 +191,22 @@ class Schema { * Beautify parameters for summary and return as string */ private static String params_summary_log(workflow, params, json_schema) { - String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n" + String output = Headers.nf_core(workflow, params.monochrome_logs) + '\n' def params_map = params_summary_map(workflow, params, json_schema) def max_chars = 
params_max_chars(params_map) for (group in params_map.keySet()) { def group_params = params_map.get(group) // This gets the parameters of that particular group if (group_params) { - output += group + "\n" + output += group + '\n' for (param in group_params.keySet()) { - output += " \u001B[1m" + param.padRight(max_chars) + ": \u001B[1m" + group_params.get(param) + "\n" + output += " \u001B[1m" + param.padRight(max_chars) + ": \u001B[1m" + group_params.get(param) + '\n' } - output += "\n" + output += '\n' } } output += Headers.dashed_line(params.monochrome_logs) - output += "\n\n" + Checks.citation(workflow) - output += "\n\n" + Headers.dashed_line(params.monochrome_logs) + output += '\n\n' + Checks.citation(workflow) + output += '\n\n' + Headers.dashed_line(params.monochrome_logs) return output } @@ -212,17 +220,115 @@ class Schema { for (param in group_params.keySet()) { summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" } - summary_section += " \n" + summary_section += ' \n' } } - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + String yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" + yaml_file_text += 'data: |\n' yaml_file_text += "${summary_section}" return yaml_file_text } -} \ No newline at end of file
+
+    /*
+     * Loop over all parameters defined in the schema and check whether the
+     * given parameters adhere to the specifications.
+     *
+     * params     - map of pipeline parameters (iterated as key/value entries)
+     * jsonSchema - path to the JSON schema file
+     * log        - logger used to report violations
+     *
+     * Returns the list of parameter names that were supplied but are not
+     * declared under any 'definitions' group of the schema.
+     * Calls System.exit(1) when schema validation fails or when a core
+     * Nextflow option was passed with two hyphens.
+     */
+    /* groovylint-disable-next-line UnusedPrivateMethodParameter */
+    private static ArrayList validateParameters(params, jsonSchema, log) {
+        //=====================================================================//
+        // Read the schema file exactly once. The text is reused for both the
+        // validator and the expected-parameter lookup below, so no stream is
+        // left open and the file is not parsed from disk twice.
+        String schemaText = new File(jsonSchema).text
+
+        // Validate parameters against the schema
+        JSONObject rawSchema = new JSONObject(new JSONTokener(schemaText))
+        JsonSchema schema = SchemaLoader.load(rawSchema)
+
+        // Clean the parameters
+        def cleanedParams = cleanParameters(params)
+
+        // Convert to JSONObject
+        def jsonParams = new JsonBuilder(cleanedParams)
+        JSONObject paramsJSON = new JSONObject(jsonParams.toString())
+
+        // Validate
+        try {
+            schema.validate(paramsJSON)
+        } catch (ValidationException e) {
+            log.error 'Found parameter violations!'
+            JSONObject exceptionJSON = e.toJSON()
+            printExceptions(exceptionJSON, log)
+            System.exit(1)
+        }
+
+        // Check for nextflow core params and unexpected params
+        def Map schemaParams = (Map) new JsonSlurper().parseText(schemaText).get('definitions')
+        def specifiedParamKeys = params.keySet()
+        def nf_params = ['profile', 'config', 'c', 'C', 'syslog', 'd', 'dockerize',
+                         'bg', 'h', 'log', 'quiet', 'q', 'v', 'version']
+        def unexpectedParams = []
+
+        // Collect expected parameters from the schema
+        def expectedParams = []
+        for (group in schemaParams) {
+            for (p in group.value['properties']) {
+                expectedParams.push(p.key)
+            }
+        }
+
+        for (specifiedParam in specifiedParamKeys) {
+            // nextflow params
+            if (nf_params.contains(specifiedParam)) {
+                log.error "ERROR: You used a core Nextflow option with two hyphens: --${specifiedParam}! Please resubmit with one."
+                System.exit(1)
+            }
+            // unexpected params
+            if (!expectedParams.contains(specifiedParam)) {
+                unexpectedParams.push(specifiedParam)
+            }
+        }
+
+        return unexpectedParams
+    }
+
+    /*
+     * Recursively walk the JSON form of a validation exception and log it.
+     * A node without nested 'causingExceptions' is logged as its message
+     * followed by its 'pointerToViolation'; any other node logs its own
+     * message and then recurses into each nested exception.
+     */
+    private static void printExceptions(exJSON, log) {
+        def causingExceptions = exJSON['causingExceptions']
+        if (causingExceptions.length() == 0) {
+            log.error "${exJSON['message']} ${exJSON['pointerToViolation']}"
+        }
+        else {
+            log.error exJSON['message']
+            for (ex in causingExceptions) {
+                printExceptions(ex, log)
+            }
+        }
+    }
+
+    /*
+     * Return a copy of params prepared for conversion to JSON:
+     *  - entries whose value evaluates to false are removed
+     *  - MemoryUnit, Duration and LinkedHashMap values are replaced by
+     *    their toString() representation
+     * The params object passed in is not modified.
+     */
+    private static Map cleanParameters(params) {
+        def new_params = params.getClass().newInstance(params)
+        for (p in params) {
+            def value = p.value
+            // remove anything evaluating to false; nothing left to cast
+            if (!value) {
+                new_params.remove(p.key)
+                continue
+            }
+            // Cast MemoryUnit, Duration and LinkedHashMap to String
+            def valueClass = value.getClass()
+            if (valueClass == nextflow.util.MemoryUnit ||
+                    valueClass == nextflow.util.Duration ||
+                    valueClass == LinkedHashMap) {
+                new_params.replace(p.key, value.toString())
+            }
+        }
+        return new_params
+    }
+
+} diff --git a/lib/external_java_deps.jar b/lib/external_java_deps.jar new file mode 100644 index 00000000..805c8bb5 Binary files /dev/null and b/lib/external_java_deps.jar differ diff --git a/main.nf b/main.nf index 951d528f..8b321a88 100644 --- a/main.nf +++ b/main.nf @@ -17,11 +17,18 @@ nextflow.enable.dsl = 2 def json_schema = "$baseDir/nextflow_schema.json" if (params.help) { - def command = "nextflow run nf-core/cageseq --input samplesheet.csv --genome GRCh38 -profile docker" + def command = 'nextflow run nf-core/cageseq --input samplesheet.csv --genome GRCh38 -profile docker' log.info Schema.params_help(workflow, params, json_schema, command) exit 0 } +//////////////////////////////////////////////////// +/* -- VALIDATE PARAMETERS -- */ +////////////////////////////////////////////////////+ +def unexpectedParams = [] +if (params.validate_params) { + unexpectedParams = Schema.validateParameters(params, json_schema, log) +} //////////////////////////////////////////////////// /* -- PRINT PARAMETER SUMMARY -- */ //////////////////////////////////////////////////// @@ -39,7 +46,6 @@ Checks.aws_batch(workflow, params) // Check the hostnames against configured profiles Checks.hostname(workflow, params, log) - ///////////////////////////// /* -- RUN MAIN WORKFLOW -- */ ///////////////////////////// @@ -49,6 +55,13 @@ workflow { CAGESEQ () } +workflow.onError { + // Print unexpected parameters + for (p in unexpectedParams) { + log.warn "Unexpected parameter: ${p}" + } +} + ///////////////////////////// /* -- THE END -- */ -///////////////////////////// \ No newline at end of file +///////////////////////////// diff --git a/nextflow.config b/nextflow.config index 03fea8b4..19301e97 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,10 +10,11 @@ params { // Workflow flags genome = false - input = 
'data/*R1.fastq.gz' + input = false outdir = './results' publish_dir_mode = 'copy' - + validate_params = true + // CAGE-seq specific options // Alignment options aligner = 'star' @@ -48,7 +49,7 @@ params { ribo_database_manifest = "$baseDir/assets/rrna-db-defaults.txt" //Output options - bigwig=false + bigwig = false //Clustering options min_cluster = 30 diff --git a/nextflow_schema.json b/nextflow_schema.json index f107552e..b7a3e6f8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,5 +1,5 @@ { - "$schema": "https://json-schema.org/draft-07/schema", + "$schema": "http://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/nf-core/cageseq/master/nextflow_schema.json", "title": "nf-core/cageseq pipeline parameters", "description": "CAGE-seq pipeline", @@ -307,6 +307,11 @@ "move" ] }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true + }, "name": { "type": "string", "description": "Workflow name.", @@ -334,6 +339,7 @@ "description": "File size limit when attaching MultiQC reports to summary emails.", "default": "25.MB", "fa_icon": "fas fa-file-upload", + "pattern": "\\d+\\.?\\s*[KMGT]?B?", "hidden": true, "help_text": "If file generated by pipeline exceeds the threshold, it will not be attached." }, @@ -392,6 +398,7 @@ "description": "Maximum amount of memory that can be requested for any single job.", "default": "128.GB", "fa_icon": "fas fa-memory", + "pattern": "\\d+\\.?\\s*[GT]?B?", "hidden": true, "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" }, @@ -401,6 +408,7 @@ "default": "240.h", "fa_icon": "far fa-clock", "hidden": true, + "pattern": "\\d+\\.?\\s*[mhd]", "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } }