Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 130 additions & 24 deletions lib/Schema.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,17 @@
* This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
*/

import org.everit.json.schema.Schema as JsonSchema
import org.everit.json.schema.loader.SchemaLoader
import org.everit.json.schema.ValidationException
import org.json.JSONObject
import org.json.JSONTokener
import org.json.JSONArray
import groovy.json.JsonSlurper
import groovy.json.JsonBuilder

class Schema {

/*
* This method tries to read a JSON params file
*/
Expand Down Expand Up @@ -84,24 +92,24 @@ class Schema {
* Beautify parameters for --help
*/
private static String params_help(workflow, params, json_schema, command) {
String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n"
output += "Typical pipeline command:\n\n"
String output = Headers.nf_core(workflow, params.monochrome_logs) + '\n'
output += 'Typical pipeline command:\n\n'
output += " ${command}\n\n"
def params_map = params_load(json_schema)
def max_chars = params_max_chars(params_map) + 1
for (group in params_map.keySet()) {
output += group + "\n"
output += group + '\n'
def group_params = params_map.get(group) // This gets the parameters of that particular group
for (param in group_params.keySet()) {
def type = "[" + group_params.get(param).type + "]"
def type = '[' + group_params.get(param).type + ']'
def description = group_params.get(param).description
output += " \u001B[1m--" + param.padRight(max_chars) + "\u001B[1m" + type.padRight(10) + description + "\n"
output += " \u001B[1m--" + param.padRight(max_chars) + "\u001B[1m" + type.padRight(10) + description + '\n'
}
output += "\n"
output += '\n'
}
output += Headers.dashed_line(params.monochrome_logs)
output += "\n\n" + Checks.citation(workflow)
output += "\n\n" + Headers.dashed_line(params.monochrome_logs)
output += '\n\n' + Checks.citation(workflow)
output += '\n\n' + Headers.dashed_line(params.monochrome_logs)
return output
}

Expand All @@ -110,7 +118,7 @@ class Schema {
*/
private static LinkedHashMap params_summary_map(workflow, params, json_schema) {
// Get a selection of core Nextflow workflow options
def Map workflow_summary = [:]
def Map workflow_summary = [:]
if (workflow.revision) {
workflow_summary['revision'] = workflow.revision
}
Expand All @@ -127,7 +135,7 @@ class Schema {
workflow_summary['userName'] = workflow.userName
workflow_summary['profile'] = workflow.profile
workflow_summary['configFiles'] = workflow.configFiles.join(', ')

// Get pipeline parameters defined in JSON Schema
def Map params_summary = [:]
def blacklist = ['hostnames']
Expand All @@ -153,15 +161,15 @@ class Schema {
} else {
if (param_type == 'string') {
if (schema_value.contains('$baseDir') || schema_value.contains('${baseDir}')) {
def sub_string = schema_value.replace('\$baseDir','')
sub_string = sub_string.replace('\${baseDir}','')
def sub_string = schema_value.replace('\$baseDir', '')
sub_string = sub_string.replace('\${baseDir}', '')
if (params_value.contains(sub_string)) {
schema_value = params_value
}
}
if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) {
def sub_string = schema_value.replace('\$params.outdir','')
sub_string = sub_string.replace('\${params.outdir}','')
def sub_string = schema_value.replace('\$params.outdir', '')
sub_string = sub_string.replace('\${params.outdir}', '')
if ("${params.outdir}${sub_string}" == params_value) {
schema_value = params_value
}
Expand All @@ -183,22 +191,22 @@ class Schema {
* Beautify parameters for summary and return as string
*/
private static String params_summary_log(workflow, params, json_schema) {
String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n"
String output = Headers.nf_core(workflow, params.monochrome_logs) + '\n'
def params_map = params_summary_map(workflow, params, json_schema)
def max_chars = params_max_chars(params_map)
for (group in params_map.keySet()) {
def group_params = params_map.get(group) // This gets the parameters of that particular group
if (group_params) {
output += group + "\n"
output += group + '\n'
for (param in group_params.keySet()) {
output += " \u001B[1m" + param.padRight(max_chars) + ": \u001B[1m" + group_params.get(param) + "\n"
output += " \u001B[1m" + param.padRight(max_chars) + ": \u001B[1m" + group_params.get(param) + '\n'
}
output += "\n"
output += '\n'
}
}
output += Headers.dashed_line(params.monochrome_logs)
output += "\n\n" + Checks.citation(workflow)
output += "\n\n" + Headers.dashed_line(params.monochrome_logs)
output += '\n\n' + Checks.citation(workflow)
output += '\n\n' + Headers.dashed_line(params.monochrome_logs)
return output
}

Expand All @@ -212,17 +220,115 @@ class Schema {
for (param in group_params.keySet()) {
summary_section += " <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n"
}
summary_section += " </dl>\n"
summary_section += ' </dl>\n'
}
}

String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
String yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n"
yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n"
yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
yaml_file_text += "plot_type: 'html'\n"
yaml_file_text += "data: |\n"
yaml_file_text += 'data: |\n'
yaml_file_text += "${summary_section}"
return yaml_file_text
}
}

/*
 * Function to loop over all parameters defined in the schema and check
 * whether the given parameters adhere to the specifications
*/
/* groovylint-disable-next-line UnusedPrivateMethodParameter */
private static ArrayList validateParameters(params, jsonSchema, log) {
    /*
     * Validate the supplied pipeline parameters against the JSON schema.
     * Returns the list of supplied parameter names that are not declared in the
     * schema. Exits the run (status 1) on any schema violation, or when a core
     * Nextflow option (e.g. -profile) was passed with two hyphens.
     */
    // Read the schema file once and reuse the text for both the everit
    // validator and the JsonSlurper pass below. Reading via File.text avoids
    // the previously leaked, never-closed InputStream.
    def schemaText = new File(jsonSchema).text

    //=====================================================================//
    // Validate parameters against the schema
    JSONObject rawSchema = new JSONObject(new JSONTokener(schemaText))
    JsonSchema schema = SchemaLoader.load(rawSchema)

    // Clean the parameters (drop false-y values, stringify unit types)
    def cleanedParams = cleanParameters(params)

    // Convert to an org.json JSONObject as required by the validator
    def jsonParams = new JsonBuilder(cleanedParams)
    JSONObject paramsJSON = new JSONObject(jsonParams.toString())

    // Validate; on failure print every violation and abort the run
    try {
        schema.validate(paramsJSON)
    } catch (ValidationException e) {
        log.error 'Found parameter violations!'
        printExceptions(e.toJSON(), log)
        System.exit(1)
    }

    // Check for core Nextflow options passed with two hyphens, and collect
    // supplied parameters that are not declared anywhere in the schema
    def Map schemaParams = (Map) new JsonSlurper().parseText(schemaText).get('definitions')
    def nf_params = ['profile', 'config', 'c', 'C', 'syslog', 'd', 'dockerize',
                     'bg', 'h', 'log', 'quiet', 'q', 'v', 'version']

    // Every parameter name declared in any schema definition group
    def expectedParams = []
    for (group in schemaParams) {
        for (p in group.value['properties']) {
            expectedParams.push(p.key)
        }
    }

    def unexpectedParams = []
    for (specifiedParam in params.keySet()) {
        // Core Nextflow options take a single hyphen; --profile etc. is an error
        if (nf_params.contains(specifiedParam)) {
            log.error "ERROR: You used a core Nextflow option with two hyphens: --${specifiedParam}! Please resubmit with one."
            System.exit(1)
        }
        // Anything not declared in the schema is reported back to the caller
        if (!expectedParams.contains(specifiedParam)) {
            unexpectedParams.push(specifiedParam)
        }
    }

    return unexpectedParams
}

// Recursively log a schema ValidationException: leaf exceptions are printed
// with their JSON pointer, parents print their message before their causes.
private static void printExceptions(exJSON, log) {
    def nested = exJSON['causingExceptions']
    if (nested.length() > 0) {
        // Not a leaf: print this message, then descend into each cause
        log.error exJSON['message']
        nested.each { cause -> printExceptions(cause, log) }
    }
    else {
        // Leaf violation: include the pointer to the offending parameter
        log.error "${exJSON['message']} ${exJSON['pointerToViolation']}"
    }
}

// Produce a shallow copy of the params map suitable for JSON validation:
// false-y entries are dropped, and Nextflow unit types / nested maps are
// converted to their String representation.
private static Map cleanParameters(params) {
    // Copy-construct the same map type (typically LinkedHashMap) so the
    // original params map is never mutated
    def cleaned = params.getClass().newInstance(params)
    params.each { key, value ->
        if (!value) {
            // Remove anything evaluating to false (null, 0, '', false, empty)
            cleaned.remove(key)
            return // continue with the next entry
        }
        // Exact-class checks (not instanceof), matching the original intent:
        // stringify MemoryUnit, Duration and nested LinkedHashMap values
        def valueClass = value.getClass()
        if (valueClass == nextflow.util.MemoryUnit
                || valueClass == nextflow.util.Duration
                || valueClass == LinkedHashMap) {
            cleaned.replace(key, value.toString())
        }
    }
    return cleaned
}

}
Binary file added lib/external_java_deps.jar
Binary file not shown.
19 changes: 16 additions & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,18 @@ nextflow.enable.dsl = 2

def json_schema = "$baseDir/nextflow_schema.json"
if (params.help) {
def command = "nextflow run nf-core/cageseq --input samplesheet.csv --genome GRCh38 -profile docker"
def command = 'nextflow run nf-core/cageseq --input samplesheet.csv --genome GRCh38 -profile docker'
log.info Schema.params_help(workflow, params, json_schema, command)
exit 0
}

////////////////////////////////////////////////////
/* -- VALIDATE PARAMETERS -- */
////////////////////////////////////////////////////
def unexpectedParams = []
if (params.validate_params) {
unexpectedParams = Schema.validateParameters(params, json_schema, log)
}
////////////////////////////////////////////////////
/* -- PRINT PARAMETER SUMMARY -- */
////////////////////////////////////////////////////
Expand All @@ -39,7 +46,6 @@ Checks.aws_batch(workflow, params)
// Check the hostnames against configured profiles
Checks.hostname(workflow, params, log)


/////////////////////////////
/* -- RUN MAIN WORKFLOW -- */
/////////////////////////////
Expand All @@ -49,6 +55,13 @@ workflow {
CAGESEQ ()
}

workflow.onError {
// Print unexpected parameters
for (p in unexpectedParams) {
log.warn "Unexpected parameter: ${p}"
}
}

/////////////////////////////
/* -- THE END -- */
/////////////////////////////
/////////////////////////////
7 changes: 4 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ params {

// Workflow flags
genome = false
input = 'data/*R1.fastq.gz'
input = false
outdir = './results'
publish_dir_mode = 'copy'

validate_params = true

// CAGE-seq specific options
// Alignment options
aligner = 'star'
Expand Down Expand Up @@ -48,7 +49,7 @@ params {
ribo_database_manifest = "$baseDir/assets/rrna-db-defaults.txt"

//Output options
bigwig=false
bigwig = false

//Clustering options
min_cluster = 30
Expand Down
10 changes: 9 additions & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"$schema": "https://json-schema.org/draft-07/schema",
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/nf-core/cageseq/master/nextflow_schema.json",
"title": "nf-core/cageseq pipeline parameters",
"description": "CAGE-seq pipeline",
Expand Down Expand Up @@ -307,6 +307,11 @@
"move"
]
},
"validate_params": {
"type": "boolean",
"description": "Boolean whether to validate parameters against the schema at runtime",
"default": true
},
"name": {
"type": "string",
"description": "Workflow name.",
Expand Down Expand Up @@ -334,6 +339,7 @@
"description": "File size limit when attaching MultiQC reports to summary emails.",
"default": "25.MB",
"fa_icon": "fas fa-file-upload",
"pattern": "\\d+\\.?\\s*[KMGT]?B?",
"hidden": true,
"help_text": "If file generated by pipeline exceeds the threshold, it will not be attached."
},
Expand Down Expand Up @@ -392,6 +398,7 @@
"description": "Maximum amount of memory that can be requested for any single job.",
"default": "128.GB",
"fa_icon": "fas fa-memory",
"pattern": "\\d+\\.?\\s*[GT]?B?",
"hidden": true,
"help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
},
Expand All @@ -401,6 +408,7 @@
"default": "240.h",
"fa_icon": "far fa-clock",
"hidden": true,
"pattern": "\\d+\\.?\\s*[mhd]",
"help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
}
}
Expand Down