Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
694afec
Adding in the first bits on that one
apeltzer Feb 5, 2021
5f31576
Check conda channels
apeltzer Feb 5, 2021
20f7d57
Fixed CI, this should work alreadyd well
apeltzer Feb 5, 2021
a041256
No more help checks, this should be fine
apeltzer Feb 7, 2021
d0a60df
Adding in enable_conda for automatic conda channel chceking :wq!
apeltzer Feb 7, 2021
fc48d9b
Proper DOI
apeltzer Feb 7, 2021
3cd8e9c
Hopefully helps with summary_parsm
apeltzer Feb 7, 2021
f464b62
Add TODO
apeltzer Feb 7, 2021
c18a049
Fixed all issues - please check
apeltzer Feb 8, 2021
6d237ed
Add extra deps required for validation of schema
apeltzer Feb 11, 2021
4c784e8
Fix issue in schema + main
apeltzer Feb 11, 2021
a2ac8eb
Forgot a piece of code
apeltzer Feb 12, 2021
c49cf6d
Switching over to newest new schema lib
apeltzer Feb 15, 2021
cf80f35
Adjusted to latest version in tools
apeltzer Feb 18, 2021
045734a
Fix for log_colours
apeltzer Feb 18, 2021
f73d74f
Adjust checks to only contain generic stuff
apeltzer Feb 25, 2021
b845ba6
Add in headers
apeltzer Feb 25, 2021
83b1e5d
Add in headers
apeltzer Feb 25, 2021
b19640c
Replace main.nf header equals to dashes to make merge conflicts easier
jfy133 Feb 25, 2021
eec51b8
Fix nextflow config and schema inconsistencies
jfy133 Feb 25, 2021
f9fd270
Readd previously deleted stuff
jfy133 Feb 25, 2021
1461dfd
Add previously removed CI test
jfy133 Feb 25, 2021
fa8e153
Guess we're good now
apeltzer Mar 12, 2021
f1c6c5b
Merge branch 'add-fancy-help-json-for-james' of https://github.com/nf…
apeltzer Mar 12, 2021
00f5782
Should be fine now
apeltzer Mar 12, 2021
06ef70d
Drop that trimmed too
apeltzer Mar 12, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,6 @@ jobs:
run: |
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/
- name: HELPTEXT Run with the help flag
run: |
nextflow run ${GITHUB_WORKSPACE} --help
- name: Get test data for cases where we don't use TSV input
run: |
git clone --single-branch --branch eager https://github.com/nf-core/test-datasets.git data
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### `Added`

- [#676](https://github.com/nf-core/eager/issues/676) - Added Lib Checks and automatic help message / summary message formatting

### `Fixed`

- [#666](https://github.com/nf-core/eager/issues/666) - Fixed input file staging for `print_nuclear_contamination`
Expand Down
213 changes: 213 additions & 0 deletions lib/Checks.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
import org.yaml.snakeyaml.Yaml

/*
* This file holds several functions used to perform standard checks for the nf-core pipeline template.
*/

class Checks {

// Warn when the local conda configuration is missing one of the channels the
// pipeline requires, or lists them in the wrong order.
//
// log: logger object; only its warn() method is called.
//
// The check is best-effort. If `conda config --show channels` cannot be executed,
// or its output is not a YAML mapping holding a `channels` list, a single
// "could not verify" warning is logged and the method returns without judging
// the configuration.
static void check_conda_channels(log) {
    Yaml parser = new Yaml()
    def channels = null
    try {
        def config = parser.load("conda config --show channels".execute().text)
        // Only a YAML mapping can carry the `channels` key; a scalar or empty
        // document would otherwise fail on the property access.
        if (config instanceof Map) {
            channels = config.channels
        }
    } catch(IOException | RuntimeException e) {
        // IOException: the conda executable could not be started.
        // RuntimeException: the output could not be parsed as YAML.
        log.warn "Could not verify conda channel configuration."
        return
    }

    // A missing or non-list `channels` entry cannot be checked; the indexOf
    // calls below would fail on it.
    if (!(channels instanceof List)) {
        log.warn "Could not verify conda channel configuration."
        return
    }

    // Check that all channels are present
    def required_channels = ['conda-forge', 'bioconda', 'defaults']
    def conda_check_failed = !required_channels.every { ch -> ch in channels }

    // Check that they are in the right order: every required channel has to be
    // listed before the one that follows it in required_channels
    for (int i = 0; i < required_channels.size() - 1; i++) {
        conda_check_failed |= !(channels.indexOf(required_channels[i]) < channels.indexOf(required_channels[i + 1]))
    }

    if (conda_check_failed) {
        log.warn "=============================================================================\n" +
            " There is a problem with your Conda configuration!\n\n" +
            " You will need to set-up the conda-forge and bioconda channels correctly.\n" +
            " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" +
            " NB: The order of the channels matters!\n" +
            "==================================================================================="
    }
}

// Validate the parameters needed for AWS Batch execution.
//
// workflow: object exposing the active `profile` string.
// params:   pipeline parameters; awsqueue, awsregion, outdir and tracedir are read.
//
// Nothing is checked unless the profile string contains 'awsbatch'. Each failed
// check raises a Groovy assertion error carrying the message shown below.
static void aws_batch(workflow, params) {
    def running_on_awsbatch = workflow.profile.contains('awsbatch')
    if (!running_on_awsbatch) {
        return
    }

    // Both the queue and the region have to be provided
    assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"

    // Results must be written to an S3 bucket when running on AWSBatch
    // related: https://github.com/nextflow-io/nextflow/issues/813
    assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!"

    // Trace files must stay off S3 since S3 does not support rolling files
    assert !params.tracedir.startsWith('s3:') : "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles."
}

// Suggest a better-suited profile when the machine's hostname matches an entry in
// params.hostnames but the corresponding profile is not part of the active one.
//
// workflow: object exposing the active `profile` string.
// params:   pipeline parameters; monochrome_logs and hostnames are read. hostnames is
//           iterated as (profile name -> collection of hostname fragments).
// log:      logger object; only its info() method is called.
static void hostname(workflow, params, log) {
    Map colors = Headers.log_colours(params.monochrome_logs)
    if (!params.hostnames) {
        return
    }

    // Name reported by the `hostname` command on this machine
    def machine_name = "hostname".execute().text.trim()

    for (entry in params.hostnames) {
        def prof = entry.key
        for (hname in entry.value) {
            def host_matches = machine_name.contains(hname)
            def profile_active = workflow.profile.contains(prof)
            if (host_matches && !profile_active) {
                def notice = [
                    "=${colors.yellow}====================================================${colors.reset}=",
                    "${colors.yellow}WARN: You are running with `-profile $workflow.profile`",
                    " but your machine hostname is ${colors.white}'$machine_name'${colors.reset}.",
                    " ${colors.yellow_bold}Please use `-profile $prof${colors.reset}`",
                    "=${colors.yellow}====================================================${colors.reset}="
                ]
                log.info notice.join("\n")
            }
        }
    }
}

// Build the citation text for the pipeline.
//
// workflow: object exposing manifest.name, which is inserted into the first line
//           and into the CITATIONS.md URL.
// Returns the multi-line citation message.
private static String citation(workflow) {
    def pipeline_name = workflow.manifest.name
    def text_lines = [
        "If you use ${pipeline_name} for your analysis please cite:",
        "",
        "* The pipeline",
        " https://doi.org/10.1101/2020.06.11.145615",
        "",
        "* The nf-core framework",
        " https://dx.doi.org/10.1038/s41587-020-0439-x",
        " https://rdcu.be/b1GjZ",
        "",
        "* Software dependencies",
        " https://github.com/${pipeline_name}/blob/master/CITATIONS.md"
    ]
    return text_lines.join("\n")
}

// Abort the run when --genome names a key that is absent from params.genomes.
//
// params: pipeline parameters; genomes (map of genome key -> settings) and genome are read.
// log:    logger object; only its error() method is called.
//
// Nothing happens when no genomes map is configured, when no genome was requested,
// or when the requested key exists. Otherwise the available keys are logged and the
// JVM is terminated with exit status 1 so that callers see the run as failed.
static void genome_exists(params, log) {
    if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
        log.error "=============================================================================\n" +
            " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
            " Currently, the available genome keys are:\n" +
            " ${params.genomes.keySet().join(", ")}\n" +
            "============================================================================="
        // Non-zero status: an unknown genome key is a failure, not a clean exit
        System.exit(1)
    }
}

// Look up a single attribute (e.g. fasta) of the genome selected via params.genome.
//
// params:    pipeline parameters; genomes (map of genome key -> settings) and genome are read.
// attribute: key to fetch from the selected genome's settings.
// Returns the stored value, or an empty string when the genome or the attribute
// is not present.
static String get_genome_attribute(params, attribute) {
    def genome_is_configured = params.genomes && params.genome && params.genomes.containsKey(params.genome)
    if (!genome_is_configured) {
        return ''
    }
    def genome_settings = params.genomes[ params.genome ]
    return genome_settings.containsKey(attribute) ? genome_settings[ attribute ] : ''
}

// Emit the "experimental feature" warning about the samplesheet that was
// auto-created from public database ids.
//
// log: logger object; only its warn() method is called, once, with the full message.
static void sra_download(log) {
    def message_lines = [
        "=============================================================================",
        " THIS IS AN EXPERIMENTAL FEATURE!",
        "",
        " Please double-check the samplesheet that has been auto-created using the",
        " public database ids provided via the '--public_data_ids' parameter.",
        "",
        " Public databases don't reliably hold information such as experimental group,",
        " replicate identifiers or strandedness information.",
        "",
        " All of the sample metadata obtained from the ENA has been appended",
        " as additional columns to help you manually curate the samplesheet before",
        " you run the pipeline.",
        "==================================================================================="
    ]
    log.warn message_lines.join("\n")
}

// Emit the warning shown when the GRCh38 assembly from igenomes.config is used.
//
// log: logger object; only its warn() method is called, once, with the full message.
static void ncbi_genome_warn(log) {
    def warning = new StringBuilder()
    warning << "=============================================================================\n"
    warning << " When using '--genome GRCh38' the assembly is from the NCBI and NOT Ensembl.\n"
    warning << " Auto-activating '--skip_biotype_qc' parameter to circumvent the issue below:\n"
    warning << " https://github.com/nf-core/rnaseq/issues/460.\n\n"
    warning << " If you would like to use the soft-masked Ensembl assembly instead please see:\n"
    warning << " https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312\n"
    warning << "==================================================================================="
    log.warn warning.toString()
}

// Emit the warning shown when a UCSC assembly from igenomes.config is used.
//
// log: logger object; only its warn() method is called, once, with the full message.
static void ucsc_genome_warn(log) {
    def message_lines = [
        "=============================================================================",
        " When using UCSC assemblies the 'gene_biotype' field is absent from the GTF file.",
        " Auto-activating '--skip_biotype_qc' parameter to circumvent the issue below:",
        " https://github.com/nf-core/rnaseq/issues/460.",
        "",
        " If you would like to use the soft-masked Ensembl assembly instead please see:",
        " https://github.com/nf-core/rnaseq/issues/159#issuecomment-501184312",
        "==================================================================================="
    ]
    log.warn message_lines.join("\n")
}

// Emit the warning shown when both '--gtf' and '--gff' were supplied.
//
// log: logger object; only its warn() method is called, once, with the full message.
static void gtf_gff_warn(log) {
    def message_lines = [
        "=============================================================================",
        " Both '--gtf' and '--gff' parameters have been provided.",
        " Using GTF file as priority.",
        "==================================================================================="
    ]
    log.warn message_lines.join("\n")
}

// Emit the warning shown when '--skip_alignment' was supplied.
//
// log: logger object; only its warn() method is called, once, with the full message.
static void skip_alignment_warn(log) {
    def warning = new StringBuilder()
    warning << "=============================================================================\n"
    warning << " '--skip_alignment' parameter has been provided.\n"
    warning << " Skipping alignment, quantification and all downstream QC processes.\n"
    warning << "==================================================================================="
    log.warn warning.toString()
}

// Report the unsupported combination of '--aligner star_rsem' and '--with_umi'
// and abort the run.
//
// log: logger object; only its error() method is called.
//
// This method never returns: after logging the message it terminates the JVM with
// exit status 1 so that the run is reported as failed.
static void rsem_umi_error(log) {
    log.error "=============================================================================\n" +
        " When using '--aligner star_rsem', STAR is run by RSEM itself and so it is\n" +
        " not possible to remove UMIs before the quantification.\n\n" +
        " If you would like to remove UMI barcodes using the '--with_umi' option\n" +
        " please use either '--aligner star_salmon' or '--aligner hisat2'.\n" +
        "============================================================================="
    // Non-zero status: an invalid parameter combination is a failure, not a clean exit
    System.exit(1)
}
Comment thread
apeltzer marked this conversation as resolved.
Outdated

// Read the "Uniquely mapped reads %" value from a STAR log and compare it with
// params.min_mapped_reads.
//
// workflow:  object exposing manifest.name (used in the log line).
// params:    pipeline parameters; monochrome_logs and min_mapped_reads are read.
// log:       logger object; only its info() method is called.
// align_log: STAR log; must support eachLine() and getBaseName().
// Returns [ percent_aligned, pass ]. percent_aligned stays 0 when no line matches;
// pass is false when the rate is less than or equal to the threshold.
static ArrayList get_star_percent_mapped(workflow, params, log, align_log) {
    def unique_rate_regex = /Uniquely mapped reads %\s*\|\s*([\d\.]+)%/
    def percent_aligned = 0
    align_log.eachLine { entry ->
        def hit = entry =~ unique_rate_regex
        if (hit) {
            // The last matching line wins
            percent_aligned = hit[0][1].toFloat()
        }
    }

    // Sample label: the log's base name without the '.Log.final' part
    def logname = align_log.getBaseName() - '.Log.final'
    Map colors = Headers.log_colours(params.monochrome_logs)

    def pass = !(percent_aligned <= params.min_mapped_reads.toFloat())
    if (pass) {
        log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} [PASS] STAR ${params.min_mapped_reads}% mapped threshold: ${percent_aligned}% - $logname${colors.reset}."
    } else {
        log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} [FAIL] STAR ${params.min_mapped_reads}% mapped threshold. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS: ${percent_aligned}% - $logname${colors.reset}."
    }
    return [ percent_aligned, pass ]
}

// Derive the library strandedness from the text of an RSeQC infer_experiment.py report.
//
// inferexperiment_file: report to read; must support eachLine().
// cutoff:               a direction is called when its percentage is at least 100 - cutoff.
// Returns [ strandedness, sense, antisense, undetermined ], where strandedness is
// 'forward', 'reverse' or 'unstranded' and the other three are percentages
// (each stays 0 when the report has no matching line).
static ArrayList get_inferexperiment_strandedness(inferexperiment_file, cutoff=30) {
    def undetermined_regex = /Fraction of reads failed to determine:\s([\d\.]+)/
    def se_sense_regex     = /Fraction of reads explained by "\++,--":\s([\d\.]+)/
    def se_antisense_regex = /Fraction of reads explained by "\+-,-\+":\s([\d\.]+)/
    def pe_sense_regex     = /Fraction of reads explained by "1\++,1--,2\+-,2-\+":\s([\d\.]+)/
    def pe_antisense_regex = /Fraction of reads explained by "1\+-,1-\+,2\+\+,2--":\s([\d\.]+)/

    // Captured fraction as a percentage, or null when the line does not match
    def as_percent = { text, regex ->
        def found = text =~ regex
        return found ? found[0][1].toFloat() * 100 : null
    }

    def sense = 0
    def antisense = 0
    def undetermined = 0
    inferexperiment_file.eachLine { row ->
        def failed_pct       = as_percent(row, undetermined_regex)
        def se_sense_pct     = as_percent(row, se_sense_regex)
        def se_antisense_pct = as_percent(row, se_antisense_regex)
        def pe_sense_pct     = as_percent(row, pe_sense_regex)
        def pe_antisense_pct = as_percent(row, pe_antisense_regex)
        if (failed_pct != null)       undetermined = failed_pct
        if (se_sense_pct != null)     sense        = se_sense_pct
        if (se_antisense_pct != null) antisense    = se_antisense_pct
        if (pe_sense_pct != null)     sense        = pe_sense_pct
        if (pe_antisense_pct != null) antisense    = pe_antisense_pct
    }

    // Sense is tested first, so it wins if both directions reach the threshold
    def threshold = 100-cutoff
    def strandedness = sense >= threshold ? 'forward' : (antisense >= threshold ? 'reverse' : 'unstranded')
    return [ strandedness, sense, antisense, undetermined ]
}
}
129 changes: 129 additions & 0 deletions lib/Completion.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Functions to be run on completion of pipeline
*/

class Completion {
// Render the completion report, e-mail it when an address is configured, and write
// the rendered HTML and plain-text reports to <outdir>/pipeline_info/.
//
// workflow:       run metadata (manifest, runName, success, start/complete, ...).
// params:         pipeline parameters; email, email_on_fail, plaintext_email,
//                 max_multiqc_email_size, monochrome_logs and outdir are read.
// summary_params: map of group name -> map of parameters; all groups are merged
//                 into one summary map.
// projectDir:     directory holding assets/email_template.txt,
//                 assets/email_template.html and assets/sendmail_template.txt.
// log:            logger object; info() and warn() are called.
// multiqc_report: object whose getVal() yields the MultiQC report (or a list of
//                 reports); only consulted when the run succeeded.
//
// Delivery: unless params.plaintext_email is set, `sendmail -t` is tried first;
// if that fails, or if plaintext_email is set, the `mail` command is used instead.
static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) {

    // Set up the e-mail variables
    def subject = "[$workflow.manifest.name] Successful: $workflow.runName"

    if (!workflow.success) {
        subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
    }

    // Flatten the grouped parameter summary into a single map
    def summary = [:]
    for (group in summary_params.keySet()) {
        summary << summary_params[group]
    }

    def misc_fields = [:]
    misc_fields['Date Started'] = workflow.start
    misc_fields['Date Completed'] = workflow.complete
    misc_fields['Pipeline script file path'] = workflow.scriptFile
    misc_fields['Pipeline script hash ID'] = workflow.scriptId
    if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository
    if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId
    if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision
    misc_fields['Nextflow Version'] = workflow.nextflow.version
    misc_fields['Nextflow Build'] = workflow.nextflow.build
    misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp

    def email_fields = [:]
    email_fields['version'] = workflow.manifest.version
    email_fields['runName'] = workflow.runName
    email_fields['success'] = workflow.success
    email_fields['dateComplete'] = workflow.complete
    email_fields['duration'] = workflow.duration
    email_fields['exitStatus'] = workflow.exitStatus
    email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
    email_fields['errorReport'] = (workflow.errorReport ?: 'None')
    email_fields['commandLine'] = workflow.commandLine
    email_fields['projectDir'] = workflow.projectDir
    email_fields['summary'] = summary << misc_fields

    // On success try attach the multiqc report
    def mqc_report = null
    try {
        if (workflow.success) {
            mqc_report = multiqc_report.getVal()
            if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
                if (mqc_report.size() > 1) {
                    log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
                }
                mqc_report = mqc_report[0]
            }
        }
    } catch (all) {
        log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
    }

    // Check if we are only sending emails on failure
    def email_address = params.email
    if (!params.email && params.email_on_fail && !workflow.success) {
        email_address = params.email_on_fail
    }

    // Render the TXT template
    def engine = new groovy.text.GStringTemplateEngine()
    def tf = new File("$projectDir/assets/email_template.txt")
    def txt_template = engine.createTemplate(tf).make(email_fields)
    def email_txt = txt_template.toString()

    // Render the HTML template
    def hf = new File("$projectDir/assets/email_template.html")
    def html_template = engine.createTemplate(hf).make(email_fields)
    def email_html = html_template.toString()

    // Render the sendmail template
    def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit
    def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()]
    def sf = new File("$projectDir/assets/sendmail_template.txt")
    def sendmail_template = engine.createTemplate(sf).make(smail_fields)
    def sendmail_html = sendmail_template.toString()

    // Send the e-mail
    Map colors = Headers.log_colours(params.monochrome_logs)
    if (email_address) {
        // Delivery through the `mail` command; used directly for plaintext_email
        // and as the fallback when sendmail fails.
        def send_with_mail = {
            def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
            // mqc_report is still null when the run failed or the report could not
            // be retrieved; only attach a report that exists and is small enough.
            if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
                mail_cmd += [ '-A', mqc_report ]
            }
            mail_cmd.execute() << email_html
            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
        }

        if (params.plaintext_email) {
            send_with_mail()
        } else {
            try {
                // Try to send HTML e-mail using sendmail
                [ 'sendmail', '-t' ].execute() << sendmail_html
                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
            } catch (all) {
                // Catch failures and try with the mail command instead
                send_with_mail()
            }
        }
    }

    // Write summary e-mail HTML and text to files
    def output_d = new File("${params.outdir}/pipeline_info/")
    if (!output_d.exists()) {
        output_d.mkdirs()
    }
    def output_hf = new File(output_d, "pipeline_report.html")
    output_hf.withWriter { w -> w << email_html }
    def output_tf = new File(output_d, "pipeline_report.txt")
    output_tf.withWriter { w -> w << email_txt }
}

// Log the final one-line status of the run.
//
// workflow: run metadata; success, stats.ignoredCount and manifest.name are read.
// params:   pipeline parameters; monochrome_logs is read here.
// log:      logger object; only its info() method is called.
// fail_percent_mapped, pass_percent_mapped: accepted but not used by this method.
//
// A failed run additionally triggers the hostname/profile check before the
// status line is logged.
static void summary(workflow, params, log, fail_percent_mapped=[:], pass_percent_mapped=[:]) {
    Map colors = Headers.log_colours(params.monochrome_logs)

    if (!workflow.success) {
        Checks.hostname(workflow, params, log)
        log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
        return
    }

    def nothing_ignored = workflow.stats.ignoredCount == 0
    if (nothing_ignored) {
        log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
        return
    }

    log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
}
}
Loading