Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
2ad8e73
Added a SAMTOOLS_PIPELINE to run multiple samtools commands at once
muffato Dec 11, 2023
9c3ec9d
Just call it sormadup
muffato Dec 11, 2023
a2f9f58
Better text
muffato Dec 11, 2023
12085a4
Added a stub
muffato Dec 11, 2023
56f81e8
Introduced helper variables
muffato Dec 11, 2023
c8332a5
Directly modify the main variable
muffato Dec 11, 2023
a85140a
More commands (and syntaxes)
muffato Dec 11, 2023
41d9fba
Expanded support
muffato Dec 11, 2023
d409456
Made .command.sh more legible
muffato Dec 11, 2023
f33f441
Documentation update
muffato Dec 11, 2023
6d7722d
bugfix: reheader cannot use multiple CPUs
muffato Dec 11, 2023
c102c93
Updated the tests and added one that uses all possible commands
muffato Dec 11, 2023
915e5af
Can't use TMPDIR as it's set to a non-writable location in Singularity
muffato Dec 12, 2023
4f10688
Need to update the checksum to include the new -T option that is in t…
muffato Dec 12, 2023
a8859c7
Documentation update
muffato Dec 12, 2023
09d8694
Use collect() + switch + join()
mahesh-panchal Dec 13, 2023
e646055
off-by-one bugfix
mahesh-panchal Dec 15, 2023
1dd6933
These need to be the input file
muffato Dec 15, 2023
64fc726
Use the boolean here too
muffato Dec 15, 2023
c925779
Updated the test suite
muffato Dec 15, 2023
620ed0e
Documentation update
muffato Dec 15, 2023
9f82a3a
bugfix: Groovy was complaining that "commands" is defined multiple times
muffato Dec 15, 2023
d9d3351
bugfix: operator precedence means we need brackets
muffato Dec 15, 2023
e4b3e9e
The exclamation mark doesn't work
muffato Dec 15, 2023
22f27cc
Should default to the same format as the input
muffato Dec 16, 2023
11fea2c
Added a test for CRAM
muffato Dec 15, 2023
e977015
Added a test for the conversion from BAM to CRAM
muffato Dec 16, 2023
a3cb483
Added support for output the BAM/CRAM index too
muffato Dec 16, 2023
33ac78e
Added an option to support the input faidx file
muffato Dec 16, 2023
5aa27cd
typo
muffato Dec 16, 2023
f6008fe
Updated MD5
muffato Dec 16, 2023
4318da1
Removed the test with the fasta file because it leads to CRAM files w…
muffato Dec 16, 2023
bdc3cfc
Updated MD5
muffato Dec 16, 2023
8c41765
Fixed the stub
muffato Dec 16, 2023
bcc9429
Updated MD5
muffato Dec 16, 2023
7f8d203
Removed the bam2cram test as it makes different files on Conda
muffato Dec 16, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/samtools/pipeline/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: samtools_pipeline
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::samtools=1.18
106 changes: 106 additions & 0 deletions modules/nf-core/samtools/pipeline/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Runs several samtools sub-commands as a single shell pipe, so that the
// intermediate alignments are streamed between the commands instead of being
// written to disk.
//
// Inputs:
//   meta, input        - sample meta map and the BAM/CRAM/SAM file read by the first command
//   meta2, fasta, fai  - reference meta map, FASTA and its index (may be left empty)
//   commands           - list of samtools sub-command names, in execution order
//
// Extra options are taken from task.ext.args for the first command and from
// task.ext.args2, task.ext.args3, ... for the following ones.
//
// Outputs:
//   output   - "<prefix>.<extension>" written by the last command
//   index    - index file, if one is produced (optional)
//   versions - versions.yml with the samtools version
process SAMTOOLS_PIPELINE {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1':
        'biocontainers/samtools:1.18--h50ea8bc_1' }"

    input:
    tuple val(meta), path(input)
    tuple val(meta2), path(fasta), path(fai)
    val commands

    output:
    tuple val(meta), path("*.{bam,cram,sam}"), emit: output
    tuple val(meta), path("*.{bai,csi,crai}"), emit: index, optional: true
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Check that we are asked to run more than 1 command
    def cmd_size = commands.size()
    assert cmd_size > 1 : "SAMTOOLS_PIPELINE expects at least 2 commands, got ${cmd_size}"

    // The output format is driven by the options of the last command. Both the
    // "--output-fmt FMT" and "--output-fmt=FMT" spellings are recognised; when
    // none is given the output keeps the extension of the input file.
    def last_args = task.ext."args$cmd_size" ?: ''
    def requested_fmt = ['sam', 'bam', 'cram'].find { fmt ->
        last_args.contains("--output-fmt ${fmt}") || last_args.contains("--output-fmt=${fmt}")
    }
    def extension = requested_fmt ?: input.extension
    assert "$input" != "${prefix}.${extension}" : "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"

    // Compose pipe: one command line per entry of "commands"
    def cmds = commands.indexed().collect { index, cmd ->
        def first = index == 0
        def last = index == cmd_size-1
        // The first command reads task.ext.args, the N-th one task.ext.argsN
        def command = [
            "samtools $cmd",
            task.ext."args${first ? '' : index+1}" ?: ''
        ]
        // First the common options
        if (cmd != "reheader") {
            // "reheader" is the only command not to offer these
            command << "-@ $task.cpus"
            // The reference is only passed to the command that writes the final file
            command << (fasta && last ? "--reference ${fasta}" : '')
            // Intermediate commands stream uncompressed data to the next one
            command << (!last ? '-u' : '')
        }
        // Then the input/output parameters, which differ between commands
        switch(cmd){
            case "collate":
                // [-o OUTPUT|-O] [INPUT|-]
                command << (last ? "-o ${prefix}.${extension}" : "-O")
                command << (first ? input : '-')
                break
            case ["addreplacerg", "sort", "view"]:
                // [-o OUTPUT] [INPUT|-]
                command << (last ? "-o ${prefix}.${extension}" : "")
                command << (first ? input : '-')
                break
            case "reheader":
                // [INPUT|-]
                command << (first ? input : '-')
                break
            case ["fixmate", "markdup"]:
                // [INPUT|-] [OUTPUT|-]
                command << (first ? input : '-')
                command << (last ? "${prefix}.${extension}" : "-")
                break
            default:
                assert false: "$cmd is not supported"
        }
        command.join(' ')
    }

    """
    ${cmds.join(' | ')}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Same guard as in the script section, so that a stub run rejects the same
    // inputs as a real run
    def cmd_size = commands.size()
    assert cmd_size > 1 : "SAMTOOLS_PIPELINE expects at least 2 commands, got ${cmd_size}"

    // Same output-format detection as in the script section
    def last_args = task.ext."args$cmd_size" ?: ''
    def requested_fmt = ['sam', 'bam', 'cram'].find { fmt ->
        last_args.contains("--output-fmt ${fmt}") || last_args.contains("--output-fmt=${fmt}")
    }
    def extension = requested_fmt ?: input.extension
    assert "$input" != "${prefix}.${extension}" : "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"

    """
    touch ${prefix}.${extension}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
66 changes: 66 additions & 0 deletions modules/nf-core/samtools/pipeline/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
name: "samtools_pipeline"
description: custom bash pipeline made only of samtools commands
keywords:
- pipeline
- bam
- sam
- cram
tools:
- "samtools":
description: "Tools for dealing with SAM, BAM and CRAM files"
homepage: "http://www.htslib.org"
documentation: "https://www.htslib.org/doc/samtools.html"
tool_dev_url: "https://github.com/samtools/samtools"
doi: "10.1093/bioinformatics/btp352"
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- meta2:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- fasta:
type: file
description: FASTA reference file
pattern: "*.{fasta,fa}"
- fai:
type: file
description: Index of the reference file for the CRAM
pattern: "*.fai"
- commands:
type: list
description: |
List of the samtools command names to run (in order). Currently
supported: addreplacerg, collate, fixmate, markdup, reheader, sort,
view
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- output:
type: file
description: Output BAM/CRAM/SAM file (defaults to the same extension as the input)
pattern: "*.{bam,cram,sam}"
- index:
type: file
description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional)
pattern: "*.{bai,csi,crai}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@muffato"
maintainers:
- "@muffato"
3 changes: 3 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2132,6 +2132,9 @@ samtools/import:
samtools/markdup:
- modules/nf-core/samtools/markdup/**
- tests/modules/nf-core/samtools/markdup/**
samtools/pipeline:
- modules/nf-core/samtools/pipeline/**
- tests/modules/nf-core/samtools/pipeline/**
samtools/reheader:
- modules/nf-core/samtools/reheader/**
- tests/modules/nf-core/samtools/reheader/**
Expand Down
58 changes: 58 additions & 0 deletions tests/modules/nf-core/samtools/pipeline/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { SAMTOOLS_PIPELINE as SAMTOOLS_PIPELINE_SORMADUP } from '../../../../../modules/nf-core/samtools/pipeline/main.nf'
include { SAMTOOLS_PIPELINE as SAMTOOLS_PIPELINE_COLLFIXMSORT } from '../../../../../modules/nf-core/samtools/pipeline/main.nf'
include { SAMTOOLS_PIPELINE as SAMTOOLS_PIPELINE_COLLFIXM } from '../../../../../modules/nf-core/samtools/pipeline/main.nf'
include { SAMTOOLS_PIPELINE as SAMTOOLS_PIPELINE_ALL } from '../../../../../modules/nf-core/samtools/pipeline/main.nf'

// Test entry point: runs collate, fixmate, sort and markdup as one pipe on the
// sarscov2 paired-end BAM, through the SAMTOOLS_PIPELINE_SORMADUP alias.
workflow test_samtools_pipeline_sormadup {

input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
]
// samtools sub-commands, in execution order
commands = ['collate', 'fixmate', 'sort', 'markdup']
// The second argument is the (meta2, fasta, fai) reference tuple, left empty here
SAMTOOLS_PIPELINE_SORMADUP ( input, [[],[],[]], commands )
}

// Test entry point: runs collate, fixmate and sort as one pipe on the sarscov2
// paired-end BAM, through the SAMTOOLS_PIPELINE_COLLFIXMSORT alias.
workflow test_samtools_pipeline_collate_fixmate_sort {

input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
]
// samtools sub-commands, in execution order
commands = ['collate', 'fixmate', 'sort']
// The second argument is the (meta2, fasta, fai) reference tuple, left empty here
SAMTOOLS_PIPELINE_COLLFIXMSORT ( input, [[],[],[]], commands )
}

// Test entry point: runs collate and fixmate as one pipe on the sarscov2
// paired-end BAM, through the SAMTOOLS_PIPELINE_COLLFIXM alias.
workflow test_samtools_pipeline_collate_fixmate {

input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
]
// samtools sub-commands, in execution order
commands = ['collate', 'fixmate']
// The second argument is the (meta2, fasta, fai) reference tuple, left empty here
SAMTOOLS_PIPELINE_COLLFIXM ( input, [[],[],[]], commands )
}

// Test entry point: chains seven samtools sub-commands (collate, addreplacerg,
// fixmate, reheader, sort, markdup, view) on the sarscov2 paired-end BAM,
// through the SAMTOOLS_PIPELINE_ALL alias.
workflow test_samtools_pipeline_all {

input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
]
// samtools sub-commands, in execution order
commands = ['collate', 'addreplacerg', 'fixmate', 'reheader', 'sort', 'markdup', 'view']
// The second argument is the (meta2, fasta, fai) reference tuple, left empty here
SAMTOOLS_PIPELINE_ALL ( input, [[],[],[]], commands )
}

// Test entry point: runs collate and fixmate as one pipe on the homo_sapiens
// paired-end sorted CRAM. It goes through the same SAMTOOLS_PIPELINE_COLLFIXM
// alias as test_samtools_pipeline_collate_fixmate.
workflow test_samtools_pipeline_collate_fixmate_cram {

input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true)
]
// samtools sub-commands, in execution order
commands = ['collate', 'fixmate']
// The second argument is the (meta2, fasta, fai) reference tuple, left empty here
SAMTOOLS_PIPELINE_COLLFIXM ( input, [[],[],[]], commands )
}
33 changes: 33 additions & 0 deletions tests/modules/nf-core/samtools/pipeline/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Test configuration for the SAMTOOLS_PIPELINE aliases. For each alias,
// ext.args holds the options of the first command of the pipe and ext.argsN
// those of the N-th command; the trailing comments name the command targeted.
process {

// Publish into "<outdir>/<name>", where <name> is the last ':'-separated
// component of the process name, cut at its first '_' and lowercased.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

// NOTE: collate needs to create temporary files. CI sets TMPDIR to ~,
// which is not mounted any more in Singularity since Nextflow 23.10. We
// add "-T ." so that the temporary files are created in a writable
// location.

withName: SAMTOOLS_PIPELINE_SORMADUP {
ext.args = '-T .' // collate
ext.args2 = '-m' // fixmate
}

withName: SAMTOOLS_PIPELINE_COLLFIXMSORT {
ext.args = '-T .' // collate
ext.args2 = '-m' // fixmate
ext.args3 = '--write-index' // sort
}

withName: SAMTOOLS_PIPELINE_COLLFIXM {
ext.args = '-T .' // collate
ext.args2 = '-m' // fixmate
}

// No ext.args5 / ext.args6 are set here, so the fifth and sixth commands
// of this alias run without extra options.
withName: SAMTOOLS_PIPELINE_ALL {
ext.args = '-T .' // collate
ext.args2 = '-r ID:FOO' // addreplacerg
ext.args3 = '-m' // fixmate
ext.args4 = '-c "sed s/FOO/BAR/"' // reheader
ext.args7 = '-F 0x08' // view
}
}
50 changes: 50 additions & 0 deletions tests/modules/nf-core/samtools/pipeline/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
- name: samtools pipeline test_samtools_pipeline_sormadup
command: nextflow run ./tests/modules/nf-core/samtools/pipeline -entry test_samtools_pipeline_sormadup -c ./tests/config/nextflow.config
tags:
- samtools
- samtools/pipeline
files:
- path: output/samtools/test.bam
md5sum: de5d6a0d54d580b71c946c13b58c5d66
- path: output/samtools/versions.yml

- name: samtools pipeline test_samtools_pipeline_collate_fixmate_sort
command: nextflow run ./tests/modules/nf-core/samtools/pipeline -entry test_samtools_pipeline_collate_fixmate_sort -c ./tests/config/nextflow.config
tags:
- samtools
- samtools/pipeline
files:
- path: output/samtools/test.bam
md5sum: 4ba760209400274dee8d67961e66a28b
- path: output/samtools/test.bam.csi
md5sum: b9378d1e55b1dfecc5610a61cfd80724
- path: output/samtools/versions.yml

- name: samtools pipeline test_samtools_pipeline_collate_fixmate
command: nextflow run ./tests/modules/nf-core/samtools/pipeline -entry test_samtools_pipeline_collate_fixmate -c ./tests/config/nextflow.config
tags:
- samtools
- samtools/pipeline
files:
- path: output/samtools/test.bam
md5sum: e574ac78aef3617888697733a01de8e3
- path: output/samtools/versions.yml

- name: samtools pipeline test_samtools_pipeline_all
command: nextflow run ./tests/modules/nf-core/samtools/pipeline -entry test_samtools_pipeline_all -c ./tests/config/nextflow.config
tags:
- samtools
- samtools/pipeline
files:
- path: output/samtools/test.bam
md5sum: 9815772ce3b23468858f1e76a6123dca
- path: output/samtools/versions.yml
- name: samtools pipeline test_samtools_pipeline_collate_fixmate_cram
command: nextflow run ./tests/modules/nf-core/samtools/pipeline -entry test_samtools_pipeline_collate_fixmate_cram -c ./tests/config/nextflow.config
tags:
- samtools
- samtools/pipeline
files:
- path: output/samtools/test.cram
md5sum: b71fc2e9b7c04e32b13e6071f955434d
- path: output/samtools/versions.yml