-
Notifications
You must be signed in to change notification settings - Fork 1k
Added a SAMTOOLS_PIPELINE to run multiple samtools commands at once #4571
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
36 commits
Select commit
Hold shift + click to select a range
2ad8e73
Added a SAMTOOLS_PIPELINE to run multiple samtools commands at once
muffato 9c3ec9d
Just call it sormadup
muffato a2f9f58
Better text
muffato 12085a4
Added a stub
muffato 56f81e8
Introduced helper variables
muffato c8332a5
Directly modify the main variable
muffato a85140a
More commands (and syntaxes)
muffato 41d9fba
Expanded support
muffato d409456
Made .command.sh more legible
muffato f33f441
Documentation update
muffato 6d7722d
bugfix: reheader cannot use multiple CPUs
muffato c102c93
Updated the tests and added one that uses all possible commands
muffato 915e5af
Can't use TMPDIR as it's set to a non-writable location in Singularity
muffato 4f10688
Need to update the checksum to include the new -T option that is in t…
muffato a8859c7
Documentation update
muffato 09d8694
Use collect() + switch + join()
mahesh-panchal e646055
off-by-one bugfix
mahesh-panchal 1dd6933
These need to be the input file
muffato 64fc726
Use the boolean here too
muffato c925779
Updated the test suite
muffato 620ed0e
Documentation update
muffato 9f82a3a
bugfix: Groovy was complaining that "commands" is defined multiple times
muffato d9d3351
bugfix: operator precedence means we need brackets
muffato e4b3e9e
The exclamation mark doesn't work
muffato 22f27cc
Should default to the same format as the input
muffato 11fea2c
Added a test for CRAM
muffato e977015
Added a test for the conversion from BAM to CRAM
muffato a3cb483
Added support for output the BAM/CRAM index too
muffato 33ac78e
Added an option to support the input faidx file
muffato 5aa27cd
typo
muffato f6008fe
Updated MD5
muffato 4318da1
Removed the test with the fasta file because it leads to CRAM files w…
muffato bdc3cfc
Updated MD5
muffato 8c41765
Fixed the stub
muffato bcc9429
Updated MD5
muffato 7f8d203
Removed the bam2cram test as it makes different files on Conda
muffato File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| name: samtools_pipeline | ||
| channels: | ||
| - conda-forge | ||
| - bioconda | ||
| - defaults | ||
| dependencies: | ||
| - bioconda::samtools=1.18 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,106 @@ | ||
| // Runs a chain of samtools sub-commands connected by shell pipes in a single task. | ||
| // Inputs: | ||
| //   meta, input       - sample meta map and the file read by the first command | ||
| //   meta2, fasta, fai - reference meta map, FASTA and its index; when fasta is set it is | ||
| //                       passed as --reference to the last command only | ||
| //   commands          - ordered list of samtools sub-command names (at least two) | ||
| // Options for each command come from task.ext.args (first command) and | ||
| // task.ext.args2, task.ext.args3, ... (second, third, ... command). | ||
| // Outputs: the final SAM/BAM/CRAM file, an optional index, and versions.yml. | ||
| process SAMTOOLS_PIPELINE { | ||
| tag "$meta.id" | ||
| label 'process_medium' | ||
|
|
||
| conda "${moduleDir}/environment.yml" | ||
| container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
| 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1': | ||
| 'biocontainers/samtools:1.18--h50ea8bc_1' }" | ||
|
|
||
| input: | ||
| tuple val(meta), path(input) | ||
| tuple val(meta2), path(fasta), path(fai) | ||
| val commands | ||
|
|
||
| output: | ||
| tuple val(meta), path("*.{bam,cram,sam}"), emit: output | ||
| tuple val(meta), path("*.{bai,csi,crai}"), emit: index, optional: true | ||
| path "versions.yml", emit: versions | ||
|
|
||
| when: | ||
| task.ext.when == null || task.ext.when | ||
|
|
||
| script: | ||
| def prefix = task.ext.prefix ?: "${meta.id}" | ||
|
|
||
| // Check that we are asked to run more than 1 command | ||
| def cmd_size = commands.size() | ||
| assert cmd_size > 1 : "SAMTOOLS_PIPELINE needs at least two commands, got ${cmd_size}" | ||
|
|
||
| // The output extension follows the format requested in the last command's | ||
| // arguments, and falls back to the extension of the input file. | ||
| // NOTE(review): only the literal "--output-fmt <fmt>" spelling is recognised here; | ||
| // other spellings (e.g. "--output-fmt=<fmt>") fall through to the input extension. | ||
| def last_args = task.ext."args$cmd_size" ?: '' | ||
| def extension = last_args.contains("--output-fmt sam") ? "sam" : | ||
| last_args.contains("--output-fmt bam") ? "bam" : | ||
| last_args.contains("--output-fmt cram") ? "cram" : | ||
| input.extension | ||
| assert "$input" != "${prefix}.${extension}" : "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" | ||
|
|
||
| // Compose pipe | ||
| def cmds = commands.indexed().collect { index, cmd -> | ||
| def first = index == 0 | ||
| def last = index == cmd_size-1 | ||
| // The first command reads "ext.args"; command number N (1-based, N > 1) reads "ext.argsN" | ||
| def command = [ | ||
| "samtools $cmd", | ||
| task.ext."args${first ? '' : index+1}" ?: '' | ||
| ] | ||
| // First the common options | ||
| if (cmd != "reheader") { | ||
| // "reheader" is the only command not to offer these | ||
| command << "-@ $task.cpus" | ||
| command << (fasta && last ? "--reference ${fasta}" : '') | ||
| // Every command but the last one is given -u | ||
| command << (!last ? '-u' : '') | ||
| } | ||
| // Then the input/output parameters, which differ between commands | ||
| switch(cmd){ | ||
| case "collate": | ||
| // [-o OUTPUT|-O] [INPUT|-] | ||
| command << (last ? "-o ${prefix}.${extension}" : "-O") | ||
| command << (first ? input : '-') | ||
| break | ||
| case ["addreplacerg", "sort", "view"]: | ||
| // [-o OUTPUT] [INPUT|-] | ||
| command << (last ? "-o ${prefix}.${extension}" : "") | ||
| command << (first ? input : '-') | ||
| break | ||
| case "reheader": | ||
| // [INPUT|-] | ||
| command << (first ? input : '-') | ||
| break | ||
| case ["fixmate", "markdup"]: | ||
| // [INPUT|-] [OUTPUT|-] | ||
| command << (first ? input : '-') | ||
| command << (last ? "${prefix}.${extension}" : "-") | ||
| break | ||
| default: | ||
| assert false: "$cmd is not supported" | ||
| } | ||
| command.join(' ') | ||
| } | ||
|
|
||
| """ | ||
| ${cmds.join(' | ')} | ||
|
|
||
| cat <<-END_VERSIONS > versions.yml | ||
| "${task.process}": | ||
| samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') | ||
| END_VERSIONS | ||
| """ | ||
|
|
||
| stub: | ||
| // Same output-name logic as the script section; only the main output file is | ||
| // touched (no index is created). | ||
| def prefix = task.ext.prefix ?: "${meta.id}" | ||
| def cmd_size = commands.size() | ||
| def last_args = task.ext."args$cmd_size" ?: '' | ||
| def extension = last_args.contains("--output-fmt sam") ? "sam" : | ||
| last_args.contains("--output-fmt bam") ? "bam" : | ||
| last_args.contains("--output-fmt cram") ? "cram" : | ||
| input.extension | ||
| assert "$input" != "${prefix}.${extension}" : "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" | ||
|
|
||
| """ | ||
| touch ${prefix}.${extension} | ||
|
|
||
| cat <<-END_VERSIONS > versions.yml | ||
| "${task.process}": | ||
| samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') | ||
| END_VERSIONS | ||
| """ | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| name: "samtools_pipeline" | ||
| description: custom bash pipeline made only of samtools commands | ||
| keywords: | ||
| - pipeline | ||
| - bam | ||
| - sam | ||
| - cram | ||
| tools: | ||
| - "samtools": | ||
| description: "Tools for dealing with SAM, BAM and CRAM files" | ||
| homepage: "http://www.htslib.org" | ||
| documentation: "https://www.htslib.org/doc/samtools.html" | ||
| tool_dev_url: "https://github.com/samtools/samtools" | ||
| doi: "10.1093/bioinformatics/btp352" | ||
| licence: ["MIT"] | ||
| input: | ||
| - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - input: | ||
| type: file | ||
| description: BAM/CRAM/SAM file | ||
| pattern: "*.{bam,cram,sam}" | ||
| - meta2: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing reference information | ||
| e.g. [ id:'test' ] | ||
| - fasta: | ||
| type: file | ||
| description: FASTA reference file | ||
| pattern: "*.{fasta,fa}" | ||
| - fai: | ||
| type: file | ||
| description: Index of the reference file for the CRAM | ||
| pattern: "*.fai" | ||
| - commands: | ||
| type: list | ||
| description: | | ||
| List of the samtools command names to run (in order). Currently | ||
| supported: addreplacerg, collate, fixmate, markdup, reheader, sort, | ||
| view | ||
| output: | ||
| - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test', single_end:false ] | ||
| - output: | ||
| type: file | ||
| description: Output BAM/CRAM/SAM file (defaults to the same extension as the input) | ||
| pattern: "*.{bam,cram,sam}" | ||
| - index: | ||
| type: file | ||
| description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) | ||
| pattern: "*.{bai,csi,crai}" | ||
| - versions: | ||
| type: file | ||
| description: File containing software versions | ||
| pattern: "versions.yml" | ||
| authors: | ||
| - "@muffato" | ||
| maintainers: | ||
| - "@muffato" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| #!/usr/bin/env nextflow | ||
|
|
||
| nextflow.enable.dsl = 2 | ||
|
|
||
| include { SAMTOOLS_PIPELINE as SAMTOOLS_PIPELINE_SORMADUP } from '../../../../../modules/nf-core/samtools/pipeline/main.nf' | ||
| include { SAMTOOLS_PIPELINE as SAMTOOLS_PIPELINE_COLLFIXMSORT } from '../../../../../modules/nf-core/samtools/pipeline/main.nf' | ||
| include { SAMTOOLS_PIPELINE as SAMTOOLS_PIPELINE_COLLFIXM } from '../../../../../modules/nf-core/samtools/pipeline/main.nf' | ||
| include { SAMTOOLS_PIPELINE as SAMTOOLS_PIPELINE_ALL } from '../../../../../modules/nf-core/samtools/pipeline/main.nf' | ||
|
|
||
| // Test: runs collate, fixmate, sort and markdup (in that order) on the sarscov2 | ||
| // paired-end BAM; the reference tuple (meta2, fasta, fai) is passed as empty lists. | ||
| workflow test_samtools_pipeline_sormadup { | ||
|
|
||
| input = [ | ||
| [ id:'test', single_end:false ], // meta map | ||
| file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) | ||
| ] | ||
| commands = ['collate', 'fixmate', 'sort', 'markdup'] | ||
| SAMTOOLS_PIPELINE_SORMADUP ( input, [[],[],[]], commands ) | ||
| } | ||
|
|
||
| // Test: runs collate, fixmate and sort (in that order) on the sarscov2 paired-end | ||
| // BAM; the reference tuple (meta2, fasta, fai) is passed as empty lists. | ||
| workflow test_samtools_pipeline_collate_fixmate_sort { | ||
|
|
||
| input = [ | ||
| [ id:'test', single_end:false ], // meta map | ||
| file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) | ||
| ] | ||
| commands = ['collate', 'fixmate', 'sort'] | ||
| SAMTOOLS_PIPELINE_COLLFIXMSORT ( input, [[],[],[]], commands ) | ||
| } | ||
|
|
||
| // Test: runs collate then fixmate on the sarscov2 paired-end BAM; the reference | ||
| // tuple (meta2, fasta, fai) is passed as empty lists. | ||
| workflow test_samtools_pipeline_collate_fixmate { | ||
|
|
||
| input = [ | ||
| [ id:'test', single_end:false ], // meta map | ||
| file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) | ||
| ] | ||
| commands = ['collate', 'fixmate'] | ||
| SAMTOOLS_PIPELINE_COLLFIXM ( input, [[],[],[]], commands ) | ||
| } | ||
|
|
||
| // Test: chains seven commands (collate, addreplacerg, fixmate, reheader, sort, | ||
| // markdup, view) on the sarscov2 paired-end BAM; the reference tuple is passed as | ||
| // empty lists. | ||
| workflow test_samtools_pipeline_all { | ||
|
|
||
| input = [ | ||
| [ id:'test', single_end:false ], // meta map | ||
| file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) | ||
| ] | ||
| commands = ['collate', 'addreplacerg', 'fixmate', 'reheader', 'sort', 'markdup', 'view'] | ||
| SAMTOOLS_PIPELINE_ALL ( input, [[],[],[]], commands ) | ||
| } | ||
|
|
||
| // Test: runs collate then fixmate on the homo_sapiens sorted CRAM, reusing the | ||
| // SAMTOOLS_PIPELINE_COLLFIXM alias; the reference tuple is passed as empty lists. | ||
| workflow test_samtools_pipeline_collate_fixmate_cram { | ||
|
|
||
| input = [ | ||
| [ id:'test', single_end:false ], // meta map | ||
| file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) | ||
| ] | ||
| commands = ['collate', 'fixmate'] | ||
| SAMTOOLS_PIPELINE_COLLFIXM ( input, [[],[],[]], commands ) | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| process { | ||
|
|
||
| publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } | ||
|
|
||
| // NOTE: collate needs to create temporary files. CI sets TMPDIR to ~, | ||
| // which is not mounted any more in Singularity since Nextflow 23.10. We | ||
| // add "-T ." so that the temporary files are created in a writable | ||
| // location. | ||
|
|
||
| withName: SAMTOOLS_PIPELINE_SORMADUP { | ||
| ext.args = '-T .' // collate | ||
| ext.args2 = '-m' // fixmate | ||
| } | ||
|
|
||
| withName: SAMTOOLS_PIPELINE_COLLFIXMSORT { | ||
| ext.args = '-T .' // collate | ||
| ext.args2 = '-m' // fixmate | ||
| ext.args3 = '--write-index' // sort | ||
| } | ||
|
|
||
| withName: SAMTOOLS_PIPELINE_COLLFIXM { | ||
| ext.args = '-T .' // collate | ||
| ext.args2 = '-m' // fixmate | ||
| } | ||
|
|
||
| withName: SAMTOOLS_PIPELINE_ALL { | ||
| ext.args = '-T .' // collate | ||
| ext.args2 = '-r ID:FOO' // addreplacerg | ||
| ext.args3 = '-m' // fixmate | ||
| ext.args4 = '-c "sed s/FOO/BAR/"' // reheader | ||
| ext.args7 = '-F 0x08' // view | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| - name: samtools pipeline test_samtools_pipeline_sormadup | ||
| command: nextflow run ./tests/modules/nf-core/samtools/pipeline -entry test_samtools_pipeline_sormadup -c ./tests/config/nextflow.config | ||
| tags: | ||
| - samtools | ||
| - samtools/pipeline | ||
| files: | ||
| - path: output/samtools/test.bam | ||
| md5sum: de5d6a0d54d580b71c946c13b58c5d66 | ||
| - path: output/samtools/versions.yml | ||
|
|
||
| - name: samtools pipeline test_samtools_pipeline_collate_fixmate_sort | ||
| command: nextflow run ./tests/modules/nf-core/samtools/pipeline -entry test_samtools_pipeline_collate_fixmate_sort -c ./tests/config/nextflow.config | ||
| tags: | ||
| - samtools | ||
| - samtools/pipeline | ||
| files: | ||
| - path: output/samtools/test.bam | ||
| md5sum: 4ba760209400274dee8d67961e66a28b | ||
| - path: output/samtools/test.bam.csi | ||
| md5sum: b9378d1e55b1dfecc5610a61cfd80724 | ||
| - path: output/samtools/versions.yml | ||
|
|
||
| - name: samtools pipeline test_samtools_pipeline_collate_fixmate | ||
| command: nextflow run ./tests/modules/nf-core/samtools/pipeline -entry test_samtools_pipeline_collate_fixmate -c ./tests/config/nextflow.config | ||
| tags: | ||
| - samtools | ||
| - samtools/pipeline | ||
| files: | ||
| - path: output/samtools/test.bam | ||
| md5sum: e574ac78aef3617888697733a01de8e3 | ||
| - path: output/samtools/versions.yml | ||
|
|
||
| - name: samtools pipeline test_samtools_pipeline_all | ||
| command: nextflow run ./tests/modules/nf-core/samtools/pipeline -entry test_samtools_pipeline_all -c ./tests/config/nextflow.config | ||
| tags: | ||
| - samtools | ||
| - samtools/pipeline | ||
| files: | ||
| - path: output/samtools/test.bam | ||
| md5sum: 9815772ce3b23468858f1e76a6123dca | ||
| - path: output/samtools/versions.yml | ||
| - name: samtools pipeline test_samtools_pipeline_collate_fixmate_cram | ||
| command: nextflow run ./tests/modules/nf-core/samtools/pipeline -entry test_samtools_pipeline_collate_fixmate_cram -c ./tests/config/nextflow.config | ||
| tags: | ||
| - samtools | ||
| - samtools/pipeline | ||
| files: | ||
| - path: output/samtools/test.cram | ||
| md5sum: b71fc2e9b7c04e32b13e6071f955434d | ||
| - path: output/samtools/versions.yml |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.