Skip to content

Commit e11b956

Browse files
Update eggnogmapper (#10274)
* checkpoint * main updated, topics syntax, sarscov2 - proteome - diamond test updated * stub test added, meta yml description updated * meta updated * update meta.yml * snapshot updated for conda matching * re-update snapshot * fixed db dir logic and added mmseqs test * no_Search test added * updated meta.yml descriptions and ontologies * Update modules/nf-core/eggnogmapper/main.nf Co-authored-by: Martín Beracochea <mbc@ebi.ac.uk> * spaces pushing --------- Co-authored-by: Martín Beracochea <mbc@ebi.ac.uk>
1 parent 62011b9 commit e11b956

5 files changed

Lines changed: 309 additions & 216 deletions

File tree

modules/nf-core/eggnogmapper/environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ channels:
44
- conda-forge
55
- bioconda
66
dependencies:
7-
- bioconda::eggnog-mapper=2.1.12
7+
- bioconda::eggnog-mapper=2.1.13

modules/nf-core/eggnogmapper/main.nf

Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,64 +4,55 @@ process EGGNOGMAPPER {
44

55
conda "${moduleDir}/environment.yml"
66
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
7-
'https://depot.galaxyproject.org/singularity/eggnog-mapper:2.1.12--pyhdfd78af_0':
8-
'biocontainers/eggnog-mapper:2.1.12--pyhdfd78af_0' }"
7+
'https://depot.galaxyproject.org/singularity/eggnog-mapper:2.1.13--pyhdfd78af_2':
8+
'biocontainers/eggnog-mapper:2.1.13--pyhdfd78af_2' }"
99

1010
input:
1111
tuple val(meta), path(fasta)
12-
path(eggnog_db)
12+
tuple val(search_mode), path(db)
1313
path(eggnog_data_dir)
14-
tuple val(meta2), path(eggnog_diamond_db)
1514

1615
output:
1716
tuple val(meta), path("*.emapper.annotations") , emit: annotations
18-
tuple val(meta), path("*.emapper.seed_orthologs"), emit: orthologs
19-
tuple val(meta), path("*.emapper.hits") , emit: hits
20-
path "versions.yml" , emit: versions
17+
tuple val(meta), path("*.emapper.seed_orthologs"), emit: orthologs, optional: true
18+
tuple val(meta), path("*.emapper.hits") , emit: hits , optional: true
19+
tuple val("${task.process}"), val('eggnog-mapper'), eval("emapper.py --version 2>&1 | grep -o 'emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+' | sed 's/emapper-//'"), topic: versions, emit: versions_eggnogmapper
2120

2221
when:
2322
task.ext.when == null || task.ext.when
2423

2524
script:
26-
def args = task.ext.args ?: ''
27-
def prefix = task.ext.prefix ?: "${meta.id}"
28-
def is_compressed = fasta.extension == '.gz' ? true : false
29-
def fasta_name = is_compressed ? fasta.baseName : "$fasta"
30-
def dbmem = task.memory.toMega() > 40000 ? '--dbmem' : ''
31-
def database_arg = eggnog_db ? "--database $eggnog_db" : ''
32-
def dmnd_db_arg = eggnog_diamond_db ? "--dmnd_db $eggnog_diamond_db" : ''
25+
def args = task.ext.args ?: ''
26+
def prefix = task.ext.prefix ?: "${meta.id}"
27+
def is_compressed = fasta.extension == '.gz'
28+
def fasta_name = is_compressed ? fasta.baseName : "$fasta"
29+
def db_flags = ['diamond': '--dmnd_db', 'novel_fams': '--dmnd_db', 'mmseqs': '--mmseqs_db', 'hmmer': '--database', 'no_search': '--annotate_hits_table', 'cache': '--cache']
30+
def db_path = (db instanceof Path && db.isDirectory()) ? "${db}/${db.name}" : "$db"
31+
def db_arg = db && db_flags[search_mode] ? "${db_flags[search_mode]} ${db_path}" : ''
32+
def dbmem = task.memory.toMega() > 40000 ? '--dbmem' : ''
3333
"""
3434
if [ "$is_compressed" == "true" ]; then
3535
gzip -c -d $fasta > $fasta_name
3636
fi
3737
3838
emapper.py \\
39+
$args \\
3940
--cpu ${task.cpus} \\
4041
-i ${fasta_name} \\
4142
--data_dir ${eggnog_data_dir} \\
42-
-m diamond \\
43-
$dmnd_db_arg \\
44-
$database_arg \\
45-
--output ${prefix} \\
43+
-m ${search_mode} \\
44+
$db_arg \\
4645
${dbmem} \\
47-
$args
48-
49-
cat <<-END_VERSIONS > versions.yml
50-
"${task.process}":
51-
eggnog-mapper: \$(echo \$(emapper.py --version) | grep -o "emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+" | sed "s/emapper-//")
52-
END_VERSIONS
46+
--output ${prefix}
5347
"""
5448

5549
stub:
56-
def prefix = task.ext.prefix ?: "${meta.id}"
50+
def prefix = task.ext.prefix ?: "${meta.id}"
5751
"""
52+
echo $args
53+
5854
touch ${prefix}.emapper.annotations
5955
touch ${prefix}.emapper.seed_orthologs
6056
touch ${prefix}.emapper.hits
61-
62-
cat <<-END_VERSIONS > versions.yml
63-
"${task.process}":
64-
eggnog-mapper: \$(echo \$(emapper.py --version) | grep -o "emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+" | sed "s/emapper-//")
65-
END_VERSIONS
6657
"""
6758
}

modules/nf-core/eggnogmapper/meta.yml

Lines changed: 83 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,91 +1,134 @@
1-
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
21
name: "eggnogmapper"
32
description: Fast genome-wide functional annotation through orthology assignment.
43
keywords:
54
- annotation
5+
- functional annotation
66
- orthology
77
- genomics
8+
- eggnog
89
tools:
910
- "eggnogmapper":
10-
description: "Fast genome-wide functional annotation through orthology assignment."
11+
description: |
12+
EggNOG-mapper is a tool for fast functional annotation of novel sequences.
13+
It uses precomputed orthologous groups and phylogenies from the eggNOG database
14+
to transfer functional information from fine-grained orthologs only.
1115
homepage: "https://github.com/eggnogdb/eggnog-mapper"
1216
documentation: "https://github.com/eggnogdb/eggnog-mapper/wiki"
1317
tool_dev_url: "https://github.com/eggnogdb/eggnog-mapper"
1418
doi: "10.1093/molbev/msab293"
15-
licence: ["AGPL v3"]
16-
identifier: ""
19+
licence:
20+
- "AGPL v3"
21+
identifier: "biotools:eggnog-mapper-v2"
1722
input:
1823
- - meta:
1924
type: map
2025
description: |
2126
Groovy Map containing sample information
22-
e.g. `[ id:'test', single_end:false ]`
27+
e.g. `[ id:'sample1' ]`
2328
- fasta:
2429
type: file
25-
description: Database of sequences in FASTA format
26-
pattern: "*.{fasta,fa,fasta.gz,fa.gz}"
27-
ontologies: []
28-
- eggnog_db:
29-
type: file
30-
description: The eggnog database file (e.g. eggnog-mapper/data/eggnog.db)
31-
pattern: "*.db"
32-
ontologies: []
30+
description: Input sequences in FASTA format (plain or gzip-compressed)
31+
pattern: "*.{fasta,faa,fa,fasta.gz,faa.gz,fa.gz}"
32+
ontologies:
33+
- edam: http://edamontology.org/format_1929 # FASTA
34+
- - search_mode:
35+
type: string
36+
description: |
37+
Search mode passed to emapper.py via -m. Determines which backend
38+
is used and which database flag is applied internally.
39+
Supported modes:
40+
- diamond: DIAMOND-based homology search (--dmnd_db)
41+
- novel_fams: DIAMOND search against novel families (--dmnd_db)
42+
- mmseqs: MMseqs2-based search (--mmseqs_db)
43+
- hmmer: HMMER-based search (--database)
44+
- no_search: Skip search step and annotate from a precomputed
45+
*.emapper.seed_orthologs file (--annotate_hits_table)
46+
- cache: Annotate queries from a cache file of previous annotations keyed by query md5 hashes (--cache)
47+
enum: [diamond, novel_fams, mmseqs, hmmer, no_search, cache]
48+
- db:
49+
type: file
50+
description: |
51+
Database file, directory, or precomputed results file required by the
52+
selected search_mode. The module automatically assigns the correct
53+
flag depending on search_mode:
54+
- diamond / novel_fams: DIAMOND database (*.dmnd)
55+
- mmseqs: MMseqs2 database directory or prefix
56+
- hmmer: HMM database (*.hmm, *.h3m)
57+
- no_search: Precomputed *.emapper.seed_orthologs file
58+
- cache: Previously generated *.emapper.annotations file that includes query md5 hashes (produced with --md5)
59+
This input is mandatory but its expected format depends on search_mode.
60+
pattern: "*"
61+
ontologies:
62+
- edam: http://edamontology.org/format_1370 # HMMER format
63+
- edam: http://edamontology.org/format_3475 # TSV
3364
- eggnog_data_dir:
3465
type: directory
35-
description: Directory containing eggnog database files (e.g. eggnog-mapper/data)
66+
description: |
67+
Directory containing eggnog-mapper database files
68+
(e.g. can be downloaded via download_eggnog_data.py,
69+
found in the eggnog-mapper repository)
3670
pattern: "*"
37-
- - meta2:
38-
type: map
39-
description: |
40-
Groovy Map containing database information
41-
e.g. `[ id:'test' ]`
42-
- eggnog_diamond_db:
43-
type: file
44-
description: The eggnog Diamond protein database file (e.g. eggnog-mapper/data/eggnog_proteins.dmnd)
45-
pattern: "*.dmnd"
46-
ontologies: []
4771
output:
4872
annotations:
4973
- - meta:
5074
type: map
5175
description: |
5276
Groovy Map containing sample information
53-
e.g. `[ id:'test', single_end:false ]`
77+
e.g. `[ id:'sample1' ]`
5478
- "*.emapper.annotations":
5579
type: file
56-
description: TSV with the results from the annotation phase
80+
description: TSV file with the results from the annotation phase, including
81+
functional annotations, GO terms, KEGG pathways, and COG categories
5782
pattern: "*.emapper.annotations"
58-
ontologies: []
83+
ontologies:
84+
- edam: http://edamontology.org/format_3475 # TSV
5985
orthologs:
6086
- - meta:
6187
type: map
6288
description: |
6389
Groovy Map containing sample information
64-
e.g. `[ id:'test', single_end:false ]`
90+
e.g. `[ id:'sample1' ]`
6591
- "*.emapper.seed_orthologs":
6692
type: file
67-
description: TSV with the results from parsing the hits, linking queries with
68-
seed orthologs (with commented metadata)
93+
description: TSV file with the results from parsing the hits, linking queries
94+
with their best seed orthologs (includes commented metadata header)
6995
pattern: "*.emapper.seed_orthologs"
70-
ontologies: []
96+
ontologies:
97+
- edam: http://edamontology.org/format_3475 # TSV
7198
hits:
7299
- - meta:
73100
type: map
74101
description: |
75102
Groovy Map containing sample information
76-
e.g. `[ id:'test', single_end:false ]`
103+
e.g. `[ id:'sample1' ]`
77104
- "*.emapper.hits":
78105
type: file
79-
description: TSV with the results from the Diamond search phase
106+
description: TSV file with the raw search hits from the Diamond/MMseqs2/HMMER
107+
search phase before annotation
80108
pattern: "*.emapper.hits"
81-
ontologies: []
109+
ontologies:
110+
- edam: http://edamontology.org/format_3475 # TSV
111+
versions_eggnogmapper:
112+
- - ${task.process}:
113+
type: string
114+
description: The name of the process
115+
- eggnog-mapper:
116+
type: string
117+
description: The name of the tool
118+
- emapper.py --version 2>&1 | grep -o 'emapper-[0-9]\+\.[0-9]\+\.[0-9]\+' | sed 's/emapper-//':
119+
type: eval
120+
description: The expression to obtain the version of the tool
121+
topics:
82122
versions:
83-
- versions.yml:
84-
type: file
85-
description: File containing software versions
86-
pattern: "versions.yml"
87-
ontologies:
88-
- edam: http://edamontology.org/format_3750 # YAML
123+
- - ${task.process}:
124+
type: string
125+
description: The name of the process
126+
- eggnog-mapper:
127+
type: string
128+
description: The name of the tool
129+
- emapper.py --version 2>&1 | grep -o 'emapper-[0-9]\+\.[0-9]\+\.[0-9]\+' | sed 's/emapper-//':
130+
type: eval
131+
description: The expression to obtain the version of the tool
89132
authors:
90133
- "@vagkaratzas"
91134
maintainers:

0 commit comments

Comments
 (0)