Skip to content

Commit d4b9696

Browse files
authored
Merge pull request #7585 from hugolefeuvre/add-module-multiqc
Add sylph-tax and checkm2 module for multiQC
2 parents a3e4ec6 + 5c3dee7 commit d4b9696

9 files changed

Lines changed: 134 additions & 11 deletions

File tree

tools/multiqc/checkm2_plugin.xml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<macros>
2+
<!-- Command snippet for the "checkm2" software choice.
It sets the Cheetah variable $pattern to a tab-separated list of column names and then expands the LN_FILES token.
NOTE(review): the LN_FILES token is defined outside this file; presumably it uses $pattern when linking the inputs. Confirm against its definition. -->
<token name="@CHECKM2_COMMAND@"><![CDATA[
3+
#set $pattern = "Name\tCompleteness\tContamination\tCompleteness_Model_Used\tTranslation_Table_Used"
4+
@LN_FILES@
5+
]]></token>
6+
<!-- Input form for the "checkm2" software choice: one or more tabular datasets. -->
<xml name="checkm2_form">
7+
<param name="input" type="data" format="tabular" multiple="true" label="Output of CheckM2" help="It should be the quality report from CheckM2"/>
8+
</xml>
9+
<!-- Tests for the "checkm2" software choice: one report-generation test followed by one expected-failure test. -->
<xml name="checkm2_test">
10+
<!-- Runs the tool on checkm2.tsv and expects three outputs, an HTML report containing the title, the comment and "CheckM2", and an empty "plots" collection.
The spelling "Commment" appears in both the parameter value and the assertion; the two strings must stay identical. -->
<test expect_num_outputs="3">
11+
<repeat name="results">
12+
<conditional name="software_cond">
13+
<param name="software" value="checkm2"/>
14+
<param name="input" value="checkm2.tsv"/>
15+
</conditional>
16+
</repeat>
17+
<param name="title" value="Title of the report"/>
18+
<param name="comment" value="Commment for the report"/>
19+
<param name="flat" value="true"/>
20+
<param name="export" value="true"/>
21+
<output name="html_report">
22+
<assert_contents>
23+
<has_text text="Title of the report"/>
24+
<has_text text="Commment for the report"/>
25+
<has_text text="CheckM2"/>
26+
</assert_contents>
27+
</output>
28+
<output_collection name="plots" type="list" count="0"/>
29+
</test>
30+
<!-- This test uses a CheckM2 TSV file with wrong required headers to check error handling.
31+
Some CheckM2 column names are parameter-dependent and the CheckM2 MultiQC module relies on a specific column pattern.
32+
An issue has been opened: https://github.com/MultiQC/MultiQC/issues/3466
33+
There is a stdio rule in the macros that needs to be removed once the issue is fixed. -->
34+
<test expect_failure="true" expect_exit_code="1">
35+
<repeat name="results">
36+
<conditional name="software_cond">
37+
<param name="software" value="checkm2"/>
38+
<param name="input" value="checkm2-wrong.tsv"/>
39+
</conditional>
40+
</repeat>
41+
<param name="title" value="Title of the report"/>
42+
<param name="comment" value="Commment for the report"/>
43+
<param name="flat" value="true"/>
44+
<param name="export" value="true"/>
45+
</test>
46+
</xml>
47+
</macros>

tools/multiqc/fastp_plugin.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
<has_text text="fastp-after_filtering_q30_rate"/>
3939
<has_text text="bwa-mem-fastq1_fq"/>
4040
<has_n_lines n="3"/>
41-
<has_n_columns n="8"/>
41+
<has_n_columns n="10"/>
4242
</assert_contents>
4343
</output>
4444
<output_collection name="plots" type="list" count="15"/>

tools/multiqc/macros.xml

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<macros>
2-
<token name="@TOOL_VERSION@">1.27</token>
3-
<token name="@VERSION_SUFFIX@">4</token>
2+
<token name="@TOOL_VERSION@">1.33</token>
3+
<token name="@VERSION_SUFFIX@">0</token>
44
<xml name="bio_tools">
55
<xrefs>
66
<xref type="bio.tools">multiqc</xref>
@@ -11,6 +11,16 @@
1111
<requirement type="package" version="@TOOL_VERSION@">multiqc</requirement>
1212
</requirements>
1313
</xml>
14+
<!-- Temporary stdio rule: it works around an issue with CheckM2 report columns and must be removed once that issue is fixed.
The single regex below is checked against both output streams (source="both") and, when it matches, reports a fatal error with the given description. -->
15+
<xml name="stdio">
16+
<stdio>
17+
<regex
18+
match="Module 'checkm2: .*not found in the file.*"
19+
source="both"
20+
level="fatal"
21+
description="Invalid CheckM2 TSV file: required header not found" />
22+
</stdio>
23+
</xml>
1424
<xml name="citations">
1525
<citations>
1626
<citation type="doi">10.1101/gr.244293.118</citation>
@@ -184,11 +194,11 @@ sp:
184194
<has_text text="slamdunk-retained"/>
185195
<has_text text="C2"/>
186196
<has_n_lines n="11"/>
187-
<has_n_columns n="22"/>
197+
<has_n_columns n="24"/>
188198
</assert_contents>
189199
</output>
190200
<output_collection name="plots" type="list" count="29"/>
191-
<output_collection name="png_plot" type="list" count="38"/>
201+
<output_collection name="png_plot" type="list" count="36"/>
192202
</test>
193203
<!--Test 02-->
194204
<test expect_num_outputs="2">
@@ -482,7 +492,7 @@ sp:
482492
<has_text text="qualimap"/>
483493
<has_text text="samblaster_duplicates"/>
484494
<has_text text="quast-stats"/>
485-
<has_text text="samtools-flagstat-dp"/>
495+
<has_text text="samtools-flagstat-pct"/>
486496
<has_text text="snpeff"/>
487497
</assert_contents>
488498
</output>
@@ -507,8 +517,8 @@ sp:
507517
<has_text text="snpeff_csv"/>
508518
<has_text text="bamtools-mapped_reads_pct"/>
509519
<has_text text="bamtools-duplicates_pct"/>
510-
<has_n_lines n="22"/>
511-
<has_n_columns n="50"/>
520+
<has_n_lines n="23"/>
521+
<has_n_columns n="55"/>
512522
</assert_contents>
513523
</output>
514524
</test>
@@ -528,7 +538,7 @@ sp:
528538
</repeat>
529539
<output name="html_report" ftype="html">
530540
<assert_contents>
531-
<has_size value="4766687" delta="500"/>
541+
<has_size value="2142211" delta="500"/>
532542
</assert_contents>
533543
</output>
534544
<!--output name="stats" ftype="tabular">
@@ -576,7 +586,7 @@ sp:
576586
</assert_contents>
577587
</element>
578588
</output_collection>
579-
<output_collection name="png_plot" type="list" count="11"/>
589+
<output_collection name="png_plot" type="list" count="9"/>
580590

581591
</test>
582592
<!--Test 06-->

tools/multiqc/multiqc.xml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,15 @@
2121
<import>salmon_plugin.xml</import>
2222
<import>samtools_plugin.xml</import>
2323
<import>star_plugin.xml</import>
24+
<import>sylphtax_plugin.xml</import>
2425
<import>trimmomatic_plugin.xml</import>
2526
<import>vcftools_plugin.xml</import>
2627
<import>kraken_plugin.xml</import>
2728
<import>diamond_plugin.xml</import>
2829
<import>bakta_plugin.xml</import>
2930
<import>freyja_plugin.xml</import>
3031
<import>checkm_plugin.xml</import>
32+
<import>checkm2_plugin.xml</import>
3133
<import>pairtools_plugin.xml</import>
3234
<import>porechop_plugin.xml</import>
3335
<import>snippy_plugin.xml</import>
@@ -41,6 +43,7 @@
4143
</macros>
4244
<expand macro="bio_tools"/>
4345
<expand macro="requirements"/>
46+
<expand macro="stdio"/>
4447
<version_command>multiqc --version</version_command>
4548
<command detect_errors="aggressive">
4649
<![CDATA[
@@ -148,6 +151,8 @@ mkdir multiqc_WDir &&
148151
@LN_FILES@
149152
#else if str($repeat.software_cond.software) == "star":
150153
@STAR_COMMAND@
154+
#elif str($repeat.software_cond.software) == "sylphtax"
155+
@SYLPHTAX_COMMAND@
151156
#elif str($repeat.software_cond.software) == "tophat"
152157
#for $file in $repeat.software_cond.input
153158
@ESCAPE_IDENTIFIER@
@@ -168,6 +173,8 @@ mkdir multiqc_WDir &&
168173
@FREYJA_COMMAND@
169174
#elif str($repeat.software_cond.software) == "checkm"
170175
@CHECKM_COMMAND@
176+
#elif str($repeat.software_cond.software) == "checkm2"
177+
@CHECKM2_COMMAND@
171178
#elif str($repeat.software_cond.software) == "pairtools"
172179
@PAIRTOOLS_COMMAND@
173180
#elif str($repeat.software_cond.software) == "porechop"
@@ -297,6 +304,7 @@ cp ./report_data/*plot*.txt ./plots/ | true ## don't fail if no plot files are g
297304
<option value="snpeff">SnpEff</option>
298305
<option value="sortmerna">SortMeRNA</option>
299306
<option value="star">STAR</option>
307+
<option value="sylphtax">Sylph-tax</option>
300308
<!--<option value="supernova">Supernova</option>-->
301309
<!--<option value="theta2">THeTA2</option>-->
302310
<option value="tophat">TopHat2 (TopHat2 is deprecated you should not use it)</option>
@@ -307,6 +315,7 @@ cp ./report_data/*plot*.txt ./plots/ | true ## don't fail if no plot files are g
307315
<option value="bakta">Bakta</option>
308316
<option value="freyja">Freyja</option>
309317
<option value="checkm">CheckM</option>
318+
<option value="checkm2">CheckM2</option>
310319
<option value="pairtools">pairtools</option>
311320
<option value="porechop">Porechop</option>
312321
<option value="snippy">Snippy</option>
@@ -406,6 +415,9 @@ cp ./report_data/*plot*.txt ./plots/ | true ## don't fail if no plot files are g
406415
<when value="star">
407416
<expand macro="star_form"/>
408417
</when>
418+
<when value="sylphtax">
419+
<expand macro="sylphtax_form"/>
420+
</when>
409421
<when value="tophat">
410422
<param name="input" type="data" format="txt,tabular,tsv,csv" multiple="true" label="Output of TopHat2"/>
411423
</when>
@@ -430,6 +442,9 @@ cp ./report_data/*plot*.txt ./plots/ | true ## don't fail if no plot files are g
430442
<when value="checkm">
431443
<expand macro="checkm_form"/>
432444
</when>
445+
<when value="checkm2">
446+
<expand macro="checkm2_form"/>
447+
</when>
433448
<when value="pairtools">
434449
<expand macro="pairtools_form"/>
435450
</when>
@@ -553,6 +568,8 @@ cp ./report_data/*plot*.txt ./plots/ | true ## don't fail if no plot files are g
553568
<expand macro="nonpareil_test"/>
554569
<expand macro="gtdbtk_test"/>
555570
<expand macro="sambamba_test"/>
571+
<expand macro="checkm2_test"/>
572+
<expand macro="sylphtax_test"/>
556573
<!--expand macro="vcftools_test"/> Does not work, did it ever worked? -->
557574

558575
</tests>

tools/multiqc/samtools_plugin.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
<assert_contents>
7979
<has_text text="Title of the report"/>
8080
<has_text text="Commment for the report"/>
81-
<has_text text="samtools-flagstat-dp"/>
81+
<has_text text="samtools-flagstat-pct"/>
8282
</assert_contents>
8383
</output>
8484
<output name="stats">

tools/multiqc/sylphtax_plugin.xml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<macros>
2+
<!-- Command snippet for the "sylphtax" software choice.
For every selected input file it expands the ESCAPE_IDENTIFIER token, builds a path inside $software_dir named after $identifier with a ".sylphmpa" suffix, and symlinks the input to that path.
NOTE(review): the ESCAPE_IDENTIFIER token and $software_dir are defined outside this file; presumably the token sets $identifier. Confirm against its definition. -->
<token name="@SYLPHTAX_COMMAND@"><![CDATA[
3+
#for $file in $repeat.software_cond.input
4+
@ESCAPE_IDENTIFIER@
5+
#set file_path = os.path.join($software_dir, str($identifier) + '.sylphmpa')
6+
ln -s '$file' '$file_path' &&
7+
#end for
8+
]]></token>
9+
<!-- Input form for the "sylphtax" software choice: one or more tabular datasets. -->
<xml name="sylphtax_form">
10+
<param name="input" type="data" format="tabular" multiple="true" label="Output of Sylph-tax" help="It should be the sylphmpa report from Sylph-tax"/>
11+
</xml>
12+
<!-- Test for the "sylphtax" software choice: runs the tool on sylphtax.sylphmpa and expects three outputs, an HTML report containing the title, the comment and "Sylph-tax", and a "plots" collection with 8 elements.
The spelling "Commment" appears in both the parameter value and the assertion; the two strings must stay identical. -->
<xml name="sylphtax_test">
13+
<test expect_num_outputs="3">
14+
<repeat name="results">
15+
<conditional name="software_cond">
16+
<param name="software" value="sylphtax"/>
17+
<param name="input" value="sylphtax.sylphmpa"/>
18+
</conditional>
19+
</repeat>
20+
<param name="title" value="Title of the report"/>
21+
<param name="comment" value="Commment for the report"/>
22+
<param name="flat" value="true"/>
23+
<param name="export" value="true"/>
24+
<output name="html_report">
25+
<assert_contents>
26+
<has_text text="Title of the report"/>
27+
<has_text text="Commment for the report"/>
28+
<has_text text="Sylph-tax"/>
29+
</assert_contents>
30+
</output>
31+
<output_collection name="plots" type="list" count="8"/>
32+
</test>
33+
</xml>
34+
</macros>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Name Completeness_General Contamination Completeness_Specific Completeness_Model_Used Additional_Notes
2+
test1.faa 26.36 0.0 15.9 Neural Network (Specific Model) None
3+
test2.faa 96.06 0.05 99.44 Neural Network (Specific Model) None
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Name Completeness Contamination Completeness_Model_Used Translation_Table_Used Coding_Density Contig_N50 Average_Gene_Length Genome_Size GC_Content Total_Coding_Sequences Total_Contigs Max_Contig_Length Additional_Notes
2+
HSMA33OT_bin_430.fasta 80.96 5.95 Neural Network (Specific Model) 11 0.896 3599 275.0511202958451 4224255 0.41 4597 1478 27196 None
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#SampleID raw_reads_metag_test.fastq Taxonomies_used:['OceanDNA']
2+
clade_name relative_abundance sequence_abundance ANI (if strain-level) Coverage (if strain-level)
3+
d__Bacteria 100.0 100.0 NA NA
4+
d__Bacteria|p__Proteobacteria 100.0 100.0 NA NA
5+
d__Bacteria|p__Proteobacteria|c__Gammaproteobacteria 100.0 100.0 NA NA
6+
d__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales 100.0 100.0 NA NA
7+
d__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae 100.0 100.0 NA NA
8+
d__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Escherichia 100.0 100.0 NA NA
9+
d__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia flexneri 100.0 100.0 NA NA
10+
d__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia flexneri|t__OceanDNA-b35151.fa.gz 100.0 100.0 98.0 0.042

0 commit comments

Comments
 (0)