-
Notifications
You must be signed in to change notification settings - Fork 508
Update FastQC/Falco wrappers to make output of pairs compatible with multiQC #7839
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
lldelisle
wants to merge
16
commits into
galaxyproject:main
Choose a base branch
from
lldelisle:update_fastqc
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 15 commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
5ad780e
handle differently pairs to enable to keep both identifiers in report
lldelisle e396f5e
Apply suggestion from @mvdbeek
lldelisle 70686c2
fix URL
lldelisle 7f51cdb
apply same changes to falco
lldelisle 6bb1899
run falco/fastqc on both inputs at the same time
lldelisle 24a5a49
Apply suggestions from code review
lldelisle c8b48b8
do not mention slower
lldelisle 850c0c1
do not allow bam/sam in paired mode
lldelisle 9a0fcbe
Improve selector label
lldelisle c38a1e4
improve selector label in fastqc
lldelisle 9d27e2d
use regex in from_work_dir rather than mv steps from @wm75 suggestion
lldelisle 03b426f
use regex also for summary
lldelisle 8af05ac
do symetric tests between fastqc and falco
lldelisle bd2688c
add png to zip of html
lldelisle b9e2c9a
fix the Images copy
lldelisle 5024d02
fix linting
lldelisle File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,21 @@ | ||
| <tool id="falco" name="Falco" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> | ||
| <description>An alternative, more performant implementation of FastQC for high throughput sequence quality control</description> | ||
| <macros> | ||
| <token name="@TOOL_VERSION@">1.2.4</token> | ||
| <token name="@TOOL_VERSION@">1.2.5</token> | ||
| <token name="@VERSION_SUFFIX@">0</token> | ||
| <token name="@LN_S@"> | ||
| <![CDATA[ | ||
| #if 'bam' in $input_file.ext: | ||
| #set format = 'bam' | ||
| #elif 'sam' in $input_file.ext: | ||
| #set format = 'sam' | ||
| #elif 'gz' in $input_file.ext: | ||
| #set format = 'fastq.gz' | ||
| #else | ||
| #set format = 'fastq' | ||
| #end if | ||
| ln -s '${input_file}' '${input_file_sl}' && | ||
| ]]></token> | ||
| </macros> | ||
| <xrefs> | ||
| <xref type="bio.tools">falco</xref> | ||
|
|
@@ -12,19 +25,22 @@ | |
| </requirements> | ||
| <command detect_errors="aggressive"><![CDATA[ | ||
| #import re | ||
| #set input_name_sl = re.sub('[^\w\-\s]', '_', str($input_file.element_identifier)) | ||
|
|
||
| #if 'bam' in $input_file.ext: | ||
| #set format = 'bam' | ||
| #elif 'sam' in $input_file.ext: | ||
| #set format = 'sam' | ||
| #elif 'gz' in $input_file.ext: | ||
| #set format = 'fastq.gz' | ||
| #set $input_files=[] | ||
| #if str($input_type_select.input_type) == 'individually' | ||
| #set input_file = $input_type_select.input_file | ||
| #set input_file_sl = re.sub('[^\w\-\s]', '_', str($input_file.element_identifier)) | ||
| @LN_S@ | ||
| #set $input_files += [$input_file_sl] | ||
| #else | ||
| #set format = 'fastq' | ||
| #set input_file = $input_type_select.input_col['forward'] | ||
| #set input_file_sl = re.sub('[^\w\-\s]', '_', str($input_type_select.input_col.element_identifier)) + '_forward' | ||
| @LN_S@ | ||
| #set $input_files += [$input_file_sl] | ||
| #set input_file = $input_type_select.input_col['reverse'] | ||
| #set input_file_sl = re.sub('[^\w\-\s]', '_', str($input_type_select.input_col.element_identifier)) + '_reverse' | ||
| @LN_S@ | ||
| #set $input_files += [$input_file_sl] | ||
| #end if | ||
|
|
||
| ln -s '${input_file}' '${input_name_sl}' && | ||
| falco | ||
| #if $contaminants: | ||
| --contaminants '${contaminants}' | ||
|
|
@@ -45,16 +61,31 @@ | |
| $nogroup | ||
| ## --kmers $kmers | ||
| -f '${format}' | ||
| '${input_name_sl}' | ||
| #if $subsample > 1: | ||
| -subsample $subsample | ||
| -subsample $subsample | ||
| #end if | ||
| $bisulfite | ||
| $reverse_complement | ||
| $generate_summary | ||
| #for $input_file_sl in $input_files | ||
| '${input_file_sl}' | ||
| #end for | ||
| ]]></command> | ||
| <inputs> | ||
| <param format="fastq,fastq.gz,bam,sam" name="input_file" type="data" label="Raw read data from your current history"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_type" type="select" label="How is your input data organized?"> | ||
| <option value="individually">As separate datasets (single-end fastq or sam/bam)</option> | ||
| <option value="paired">As a paired collection of forward and reverse reads</option> | ||
| </param> | ||
| <when value="individually"> | ||
| <param format="fastq,fastq.gz,bam,sam" name="input_file" type="data" | ||
| label="Raw read data from your current history" /> | ||
| </when> | ||
| <when value="paired"> | ||
| <param format="fastq,fastq.gz" name="input_col" type="data_collection" | ||
| collection_type="paired" label="Raw read data from your current history in paired collections"/> | ||
| </when> | ||
| </conditional> | ||
| <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list" help="tab delimited file with 2 columns: name and sequence. For example: Illumina Small RNA RT Primer	CAAGCAGAAGACGGCATACGA"/> | ||
| <param argument="--adapters" type="data" format="tabular" optional="true" label="Adapter list" help="List of adapters adapter sequences which will be explicity searched against the library. It should be a tab-delimited file with 2 columns: name and sequence."/> | ||
| <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file" help="a file that specifies which submodules are to be executed (default=all) and also specifies the thresholds for the each submodules warning parameter."/> | ||
|
|
@@ -67,16 +98,37 @@ | |
| <param name="generate_summary" type="boolean" truevalue="" falsevalue="-skip-summary" checked="False" label="Generate summary output of QC test results"/> | ||
| </inputs> | ||
| <outputs> | ||
| <data format="html" name="html_file" from_work_dir="fastqc_report.html" label="${tool.name} on ${on_string}: Webpage"/> | ||
| <data format="txt" name="text_file" from_work_dir="fastqc_data.txt" label="${tool.name} on ${on_string}: RawData"/> | ||
| <data format="html" name="html_file" from_work_dir="fastqc_report.html" label="${tool.name} on ${on_string}: Webpage" > | ||
| <filter>input_type_select['input_type'] == 'individually'</filter> | ||
| </data> | ||
| <data format="txt" name="text_file" from_work_dir="fastqc_data.txt" label="${tool.name} on ${on_string}: RawData" > | ||
| <filter>input_type_select['input_type'] == 'individually'</filter> | ||
| </data> | ||
| <data format="txt" name="summary_file" from_work_dir="summary.txt" label="${tool.name} on ${on_string}: SummaryData"> | ||
| <filter>generate_summary</filter> | ||
| <filter>input_type_select['input_type'] == 'individually' and generate_summary</filter> | ||
| </data> | ||
| <collection name="html_files" format="html" type="paired" label="${tool.name} on ${on_string}: Webpages"> | ||
| <data name="forward" from_work_dir="*forward_fastqc_report.html" /> | ||
| <data name="reverse" from_work_dir="*reverse_fastqc_report.html" /> | ||
| <filter>input_type_select['input_type'] == 'paired'</filter> | ||
| </collection> | ||
| <collection name="text_files" format="txt" type="paired" label="${tool.name} on ${on_string}: RawData text files"> | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe should be type="list" |
||
| <data name="forward" from_work_dir="*forward_fastqc_data.txt" /> | ||
| <data name="reverse" from_work_dir="*reverse_fastqc_data.txt" /> | ||
| <filter>input_type_select['input_type'] == 'paired'</filter> | ||
| </collection> | ||
| <collection name="summary_files" format="txt" type="paired" label="${tool.name} on ${on_string}: SummaryData text files"> | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe should be type="list" |
||
| <data name="forward" from_work_dir="*forward_summary.txt" /> | ||
| <data name="reverse" from_work_dir="*reverse_summary.txt" /> | ||
| <filter>input_type_select['input_type'] == 'paired' and generate_summary</filter> | ||
| </collection> | ||
| </outputs> | ||
| <tests> | ||
| <!-- Test with fastq input --> | ||
| <test expect_num_outputs="2"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| </conditional> | ||
| <output name="html_file" ftype="html"> | ||
| <assert_contents> | ||
| <has_line_matching expression="<html><head>.+<title> 1000trimmed_fastq - report.+"/> | ||
|
|
@@ -87,7 +139,9 @@ | |
| </test> | ||
| <!-- Test with fastq.gz input --> | ||
| <test expect_num_outputs="2"> | ||
| <param name="input_file" value="1000trimmed.fastq.gz"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="1000trimmed.fastq.gz"/> | ||
| </conditional> | ||
| <output name="html_file" ftype="html"> | ||
| <assert_contents> | ||
| <has_line_matching expression="<html><head>.+<title> 1000trimmed_fastq_gz - report.+"/> | ||
|
|
@@ -98,7 +152,9 @@ | |
| </test> | ||
| <!-- Test with BAM input --> | ||
| <test expect_num_outputs="2"> | ||
| <param name="input_file" value="hisat_output_1.bam"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="hisat_output_1.bam"/> | ||
| </conditional> | ||
| <output name="html_file" ftype="html"> | ||
| <assert_contents> | ||
| <has_line_matching expression="<html><head>.+<title> hisat_output_1_bam - report.+"/> | ||
|
|
@@ -109,7 +165,9 @@ | |
| </test> | ||
| <!-- Test summary file option --> | ||
| <test expect_num_outputs="3"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| </conditional> | ||
| <param name="generate_summary" value="true"/> | ||
| <output name="html_file" ftype="html"> | ||
| <assert_contents> | ||
|
|
@@ -120,7 +178,9 @@ | |
| <output name="summary_file" file="fastqc_data_summary.txt" ftype="txt"/> | ||
| </test> | ||
| <test expect_num_outputs="2"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| </conditional> | ||
| <param name="contaminants" value="contaminant_list.txt" ftype="tabular"/> | ||
| <output name="html_file" ftype="html"> | ||
| <assert_contents> | ||
|
|
@@ -130,7 +190,9 @@ | |
| <output name="text_file" file="fastqc_data_contaminants.txt" ftype="txt" lines_diff="2"/> | ||
| </test> | ||
| <test expect_num_outputs="2"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| </conditional> | ||
| <param name="adapters" value="adapter_list.txt" ftype="tabular"/> | ||
| <output name="html_file" ftype="html"> | ||
| <assert_contents> | ||
|
|
@@ -140,7 +202,9 @@ | |
| <output name="text_file" file="fastqc_data_adapters.txt" ftype="txt" lines_diff="2"/> | ||
| </test> | ||
| <test expect_num_outputs="3"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| </conditional> | ||
| <param name="limits" value="limits.txt" ftype="txt"/> | ||
| <param name="generate_summary" value="true"/> | ||
| <output name="html_file" ftype="html"> | ||
|
|
@@ -170,7 +234,9 @@ | |
| <output name="text_file" file="fastqc_data_min_length.txt" ftype="txt"/> | ||
| </test> --> | ||
| <test expect_num_outputs="2"> | ||
| <param name="input_file" value="1000trimmed.fastq" ftype="fastq"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="1000trimmed.fastq" ftype="fastq"/> | ||
| </conditional> | ||
| <param name="nogroup" value="--nogroup"/> | ||
| <output name="html_file" ftype="html"> | ||
| <assert_contents> | ||
|
|
@@ -180,7 +246,9 @@ | |
| <output name="text_file" file="fastqc_data_nogroup.txt" ftype="txt" lines_diff="2"/> | ||
| </test> | ||
| <test expect_num_outputs="3"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| </conditional> | ||
| <param name="subsample" value="10"/> | ||
| <param name="generate_summary" value="true"/> | ||
| <output name="html_file" ftype="html"> | ||
|
|
@@ -192,7 +260,9 @@ | |
| <output name="summary_file" file="fastqc_report_subsample_summary.txt" ftype="txt"/> | ||
| </test> | ||
| <test expect_num_outputs="3"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| </conditional> | ||
| <param name="bisulfite" value="-bisulfite"/> | ||
| <param name="generate_summary" value="true"/> | ||
| <output name="html_file" ftype="html"> | ||
|
|
@@ -204,7 +274,9 @@ | |
| <output name="summary_file" file="fastqc_report_bisulfite_summary.txt" ftype="txt"/> | ||
| </test> | ||
| <test expect_num_outputs="2"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_file" value="1000trimmed.fastq"/> | ||
| </conditional> | ||
| <param name="reverse_complement" value="-reverse-complement"/> | ||
| <output name="html_file" ftype="html"> | ||
| <assert_contents> | ||
|
|
@@ -213,6 +285,53 @@ | |
| </output> | ||
| <output name="text_file" file="fastqc_report_reverse_complement.txt" ftype="txt" lines_diff="2"/> | ||
| </test> | ||
| <!-- Test summary file option paired input --> | ||
| <test expect_num_outputs="9"> | ||
| <conditional name="input_type_select"> | ||
| <param name="input_type" value="paired"/> | ||
| <param name="input_col"> | ||
| <collection type="paired" name="1000trimmed"> | ||
| <element name="forward" value="1000trimmed.fastq" /> | ||
| <element name="reverse" value="1000trimmed.fastq" /> | ||
| </collection> | ||
| </param> | ||
| </conditional> | ||
| <param name="generate_summary" value="true"/> | ||
| <output_collection name="html_files"> | ||
| <element name="forward"> | ||
| <assert_contents> | ||
| <has_line_matching expression="<html><head>.+<title> 1000trimmed_forward - report.+"/> | ||
| </assert_contents> | ||
| </element> | ||
| <element name="reverse"> | ||
| <assert_contents> | ||
| <has_line_matching expression="<html><head>.+<title> 1000trimmed_reverse - report.+"/> | ||
| </assert_contents> | ||
| </element> | ||
| </output_collection> | ||
| <output_collection name="text_files"> | ||
| <element name="forward" value="forward_fastqc_data.txt" /> | ||
| <element name="reverse"> | ||
| <assert_contents> | ||
| <has_text text="1000trimmed_reverse" /> | ||
| </assert_contents> | ||
| </element> | ||
| </output_collection> | ||
| <output_collection name="summary_files"> | ||
| <element name="forward"> | ||
| <assert_contents> | ||
| <has_text text="1000trimmed_forward" /> | ||
| <has_n_lines n="11" /> | ||
| </assert_contents> | ||
| </element> | ||
| <element name="reverse"> | ||
| <assert_contents> | ||
| <has_text text="1000trimmed_reverse" /> | ||
| <has_n_lines n="11" /> | ||
| </assert_contents> | ||
| </element> | ||
| </output_collection> | ||
| </test> | ||
| </tests> | ||
| <help><![CDATA[ | ||
| **What it does** | ||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe should be type="list"