Skip to content

Commit 833f2c3

Browse files
authored
Merge pull request #1 from abretaud/bz2
Add fastq(*).bz2 datatypes and converters
2 parents 304c05f + 99a037d commit 833f2c3

16 files changed

Lines changed: 163 additions & 14 deletions

config/datatypes_conf.xml.sample

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,21 @@
9999
<datatype extension="fastqillumina.gz" type="galaxy.datatypes.sequence:FastqIlluminaGz" display_in_upload="true">
100100
<converter file="fastqilluminagz_to_fastqillumina.xml" target_datatype="fastqillumina"/>
101101
</datatype>
102+
<!-- bzip2-compressed FASTQ datatypes: one entry per FASTQ variant, each declaring a converter whose target is the matching uncompressed datatype. -->
<datatype extension="fastq.bz2" type="galaxy.datatypes.sequence:FastqBz2" display_in_upload="true">
103+
<converter file="fastqbz2_to_fastq.xml" target_datatype="fastq"/>
104+
</datatype>
105+
<datatype extension="fastqsanger.bz2" type="galaxy.datatypes.sequence:FastqSangerBz2" display_in_upload="true">
106+
<converter file="fastqsangerbz2_to_fastqsanger.xml" target_datatype="fastqsanger"/>
107+
</datatype>
108+
<datatype extension="fastqsolexa.bz2" type="galaxy.datatypes.sequence:FastqSolexaBz2" display_in_upload="true">
109+
<converter file="fastqsolexabz2_to_fastqsolexa.xml" target_datatype="fastqsolexa"/>
110+
</datatype>
111+
<datatype extension="fastqcssanger.bz2" type="galaxy.datatypes.sequence:FastqCSSangerBz2" display_in_upload="true">
112+
<converter file="fastqcssangerbz2_to_fastqcssanger.xml" target_datatype="fastqcssanger"/>
113+
</datatype>
114+
<datatype extension="fastqillumina.bz2" type="galaxy.datatypes.sequence:FastqIlluminaBz2" display_in_upload="true">
115+
<converter file="fastqilluminabz2_to_fastqillumina.xml" target_datatype="fastqillumina"/>
116+
</datatype>
102117
<datatype extension="fqtoc" type="galaxy.datatypes.sequence:SequenceSplitLocations" display_in_upload="true"/>
103118
<datatype extension="eland" type="galaxy.datatypes.tabular:Eland" display_in_upload="true"/>
104119
<datatype extension="elandmulti" type="galaxy.datatypes.tabular:ElandMulti" display_in_upload="true"/>
@@ -638,6 +653,7 @@
638653
<sniffer type="galaxy.datatypes.sequence:Fasta"/>
639654
<sniffer type="galaxy.datatypes.sequence:Fastq"/>
640655
<sniffer type="galaxy.datatypes.sequence:FastqGz"/>
656+
<sniffer type="galaxy.datatypes.sequence:FastqBz2"/>
641657
<sniffer type="galaxy.datatypes.interval:Wiggle"/>
642658
<sniffer type="galaxy.datatypes.text:Html"/>
643659
<sniffer type="galaxy.datatypes.images:Pdf"/>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<tool id="CONVERTER_fastqbz2_to_fastq" name="Convert fastq.bz2 files to fastq" version="1.0.0" hidden="true">
<!-- Hidden converter: runs `bzip2 -dcf` (decompress, write to stdout, force) on the fastq.bz2 dataset input1 and redirects the result into output1, declared as fastq. -->
2+
<command>bzip2 -dcf '$input1' > '$output1'</command>
3+
<inputs>
4+
<param format="fastq.bz2" name="input1" type="data" label="Choose FASTQ file"/>
5+
</inputs>
6+
<outputs>
7+
<data format="fastq" name="output1"/>
8+
</outputs>
9+
<help>
10+
</help>
11+
</tool>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<tool id="CONVERTER_fastqcssangerbz2_to_fastqcssanger" name="Convert fastqcssanger.bz2 files to fastqcssanger" version="1.0.0" hidden="true">
<!-- Hidden converter: runs `bzip2 -dcf` (decompress, write to stdout, force) on the fastqcssanger.bz2 dataset input1 and redirects the result into output1, declared as fastqcssanger. -->
2+
<command>bzip2 -dcf '$input1' > '$output1'</command>
3+
<inputs>
4+
<param format="fastqcssanger.bz2" name="input1" type="data" label="Choose FASTQ file"/>
5+
</inputs>
6+
<outputs>
7+
<data format="fastqcssanger" name="output1"/>
8+
</outputs>
9+
<help>
10+
</help>
11+
</tool>

lib/galaxy/datatypes/converters/fastqcssangergz_to_fastqcssanger.xml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
<tool id="CONVERTER_fastqcssangergz_to_fastqcssanger" name="Convert fastqcssanger.gz files to fastqcssanger" version="1.0.0" hidden="true">
22
<command>gzip -dcf '$input1' > '$output1'</command>
33
<inputs>
4-
<page>
5-
<param format="fastqcssanger.gz" name="input1" type="data" label="Choose FASTQ file"/>
6-
</page>
7-
</inputs>
4+
<param format="fastqcssanger.gz" name="input1" type="data" label="Choose FASTQ file"/>
5+
</inputs>
86
<outputs>
97
<data format="fastqcssanger" name="output1"/>
108
</outputs>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<tool id="CONVERTER_fastqilluminabz2_to_fastqillumina" name="Convert fastqillumina.bz2 files to fastqillumina" version="1.0.0" hidden="true">
<!-- Hidden converter: runs `bzip2 -dcf` (decompress, write to stdout, force) on the fastqillumina.bz2 dataset input1 and redirects the result into output1, declared as fastqillumina. -->
2+
<command>bzip2 -dcf '$input1' > '$output1'</command>
3+
<inputs>
4+
<param format="fastqillumina.bz2" name="input1" type="data" label="Choose FASTQ file"/>
5+
</inputs>
6+
<outputs>
7+
<data format="fastqillumina" name="output1"/>
8+
</outputs>
9+
<help>
10+
</help>
11+
</tool>

lib/galaxy/datatypes/converters/fastqilluminagz_to_fastqillumina.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<command>gzip -dcf '$input1' > '$output1'</command>
33
<inputs>
44
<param format="fastqillumina.gz" name="input1" type="data" label="Choose FASTQ file"/>
5-
</inputs>
5+
</inputs>
66
<outputs>
77
<data format="fastqillumina" name="output1"/>
88
</outputs>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<tool id="CONVERTER_fastqsangerbz2_to_fastqsanger" name="Convert fastqsanger.bz2 files to fastqsanger" version="1.0.0" hidden="true">
<!-- Hidden converter: runs `bzip2 -dcf` (decompress, write to stdout, force) on the fastqsanger.bz2 dataset input1 and redirects the result into output1, declared as fastqsanger. -->
2+
<command>bzip2 -dcf '$input1' > '$output1'</command>
3+
<inputs>
4+
<param format="fastqsanger.bz2" name="input1" type="data" label="Choose FASTQ file"/>
5+
</inputs>
6+
<outputs>
7+
<data format="fastqsanger" name="output1"/>
8+
</outputs>
9+
<help>
10+
</help>
11+
</tool>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<tool id="CONVERTER_fastqsolexabz2_to_fastqsolexa" name="Convert fastqsolexa.bz2 files to fastqsolexa" version="1.0.0" hidden="true">
<!-- Hidden converter: runs `bzip2 -dcf` (decompress, write to stdout, force) on the fastqsolexa.bz2 dataset input1 and redirects the result into output1, declared as fastqsolexa. -->
2+
<command>bzip2 -dcf '$input1' > '$output1'</command>
3+
<inputs>
4+
<param format="fastqsolexa.bz2" name="input1" type="data" label="Choose FASTQ file"/>
5+
</inputs>
6+
<outputs>
7+
<data format="fastqsolexa" name="output1"/>
8+
</outputs>
9+
<help>
10+
</help>
11+
</tool>

lib/galaxy/datatypes/converters/fastqsolexagz_to_fastqsolexa.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<command>gzip -dcf '$input1' > '$output1'</command>
33
<inputs>
44
<param format="fastqsolexa.gz" name="input1" type="data" label="Choose FASTQ file"/>
5-
</inputs>
5+
</inputs>
66
<outputs>
77
<data format="fastqsolexa" name="output1"/>
88
</outputs>

lib/galaxy/datatypes/sequence.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import gzip
6+
import bz2
67
import json
78
import logging
89
import os
@@ -572,10 +573,13 @@ def set_meta( self, dataset, **kwd ):
572573
data_lines = 0
573574
sequences = 0
574575
seq_counter = 0 # blocks should be 4 lines long
575-
compressed = is_gzip(dataset.file_name)
576+
compressed_gzip = is_gzip(dataset.file_name)
577+
compressed_bzip2 = is_bz2(dataset.file_name)
576578
try:
577-
if compressed:
579+
if compressed_gzip:
578580
in_file = gzip.GzipFile(dataset.file_name)
581+
elif compressed_bzip2:
582+
in_file = bz2.BZ2File(dataset.file_name)
579583
else:
580584
in_file = open(dataset.file_name)
581585
for line in in_file:
@@ -615,7 +619,7 @@ def sniff( self, filename ):
615619
>>> Fastq().sniff( fname )
616620
True
617621
"""
618-
compressed = is_gzip(filename)
622+
compressed = is_gzip(filename) or is_bz2(filename)
619623
if compressed and not isinstance(self, Binary):
620624
return False
621625
headers = get_headers( filename, None )
@@ -743,6 +747,40 @@ class FastqCSSangerGz( FastqGz ):
743747
Binary.register_sniffable_binary_format("fastqcssanger.gz", "fastqcssanger.gz", FastqCSSangerGz)
744748

745749

750+
class FastqBz2 ( BaseFastq, Binary ):
751+
"""Class representing a generic compressed FASTQ sequence"""
752+
edam_format = "format_1930"
753+
file_ext = "fastq.gz"
754+
Binary.register_sniffable_binary_format("fastq.gz", "fastq.gz", FastqGz)
755+
756+
757+
class FastqSangerBz2( FastqBz2 ):
758+
"""Class representing a bzip2-compressed FASTQ sequence ( the Sanger variant )"""
759+
edam_format = "format_1932"
760+
file_ext = "fastqsanger.bz2"
761+
Binary.register_sniffable_binary_format("fastqsanger.bz2", "fastqsanger.bz2", FastqSangerBz2)
762+
763+
764+
class FastqSolexaBz2( FastqBz2 ):
765+
"""Class representing a bzip2-compressed FASTQ sequence ( the Solexa variant )"""
766+
edam_format = "format_1933"
767+
file_ext = "fastqsolexa.bz2"
768+
Binary.register_sniffable_binary_format("fastqsolexa.bz2", "fastqsolexa.bz2", FastqSolexaBz2)
769+
770+
771+
class FastqIlluminaBz2( FastqBz2 ):
772+
"""Class representing a bzip2-compressed FASTQ sequence ( the Illumina 1.3+ variant )"""
773+
edam_format = "format_1931"
774+
file_ext = "fastqillumina.bz2"
775+
Binary.register_sniffable_binary_format("fastqillumina.bz2", "fastqillumina.bz2", FastqIlluminaBz2)
776+
777+
778+
class FastqCSSangerBz2( FastqBz2 ):
779+
"""Class representing a Color Space bzip2-compressed FASTQ sequence ( e.g a SOLiD variant )"""
780+
file_ext = "fastqcssanger.bz2"
781+
Binary.register_sniffable_binary_format("fastqcssanger.bz2", "fastqcssanger.bz2", FastqCSSangerBz2)
782+
783+
746784
class Maf( Alignment ):
747785
"""Class describing a Maf alignment"""
748786
edam_format = "format_3008"

0 commit comments

Comments
 (0)