|
3 | 3 | """ |
4 | 4 |
|
5 | 5 | import gzip |
| 6 | +import bz2 |
6 | 7 | import json |
7 | 8 | import logging |
8 | 9 | import os |
@@ -572,10 +573,13 @@ def set_meta( self, dataset, **kwd ): |
572 | 573 | data_lines = 0 |
573 | 574 | sequences = 0 |
574 | 575 | seq_counter = 0 # blocks should be 4 lines long |
575 | | - compressed = is_gzip(dataset.file_name) |
| 576 | + compressed_gzip = is_gzip(dataset.file_name) |
| 577 | + compressed_bzip2 = is_bz2(dataset.file_name) |
576 | 578 | try: |
577 | | - if compressed: |
| 579 | + if compressed_gzip: |
578 | 580 | in_file = gzip.GzipFile(dataset.file_name) |
| 581 | + elif compressed_bzip2: |
| 582 | + in_file = bz2.BZ2File(dataset.file_name) |
579 | 583 | else: |
580 | 584 | in_file = open(dataset.file_name) |
581 | 585 | for line in in_file: |
@@ -615,7 +619,7 @@ def sniff( self, filename ): |
615 | 619 | >>> Fastq().sniff( fname ) |
616 | 620 | True |
617 | 621 | """ |
618 | | - compressed = is_gzip(filename) |
| 622 | + compressed = is_gzip(filename) or is_bz2(filename) |
619 | 623 | if compressed and not isinstance(self, Binary): |
620 | 624 | return False |
621 | 625 | headers = get_headers( filename, None ) |
@@ -743,6 +747,40 @@ class FastqCSSangerGz( FastqGz ): |
743 | 747 | Binary.register_sniffable_binary_format("fastqcssanger.gz", "fastqcssanger.gz", FastqCSSangerGz) |
744 | 748 |
|
745 | 749 |
|
class FastqBz2(BaseFastq, Binary):
    """Class representing a generic bzip2-compressed FASTQ sequence"""
    edam_format = "format_1930"
    # Must be the bz2 extension; "fastq.gz" is the gzip class's extension.
    file_ext = "fastq.bz2"


# Register FastqBz2 itself under its own extension, following the same
# pattern as the other *Bz2 variants (extension, extension, class).
Binary.register_sniffable_binary_format("fastq.bz2", "fastq.bz2", FastqBz2)
| 755 | + |
| 756 | + |
class FastqSangerBz2(FastqBz2):
    """Bzip2-compressed FASTQ sequence data, Sanger variant."""

    file_ext = "fastqsanger.bz2"
    edam_format = "format_1932"


# Registration happens at module level, once the class object exists.
Binary.register_sniffable_binary_format(
    "fastqsanger.bz2",
    "fastqsanger.bz2",
    FastqSangerBz2,
)
| 762 | + |
| 763 | + |
class FastqSolexaBz2(FastqBz2):
    """Bzip2-compressed FASTQ sequence data, Solexa variant."""

    file_ext = "fastqsolexa.bz2"
    edam_format = "format_1933"


# Registration happens at module level, once the class object exists.
Binary.register_sniffable_binary_format(
    "fastqsolexa.bz2",
    "fastqsolexa.bz2",
    FastqSolexaBz2,
)
| 769 | + |
| 770 | + |
class FastqIlluminaBz2(FastqBz2):
    """Bzip2-compressed FASTQ sequence data, Illumina 1.3+ variant."""

    file_ext = "fastqillumina.bz2"
    edam_format = "format_1931"


# Registration happens at module level, once the class object exists.
Binary.register_sniffable_binary_format(
    "fastqillumina.bz2",
    "fastqillumina.bz2",
    FastqIlluminaBz2,
)
| 776 | + |
| 777 | + |
class FastqCSSangerBz2(FastqBz2):
    """Bzip2-compressed Color Space FASTQ sequence data (e.g. a SOLiD variant)."""

    # No edam_format is set here; only the file extension is declared.
    file_ext = "fastqcssanger.bz2"


# Registration happens at module level, once the class object exists.
Binary.register_sniffable_binary_format(
    "fastqcssanger.bz2",
    "fastqcssanger.bz2",
    FastqCSSangerBz2,
)
| 782 | + |
| 783 | + |
746 | 784 | class Maf( Alignment ): |
747 | 785 | """Class describing a Maf alignment""" |
748 | 786 | edam_format = "format_3008" |
|
0 commit comments