Skip to content

Commit 132ff18

Browse files
committed
Merge pull request #1 from jmchilton/csv_fix
Small tweaks to CSV fixes by @Christian-B.
2 parents 79a7e31 + 7a960b0 commit 132ff18

2 files changed

Lines changed: 19 additions & 32 deletions

File tree

config/datatypes_conf.xml.sample

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,8 @@
5151
<converter file="interval_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/>
5252
<converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/>
5353
</datatype>
54-
<!-- MSI added Datatypes -->
55-
<datatype extension="csv" type="galaxy.datatypes.tabular:CSV" display_in_upload="true" />
56-
<!-- End MSI added Datatypes -->
54+
<datatype extension="csv" type="galaxy.datatypes.tabular:ExcelCSV" display_in_upload="true" />
55+
<datatype extension="tsv" type="galaxy.datatypes.tabular:ExcelTSV" display_in_upload="true" />
5756
<datatype extension="customtrack" type="galaxy.datatypes.interval:CustomTrack"/>
5857
<datatype extension="bowtie_color_index" type="galaxy.datatypes.ngsindex:BowtieColorIndex" mimetype="text/html" display_in_upload="False"/>
5958
<datatype extension="bowtie_base_index" type="galaxy.datatypes.ngsindex:BowtieBaseIndex" mimetype="text/html" display_in_upload="False"/>
@@ -494,7 +493,8 @@
494493
<sniffer type="galaxy.datatypes.sequence:RNADotPlotMatrix"/>
495494
<sniffer type="galaxy.datatypes.sequence:DotBracket"/>
496495
<sniffer type="galaxy.datatypes.tabular:ConnectivityTable"/>
497-
<sniffer type="galaxy.datatypes.tabular:CSV"/>
496+
<sniffer type="galaxy.datatypes.tabular:ExcelCSV"/>
497+
<sniffer type="galaxy.datatypes.tabular:ExcelTSV"/>
498498
<sniffer type="galaxy.datatypes.msa:Hmmer2" />
499499
<sniffer type="galaxy.datatypes.msa:Hmmer3" />
500500
<sniffer type="galaxy.datatypes.msa:Stockholm_1_0" />

lib/galaxy/datatypes/tabular.py

Lines changed: 15 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -946,50 +946,45 @@ def set_meta( self, dataset, **kwd ):
946946

947947

948948
@dataproviders.decorators.has_dataproviders
949-
class Base_CSV( CSV ):
949+
class BaseCSV( CSV ):
950950
"""
951951
Delimiter-separated table data.
952952
This includes CSV, TSV and other dialects understood by the
953953
Python 'csv' module https://docs.python.org/2/library/csv.html
954954
Must be extended to define the dialect to use, strict_width and file_ext.
955955
See Python module csv for documentation of dialect settings
956956
"""
957-
#dialect Set by subclass
958-
#file_ext Set by subclass
959-
#strict_width Set by subclass
960-
#If set sniff fails is a single row is incorrect.
961-
#Python's csv is more tollerant
962957
big_peek_size = 10240 # Large File chunk used for sniffing CSV dialect
963958

964959
def sniff( self, filename ):
965960
""" Return True if it recognizes dialect and header. """
966961
try:
967-
#check the dialect works
962+
# check the dialect works
968963
reader = csv.reader(open(filename, 'r'), self.dialect)
969-
#Check we can read header and get columns
964+
# Check we can read header and get columns
970965
header_row = reader.next()
971966
if len(header_row) < 2:
972-
#No columns so not seperated by this dialect.
967+
# No columns so not separated by this dialect.
973968
return False
974969

975-
#check all rows can be read as otherwise set_meta throws an exception
970+
# check all rows can be read as otherwise set_meta throws an exception
976971
if self.strict_width:
977972
num_columns = len(header_row)
978973
for data_row in reader:
979-
#All columns must be the same length
974+
# All columns must be the same length
980975
if num_columns != len(data_row):
981976
return False
982977
else:
983-
#Check the next row as it is used by set_meta
978+
# Check the next row as it is used by set_meta
984979
data_row = reader.next()
985980
if len(data_row) < 2:
986-
#No columns so not seperated by this dialect.
981+
# No columns so not separated by this dialect.
987982
return False
988-
#ignore the length in the rest
983+
# ignore the length in the rest
989984
for data_row in reader:
990985
pass
991986

992-
#Optional: Check Python's csv comes up with a similar dialect
987+
# Optional: Check Python's csv comes up with a similar dialect
993988
auto_dialect = csv.Sniffer().sniff(open(filename, 'r').read(self.big_peek_size))
994989
if (auto_dialect.delimiter != self.dialect.delimiter):
995990
return False
@@ -1010,7 +1005,7 @@ def sniff( self, filename ):
10101005

10111006
return True
10121007
except:
1013-
#Not readable by Python's csv using this dialect
1008+
# Not readable by Python's csv using this dialect
10141009
return False
10151010

10161011
def set_meta( self, dataset, **kwd ):
@@ -1042,25 +1037,21 @@ def set_meta( self, dataset, **kwd ):
10421037

10431038

10441039
@dataproviders.decorators.has_dataproviders
1045-
class Excell_CSV( Base_CSV ):
1040+
class ExcelCSV( BaseCSV ):
10461041
"""
10471042
Comma separated table data.
10481043
Only sniffs comma separated files with at least 2 columns
10491044
"""
10501045

10511046
def __init__(self, **kwd):
1052-
Base_CSV.__init__( self, **kwd )
1047+
BaseCSV.__init__( self, **kwd )
10531048
self.dialect = csv.excel # This is the default
1054-
#delimiter = ','
1055-
#quotechar = '"'
1056-
#doublequote = True
1057-
#skipinitialspace = False
10581049
self.file_ext = 'csv' # File extension
10591050
self.strict_width = False # Previous csv type did not check column width
10601051

10611052

10621053
@dataproviders.decorators.has_dataproviders
1063-
class Excell_TSV( Base_CSV ):
1054+
class ExcelTSV( BaseCSV ):
10641055
"""
10651056
Tab separated table data.
10661057
Only sniffs tab separated files with at least two columns
@@ -1073,12 +1064,8 @@ class Excell_TSV( Base_CSV ):
10731064
"""
10741065

10751066
def __init__(self, **kwd):
1076-
Base_CSV.__init__( self, **kwd )
1067+
BaseCSV.__init__( self, **kwd )
10771068
self.dialect = csv.excel_tab
1078-
#delimiter = '\t'
1079-
#quotechar = '"'
1080-
#doublequote = True
1081-
#skipinitialspace = False
10821069
self.file_ext = 'tsv' # File extension
10831070
self.strict_width = True # Leave files with different width to tabular
10841071

0 commit comments

Comments
 (0)