@@ -113,7 +113,7 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N
113113 data_lines = 0
114114 sequences = 0
115115 with compression_utils .get_fileobj (dataset .get_file_name ()) as fh :
116- for line in fh :
116+ for line in iter_start_of_line ( fh , 1 ) :
117117 line = line .strip ()
118118 if line and line .startswith ("#" ):
119119 # We don't count comment lines for sequence data types
@@ -739,7 +739,7 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N
739739 data_lines = 0
740740 sequences = 0
741741 with compression_utils .get_fileobj (dataset .get_file_name ()) as in_file :
742- for line in in_file :
742+ for line in iter_start_of_line ( in_file , 1 ) :
743743 if line .startswith ("@" ) and data_lines % 4 == 0 :
744744 sequences += 1
745745 data_lines += 1
@@ -1291,7 +1291,7 @@ def sniff(self, filename: str) -> bool:
12911291 coor = False
12921292 pairs = False
12931293 with open (filename ) as handle :
1294- for line in handle :
1294+ for line in iter_start_of_line ( handle , 9 ) :
12951295 line = line .strip ()
12961296 if line :
12971297 if line .startswith ("/sequence" ):
@@ -1328,12 +1328,13 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N
13281328 data_lines = 0
13291329 sequences = 0
13301330
1331- for line in open (dataset .get_file_name ()):
1332- line = line .strip ()
1333- data_lines += 1
1331+ with open (dataset .get_file_name ()) as fh :
1332+ for line in iter_start_of_line (fh , 1 ):
1333+ line = line .strip ()
1334+ data_lines += 1
13341335
1335- if line and line .startswith (">" ):
1336- sequences += 1
1336+ if line and line .startswith (">" ):
1337+ sequences += 1
13371338
13381339 dataset .metadata .data_lines = data_lines
13391340 dataset .metadata .sequences = sequences
0 commit comments