Skip to content

Commit 9b85000

Browse files
committed
speed up fasten_metrics; make very large file for metrics benchmark
1 parent 406f813 commit 9b85000

2 files changed

Lines changed: 46 additions & 24 deletions

File tree

src/bin/fasten_metrics.rs

Lines changed: 43 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -67,11 +67,51 @@ fn main(){
6767

6868
// Header
6969
if each_read {
70-
println!("readID\treadLength\tavgQual");
70+
each_read_metrics(&filename);
7171
} else {
72-
println!("{}",vec!["totalLength", "numReads", "avgReadLength","avgQual"].join("\t"));
72+
summary_metrics(&filename);
73+
}
74+
}
75+
76+
fn each_read_metrics(filename: &str) {
77+
println!("readID\treadLength\tavgQual");
78+
79+
let mut num_lines :u64 = 0;
80+
81+
// read the file
82+
let my_file = File::open(&filename).expect("Could not open file");
83+
let my_buffer=BufReader::new(my_file);
84+
for line in my_buffer.lines() {
85+
num_lines+=1;
86+
let mod_line = num_lines % 4;
87+
88+
match mod_line {
89+
1 => {
90+
let id = line.expect("Expected an ID line");
91+
// remove the @
92+
print!("{}\t",&id[1..]);
93+
}
94+
2 => {
95+
let my_read_length=line.expect("Expected a sequence line").len() as f64;
96+
print!("{}\t",my_read_length);
97+
}
98+
0 => {
99+
let qual_line=line.expect("Expected a qual line");
100+
101+
let my_qual_vec: Vec<u8> = qual_line.into_bytes();
102+
let my_avg_qual = avg_qual(&my_qual_vec, 33);
103+
println!("{:.2}",my_avg_qual);
104+
}
105+
_ => {
73106

107+
}
108+
};
74109
}
110+
111+
}
112+
113+
fn summary_metrics(filename: &str) {
114+
println!("totalReadLength\tnumReads\tavgReadLength\tavgQual");
75115

76116
let mut read_length :Vec<f64> = vec![];
77117
let mut read_qual :Vec<u8> = vec![];
@@ -85,31 +125,15 @@ fn main(){
85125
let mod_line = num_lines % 4;
86126

87127
match mod_line {
88-
1 => {
89-
if each_read {
90-
let id = line.expect("Expected an ID line");
91-
// remove the @
92-
print!("{}\t",&id[1..]);
93-
}
94-
}
95128
2 => {
96129
let my_read_length=line.expect("Expected a sequence line").len() as f64;
97-
if each_read {
98-
print!("{}\t",my_read_length);
99-
}
100130
read_length.push(my_read_length);
101131
}
102132
0 => {
103133
let qual_line=line.expect("Expected a qual line");
104134

105135
let my_qual_vec: Vec<u8> = qual_line.into_bytes();
106-
// TODO this if statement makes the program take twice as long. Optimize?
107-
if each_read {
108-
let my_avg_qual = avg_qual(&my_qual_vec, 33);
109-
println!("{:.2}",my_avg_qual);
110-
}
111136
read_qual.extend(my_qual_vec.into_iter());
112-
113137
}
114138
_ => {
115139

@@ -121,18 +145,14 @@ fn main(){
121145

122146
let mut summary_metrics=vec![total_length.to_string(),num_reads.to_string()];
123147

124-
// add statistics if requested
125148
let total_length_str = (total_length as f64/num_reads as f64).to_string();
126149
let total_qual_str = format!("{:.2}", avg_qual(&read_qual, 33));
127150

128151
summary_metrics.push(total_length_str);
129152
summary_metrics.push(total_qual_str.to_string());
130153

131154
// summary metrics
132-
if !each_read {
133-
println!("{}", summary_metrics.join("\t"));
134-
}
135-
155+
println!("{}", summary_metrics.join("\t"));
136156
}
137157

138158
/// Calculates average quality value from a vector of quality bytes

tests/lib/benchmark.sh

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,9 @@ fi
5454
if [[ ! -s $largest ]]; then
5555
# Create largest sorted file if not present
5656
for i in `seq 1 $multiplier`; do
57-
cat $large_sorted
57+
for j in `seq 1 $multiplier`; do
58+
cat $large_sorted
59+
done
5860
done > $largest
5961
fi
6062

0 commit comments

Comments
 (0)