@@ -109,6 +109,56 @@ rule consensus_bcftools:
109109 """
110110
111111
112+ localrules :
113+ cons_bcf_QA ,
114+
115+
116+ IUPAC = (lambda x : x .upper () + x .lower ())(
117+ "R" # = A or G
118+ "K" # = G or T
119+ "S" # = G or C
120+ "Y" # = C or T
121+ "M" # = A or C
122+ "W" # = A or T
123+ "B" # = not A (C, G or T)
124+ "H" # = not G (A, C or T)
125+ "D" # = not C (A, G or T)
126+ "V" # = not T (A, C or G)
127+ )
128+
129+
130+ rule cons_bcf_QA :
131+ input :
132+ fname_fasta = "{dataset}/references/consensus.bcftools.fasta" ,
133+ fname_fasta_ambig = "{dataset}/references/consensus_ambig.bcftools.fasta" ,
134+ output :
135+ stats = "{dataset}/references/consensus.bcftools.stats.yaml" ,
136+ params :
137+ name = ID ,
138+ IUPAC = IUPAC ,
139+ log :
140+ outfile = "{dataset}/references/consensus.bcftools.stats.out.log" ,
141+ errfile = "{dataset}/references/consensus.bcftools.stats.err.log" ,
142+ # conda: # NOTE: no environment, we rely on standard bash+coreutils
143+ benchmark :
144+ "{dataset}/alignments/consensus.benchmark"
145+ resources :
146+ disk_mb = 1250 ,
147+ mem_mb = config .consensus_sequences ["mem" ],
148+ runtime = config .consensus_sequences ["time" ],
149+ threads : 1
150+ shell :
151+ r"""
152+ ( \
153+ echo '{params.name}:' && \
154+ printf ' %s: %s\n' \
155+ 'consensus_N' "$(grep -E '^[^>]' "{input.fname_fasta}" | tr -cd 'Nn' | wc -c)" \
156+ 'consensus_IUPAC' "$(grep -E '^[^>]' "{input.fname_fasta_ambig}" | tr -cd '{params.IUPAC}' | wc -c)" \
157+ ) > "{output.stats}" \
158+ 2> >(tee "{log.errfile}" >&2)
159+ """
160+
161+
112162rule consensus_sequences :
113163 input :
114164 BAM = alignment_wildcard ,
@@ -153,10 +203,14 @@ rule consseq_QA:
153203 input :
154204 REF = reference_file ,
155205 REF_majority_dels = "{dataset}/references/ref_majority_dels.fasta" ,
206+ REF_amb_dels = "{dataset}/references/ref_ambig_dels.fasta" ,
156207 output :
157208 REF_matcher = "{dataset}/references/ref_majority_dels.matcher" ,
209+ stats = "{dataset}/references/ref_stats.yaml" ,
158210 params :
159211 MATCHER = config .applications ["matcher" ],
212+ name = ID ,
213+ IUPAC = IUPAC ,
160214 log :
161215 outfile = "{dataset}/references/qa_consseq.out.log" ,
162216 errfile = "{dataset}/references/qa_consseq.err.log" ,
@@ -177,6 +231,15 @@ rule consseq_QA:
177231 touch {output.REF_matcher}
178232 echo "pure 'nnnn...' consensus, no possible alignement" | tee {log.outfile}
179233 fi
234+
235+ ( \
236+ echo '{params.name}:' && \
237+ printf ' %s: %s\\ n' \
238+ 'consensus_N' "$(grep -E '^[^>]' "{input.REF_majority_dels}" | tr -cd 'Nn' | wc -c)" \
239+ 'consensus_lower' "$(grep -E '^[^>]' "{input.REF_majority_dels}" | tr -cd 'atcg' | wc -c)" \
240+ 'consensus_IUPAC' "$(grep -E '^[^>]' "{input.REF_amb_dels}" | tr -cd '{params.IUPAC}' | wc -c)" \
241+ ) > "{output.stats}" \
242+ 2> >(tee -a "{log.errfile}" >&2)
180243 """
181244
182245
0 commit comments