Skip to content

Commit 1a4c7a5

Browse files
committed
[feature] num consens positions with N and IUPAC
1 parent ca5ed55 commit 1a4c7a5

3 files changed

Lines changed: 73 additions & 0 deletions

File tree

workflow/rules/common.smk

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,8 @@ for srec in sample_list:
681681

682682
if config.output["QA"]:
683683
alignments.append(os.path.join(sdir, "references/ref_majority_dels.matcher"))
684+
alignments.append(os.path.join(sdir, "references/ref_stats.yaml"))
685+
alignments.append(os.path.join(sdir, "references/consensus.bcftools.stats.yaml"))
684686
alignments.append(os.path.join(sdir, "references/frameshift_deletions_check.tsv"))
685687

686688
trimmed_files.append(os.path.join(sdir, "preprocessed_data/R1.fastq.gz"))

workflow/rules/consensus.smk

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,56 @@ rule consensus_bcftools:
109109
"""
110110

111111

112+
localrules:
113+
cons_bcf_QA,
114+
115+
116+
IUPAC = (lambda x: x.upper() + x.lower())(
117+
"R" # = A or G
118+
"K" # = G or T
119+
"S" # = G or C
120+
"Y" # = C or T
121+
"M" # = A or C
122+
"W" # = A or T
123+
"B" # = not A (C, G or T)
124+
"H" # = not G (A, C or T)
125+
"D" # = not C (A, G or T)
126+
"V" # = not T (A, C or G)
127+
)
128+
129+
130+
rule cons_bcf_QA:
131+
input:
132+
fname_fasta="{dataset}/references/consensus.bcftools.fasta",
133+
fname_fasta_ambig="{dataset}/references/consensus_ambig.bcftools.fasta",
134+
output:
135+
stats="{dataset}/references/consensus.bcftools.stats.yaml",
136+
params:
137+
name=ID,
138+
IUPAC=IUPAC,
139+
log:
140+
outfile="{dataset}/references/consensus.bcftools.stats.out.log",
141+
errfile="{dataset}/references/consensus.bcftools.stats.err.log",
142+
# conda: # NOTE: no environment, we rely on standard bash+coreutils
143+
benchmark:
144+
"{dataset}/alignments/consensus.benchmark"
145+
resources:
146+
disk_mb=1250,
147+
mem_mb=config.consensus_sequences["mem"],
148+
runtime=config.consensus_sequences["time"],
149+
threads: 1
150+
shell:
151+
r"""
152+
( \
153+
echo '{params.name}:' && \
154+
printf ' %s: %s\n' \
155+
'consensus_N' "$(grep -E '^[^>]' "{input.fname_fasta}" | tr -cd 'Nn' | wc -c)" \
156+
'consensus_IUPAC' "$(grep -E '^[^>]' "{input.fname_fasta_ambig}" | tr -cd '{params.IUPAC}' | wc -c)" \
157+
) > "{output.stats}" \
158+
2> >(tee "{log.errfile}" >&2)
159+
"""
160+
161+
112162
rule consensus_sequences:
113163
input:
114164
BAM=alignment_wildcard,
@@ -153,10 +203,14 @@ rule consseq_QA:
153203
input:
154204
REF=reference_file,
155205
REF_majority_dels="{dataset}/references/ref_majority_dels.fasta",
206+
REF_amb_dels="{dataset}/references/ref_ambig_dels.fasta",
156207
output:
157208
REF_matcher="{dataset}/references/ref_majority_dels.matcher",
209+
stats="{dataset}/references/ref_stats.yaml",
158210
params:
159211
MATCHER=config.applications["matcher"],
212+
name=ID,
213+
IUPAC=IUPAC,
160214
log:
161215
outfile="{dataset}/references/qa_consseq.out.log",
162216
errfile="{dataset}/references/qa_consseq.err.log",
@@ -177,6 +231,15 @@ rule consseq_QA:
177231
touch {output.REF_matcher}
178232
echo "pure 'nnnn...' consensus, no possible alignement" | tee {log.outfile}
179233
fi
234+
235+
( \
236+
echo '{params.name}:' && \
237+
printf ' %s: %s\\n' \
238+
'consensus_N' "$(grep -E '^[^>]' "{input.REF_majority_dels}" | tr -cd 'Nn' | wc -c)" \
239+
'consensus_lower' "$(grep -E '^[^>]' "{input.REF_majority_dels}" | tr -cd 'atcg' | wc -c)" \
240+
'consensus_IUPAC' "$(grep -E '^[^>]' "{input.REF_amb_dels}" | tr -cd '{params.IUPAC}' | wc -c)" \
241+
) > "{output.stats}" \
242+
2> >(tee -a "{log.errfile}" >&2)
180243
"""
181244

182245

workflow/rules/publish.smk

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@ rule prepare_upload:
6060
if config.output["QA"]
6161
else []
6262
),
63+
consensus_stats=(
64+
"{dataset}/references/consensus.bcftools.stats.yaml"
65+
if config.output["QA"]
66+
else []
67+
),
68+
consensus_aligned_stats=(
69+
"{dataset}/references/ref_stats.yaml" if config.output["QA"] else []
70+
),
6371
output:
6472
upload_prepared_touch="{dataset}/upload_prepared.touch",
6573
params:

0 commit comments

Comments
 (0)