-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathcatch_workflow.sh
More file actions
executable file
·168 lines (139 loc) · 5.86 KB
/
catch_workflow.sh
File metadata and controls
executable file
·168 lines (139 loc) · 5.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env sh
#
#SBATCH --get-user-env
#SBATCH -J CaTCH
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=50000
#SBATCH --output=catch.out
#SBATCH --error=catch.err
echo "CaTCH version 0.8.2"
function usage() {
echo "Usage:"
echo " $0 -b BAM_DIR -o PROCESS_DIR -O REPORT_DIR -v COVARS [-A ABUND_THRESH] [-R REF_ROWS] [-c DEMUX_BC] [-m HAMMING_DIST] [-X PATH_TO_CATCH_SCRIPTS] [-r] [-i] [-s]"
exit 1
}
# Defaults
SCRIPTSPATH='/users/kimon.froussios/catch'
#FU='/users/kimon.froussios/utility_scripts'
PYTHONPATH="$PYTHONPATH:$SCRIPTSPATH"
hamm=0
dxcnt=1
dist=1
post=1
dark=0
plot=1
abund=0.01
revcomp=0
spikedin=0
stringent=0
demux=0
# Parse options.
while getopts 'b:d:o:O:v:X:m:n:A:R:ris1234' flag; do
case "${flag}" in
b) bam="${OPTARG}" ;; # BAM folder.
d) demux=1 ;; # Custom demultiplex.
o) outdir="${OPTARG}" ;; # Output directory for the counts.
O) resdir="${OPTARG}" ;; # Ouptut directory for the analysis report.
v) covars="${OPTARG}" ;; # List of samples in desired order, with corresponding demultiplexing tag (optional), group, treatment, and display colour.
X) SCRIPTSPATH="${OPTARG}" ;; # Where to find all the scripts for this workflow.
m) hamm="${OPTARG}" ;; # Hamming distance at which to merge barcodes as likely sequencing errors.
n) dark="${OPTARG}" ;; # Number of dark bases to allow in the pattern (0).
A) abund="${OPTARG}" ;; # Barcode abundance threshold for the report (0.01).
R) ref="${OPTARG}" ;; # Comma seperated list of row numbers in covars to be used as reference samples in report, NOT counting the header line. If omitted, no reference will be used.
r) revcomp=1 ;; # Reverse complement the barcodes.
i) spikedin=1 ;; # Spike-in barcode was added (hard-coded barcode sequence).
s) stringent=1 ;; # Match full format of semi-random barcodes.
1) dxcnt=0 ;; # Skip demux and count.
2) dist=0 ;; # Skip hammind distance merge.
3) post=0 ;; # Skip table mergers.
4) plot=0 ;; # Skip analysis report.
*) usage ;;
esac
done
if [[ -z "$outdir" ]]; then
outdir='./process'
fi
if [[ ! -d $outdir ]]; then
mkdir -p $outdir
fi
wait_for_jobs(){
echo "waiting"
sleep 60 # seconds, give time to the scheduler to put up the task
sleeptime=120 # ask every 2 mins, for the first 10 mins
n=1
while true; do
if [ $(squeue | grep kimon.fr | grep -c $1) -eq 0 ]; then
break
else
echo "sleep another" $((sleeptime / 60)) "minutes..."
sleep $sleeptime
fi
n=$((n + 1))
if [ $n -eq 5 ]; then
sleeptime=300 # if still running after 10 mins, ask every 5 mins
fi
if [ $n -eq 10 ]; then
sleeptime=600 # if still running after 30 mins, ask every 10 mins
fi
done
}
set -e
# set -x
prefix=$(basename $bam)
prefix=${prefix/.bam/}
if [ "$dxcnt" -eq 1 ]; then
if [[ ! -z "$demux" ]]; then
barcodes=",-d ${covars}"
fi
if [ "$revcomp" -eq 1 ]; then
revcomp=',-r'
else
unset revcomp
fi
if [ "$spikedin" -eq 1 ]; then
spikedin=',-i'
else
unset spikedin
fi
if [ "$stringent" -eq 1 ]; then
stringent=',-s'
else
unset stringent
fi
# Match semi-random barcode format, demultiplex, and count the barcodes.
echo "${prefix}: Demultiplexing and counting barcodes"
# ,-o /dev/null ,-e /dev/null
${SCRIPTSPATH}/fileutilities.py T $bam --dir 'bam$' | ${SCRIPTSPATH}/fileutilities.py P --loop sbatch ,-J CaTCHc ,--mem=50G ,--time=0-03:00:00 ${SCRIPTSPATH}/barcodingQuantifier.py ,-f {abs} ,-o $outdir ,--n_dark $dark $barcodes $revcomp $spikedin $stringent
wait_for_jobs CaTCHc
fi
if [ "$dist" -eq 1 ]; then
# Merge barcodes with too similar sequences
if [ "$hamm" -gt 0 ]; then
echo "${prefix}: Merging similar barcodes at distance $hamm"
## preserve unmerged counts in a new name, so the merged can have the name that is used downstream
${SCRIPTSPATH}/fileutilities.py T ${outdir}/*/ --dir barcode-counts.txt | ${SCRIPTSPATH}/fileutilities.py P --loop mv {abs} {dir}/{bas}.raw.txt
${SCRIPTSPATH}/fileutilities.py T ${outdir}/*/ --dir barcode-counts.raw.txt | ${SCRIPTSPATH}/fileutilities.py P --loop sbatch ,-o /dev/null ,-e /dev/null ,-J CaTCHh ,--qos=medium ,--mem=20G ${SCRIPTSPATH}/barcodingHammingMerge.py ,-b {abs} ,-d $hamm ,-o {dir}/{cor}.txt
wait_for_jobs CaTCHh
fi
fi
if [ "$post" -eq 1 ]; then
echo "${prefix}: Merging sample-specific count tables into one"
# Merge sample-specific count tables into one collective table
${SCRIPTSPATH}/mergeBCcounts.R ${outdir}/${prefix}_barcode-counts.txt ${outdir}/*/*_barcode-counts.txt
# Also merge the summary reports.
${SCRIPTSPATH}/mergeBCcounts.R ${outdir}/${prefix}_summaries.txt ${outdir}/*/*_summary.txt > /dev/null # The stdout merge report is not sensible for the summaries file.
fi
# # Note to self how to merge the hammdist tables.
# head -n 2 ${outdir}/${prefix}/*hammdist.txt | tail -n 1 > ${outdir}/${prefix}_hammdist.tsv
# tail -n +3 ${outdir}/${prefix}/*hammdist.txt | perl -e 'while($line=<STDIN>){print $line if $line !~/^[=\n]/}' >> ${outdir}/${prefix}_hammdist.tsv
if [ "$plot" -eq 1 ]; then
echo "${prefix}: Compiling barcoding report"
if [ -z "$ref"]; then
srun --mem=9G ${SCRIPTSPATH}/barcoding_results_run.R -c $(realpath ${outdir}/${prefix}_barcode-counts.txt) -s $(realpath ${outdir}/${prefix}_summaries.txt) -d $(realpath ${resdir}/${prefix}) -v $(realpath $covars) -A $abund -p
else
srun --mem=9G ${SCRIPTSPATH}/barcoding_results_run.R -c $(realpath ${outdir}/${prefix}_barcode-counts.txt) -s $(realpath ${outdir}/${prefix}_summaries.txt) -d $(realpath ${resdir}/${prefix}) -v $(realpath $covars) -r $ref -A $abund -p
fi
fi
echo "${prefix}: Workflow finished!"