Skip to content

Commit 8a6dccb

Browse files
committed
Update new parameter large_ref
1 parent abf4642 commit 8a6dccb

3 files changed

Lines changed: 22 additions & 25 deletions

File tree

docs/usage.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,9 +170,9 @@ If you prefer, you can specify the full path to your reference genome when you r
170170
```
171171
> If you don't specify appropriate `--bwa_index`, `--fasta_index` parameters, the pipeline will create these indices for you automatically. Note that saving these for later has to be turned on using `--saveReference`. You may also specify the path to a gzipped (`*.gz` file extension) FastA as reference genome - this will be uncompressed by the pipeline automatically for you. Note that other file extensions such as `.fna`, `.fa` are also supported but will be renamed to `.fasta` automatically by the pipeline.
172172
173-
### `--size`
173+
### `--large_ref`
174174

175-
This parameter is automatically set by the pipeline depending on the size of your chosen reference FastA genome. If this is larger than 3.5GB, the `samtools index` calls in the pipeline automatically generate `CSI` indices instead of `BAI` indices to accompensate for the size of the reference genome. Shouldn't be required for smaller genomes, but `>4GB` genomes have been shown to need `CSI` indices. You cannot set this parameter yourselves, but it is nevertheless documented for the sake of completeness in here.
175+
This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `CSI` indices instead of `BAI` indices to compensate for the size of the reference genome. This parameter is not required for smaller references (including a human `hg19` or `grch37`/`grch38` reference), but `>4GB` genomes have been shown to need `CSI` indices.
176176

177177
### `--genome` (using iGenomes)
178178

main.nf

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -241,15 +241,6 @@ if("${params.fasta}".endsWith(".gz")){
241241
.into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper_index}
242242
}
243243

244-
245-
//Check genome size for large reference genomes
246-
params.size = (file("${params.fasta}").size() > 3500000000) ? "-c" : ""
247-
248-
249-
250-
251-
252-
253244
//Index files provided? Then check whether they are correct and complete
254245
if (params.aligner != 'bwa' && !params.circularmapper && !params.bwamem){
255246
exit 1, "Invalid aligner option. Default is bwa, but specify --circularmapper or --bwamem to use these."
@@ -349,7 +340,7 @@ summary['Pipeline Version'] = workflow.manifest.version
349340
summary['Run Name'] = custom_runName ?: workflow.runName
350341
summary['Reads'] = params.reads
351342
summary['Fasta Ref'] = params.fasta
352-
summary['BAM Index Type'] = (params.size == "") ? 'BAI' : 'CSI'
343+
// params.large_ref is a boolean flag (default false); report CSI only when it is set.
summary['BAM Index Type'] = params.large_ref ? 'CSI' : 'BAI'
353344
if(params.bwa_index) summary['BWA Index'] = params.bwa_index
354345
summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End'
355346
summary['Max Memory'] = params.max_memory
@@ -659,10 +650,11 @@ process bwa {
659650
script:
660651
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
661652
fasta = "${index}/*.fasta"
653+
// Pass -c to `samtools index` (CSI instead of BAI) when --large_ref is set.
// `${...}` is only valid inside a GString, so read the param directly.
size = params.large_ref ? '-c' : ''
662654
"""
663655
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai"
664656
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
665-
samtools index ${params.size} "${prefix}".sorted.bam
657+
samtools index $size "${prefix}".sorted.bam
666658
"""
667659
}
668660

@@ -713,13 +705,14 @@ process circularmapper{
713705
filter = "${params.circularfilter}" ? '' : '-f true -x false'
714706
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
715707
fasta = "${index}/*_*.fasta"
708+
// Pass -c to `samtools index` (CSI instead of BAI) when --large_ref is set.
// `${...}` is only valid inside a GString, so read the param directly.
size = params.large_ref ? '-c' : ''
716709

717710
"""
718711
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai"
719712
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out
720713
realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter
721714
samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam
722-
samtools index ${params.size} "${prefix}".sorted.bam
715+
samtools index $size "${prefix}".sorted.bam
723716
"""
724717
}
725718

@@ -741,9 +734,10 @@ process bwamem {
741734
script:
742735
prefix = reads[0].toString() - ~/(_R1)?(\.combined\.)?(prefixed)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
743736
fasta = "${index}/*.fasta"
737+
// Pass -c to `samtools index` (CSI instead of BAI) when --large_ref is set.
// `${...}` is only valid inside a GString, so read the param directly.
size = params.large_ref ? '-c' : ''
744738
"""
745739
bwa mem -t ${task.cpus} $fasta $reads -R "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
746-
samtools index -c -@ ${task.cpus} "${prefix}".sorted.bam
740+
samtools index $size -@ ${task.cpus} "${prefix}".sorted.bam
747741
"""
748742
}
749743

@@ -794,34 +788,35 @@ process samtools_filter {
794788

795789
script:
796790
prefix="$bam" - ~/(\.bam)?/
791+
// Pass -c to `samtools index` (CSI instead of BAI) when --large_ref is set.
// `${...}` is only valid inside a GString, so read the param directly.
size = params.large_ref ? '-c' : ''
797792

798793
if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "discard"){
799794
"""
800795
samtools view -h -b $bam -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam
801-
samtools index ${params.size} ${prefix}.filtered.bam
796+
samtools index $size ${prefix}.filtered.bam
802797
"""
803798
} else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "bam"){
804799
"""
805800
samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam)
806-
samtools index ${params.size} ${prefix}.filtered.bam
801+
samtools index $size ${prefix}.filtered.bam
807802
"""
808803
} else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "fastq"){
809804
"""
810805
samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam)
811-
samtools index ${params.size} ${prefix}.filtered.bam
806+
samtools index $size ${prefix}.filtered.bam
812807
samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz
813808
rm ${prefix}.unmapped.bam
814809
"""
815810
} else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "both"){
816811
"""
817812
samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam)
818-
samtools index ${params.size} ${prefix}.filtered.bam
813+
samtools index $size ${prefix}.filtered.bam
819814
samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz
820815
"""
821816
} else { //Only apply quality filtering, default
822817
"""
823818
samtools view -h -b $bam -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam
824-
samtools index ${params.size} ${prefix}.filtered.bam
819+
samtools index $size ${prefix}.filtered.bam
825820
"""
826821
}
827822
}
@@ -850,20 +845,21 @@ process dedup{
850845
script:
851846
prefix="${bam.baseName}"
852847
treat_merged="${params.dedup_all_merged}" ? '-m' : ''
848+
// Pass -c to `samtools index` (CSI instead of BAI) when --large_ref is set.
// `${...}` is only valid inside a GString, so read the param directly.
size = params.large_ref ? '-c' : ''
853849

854850
if(params.singleEnd) {
855851
"""
856852
dedup -i $bam $treat_merged -o . -u
857853
mv *.log dedup.log
858854
samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam
859-
samtools index ${params.size} "$prefix".sorted.bam
855+
samtools index $size "$prefix".sorted.bam
860856
"""
861857
} else {
862858
"""
863859
dedup -i $bam $treat_merged -o . -u
864860
mv *.log dedup.log
865861
samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam
866-
samtools index ${params.size} "$prefix".sorted.bam
862+
samtools index $size "$prefix".sorted.bam
867863
"""
868864
}
869865
}
@@ -1046,10 +1042,11 @@ process bam_trim {
10461042
script:
10471043
prefix="${bam.baseName}"
10481044
softclip = "${params.bamutils_softclip}" ? '-c' : ''
1045+
// Pass -c to `samtools index` (CSI instead of BAI) when --large_ref is set.
// `${...}` is only valid inside a GString, so read the param directly.
size = params.large_ref ? '-c' : ''
10491046
"""
10501047
bam trimBam $bam tmp.bam -L ${params.bamutils_clip_left} -R ${params.bamutils_clip_right} ${softclip}
10511048
samtools sort -@ ${task.cpus} tmp.bam -o ${prefix}.trimmed.bam
1052-
samtools index ${params.size} ${prefix}.trimmed.bam
1049+
samtools index $size ${prefix}.trimmed.bam
10531050
"""
10541051
}
10551052

nextflow.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ params {
2323
tracedir = "${params.outdir}/pipeline_info"
2424
readPaths = false
2525
bam = false
26-
size = ""
27-
26+
large_ref = false
27+
2828
//More defaults
2929
complexity_filter = false
3030
complexity_filter_poly_g_min = 10

0 commit comments

Comments
 (0)