Skip to content

Commit abf4642

Browse files
committed
Add docs on this
1 parent 8d26b33 commit abf4642

3 files changed

Lines changed: 27 additions & 18 deletions

File tree

docs/usage.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,10 @@ If you prefer, you can specify the full path to your reference genome when you r
170170
```
171171
> If you don't specify appropriate `--bwa_index` or `--fasta_index` parameters, the pipeline will create these indices for you automatically. Note that saving these for later has to be turned on using `--saveReference`. You may also specify the path to a gzipped (`*.gz` file extension) FastA as reference genome - this will be uncompressed by the pipeline automatically for you. Note that other file extensions such as `.fna` and `.fa` are also supported but will be renamed to `.fasta` automatically by the pipeline.
172172
173+
### `--size`
174+
175+
This parameter is set automatically by the pipeline depending on the size of your chosen reference FastA genome. If the FastA file is larger than 3.5GB, the `samtools index` calls in the pipeline automatically generate `CSI` indices instead of `BAI` indices to compensate for the size of the reference genome. This should not be required for smaller genomes, but `>4GB` genomes have been shown to need `CSI` indices. You cannot set this parameter yourself, but it is documented here for the sake of completeness.
176+
173177
### `--genome` (using iGenomes)
174178

175179
The pipeline config files come bundled with paths to the Illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource.

main.nf

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,10 @@ if("${params.fasta}".endsWith(".gz")){
240240
.ifEmpty { exit 1, "No genome specified! Please specify one with --fasta"}
241241
.into {ch_fasta_for_bwa_indexing;ch_fasta_for_faidx_indexing;ch_fasta_for_dict_indexing; ch_fasta_for_damageprofiler; ch_fasta_for_qualimap; ch_fasta_for_pmdtools; ch_fasta_for_circularmapper_index}
242242
}
243-
243+
244+
245+
//Check genome size for large reference genomes
246+
// Selects the extra flag passed to the `samtools index` calls in the processes below:
// "-c" (CSI index) when the file at params.fasta is larger than 3,500,000,000 bytes,
// otherwise "" (reported as 'BAI' in the run summary).
// NOTE(review): this measures the file exactly as given in params.fasta; for a gzipped
// reference (handled by the `.gz` branch above) that is presumably the compressed size,
// not the size of the uncompressed genome - confirm this is intended.
// NOTE(review): nextflow.config also declares `size = ""`. Config-level params normally
// take precedence over params assigned in the script, so this assignment may have no
// effect at runtime - verify against the Nextflow parameter precedence rules.
params.size = (file("${params.fasta}").size() > 3500000000) ? "-c" : ""
244247

245248

246249

@@ -346,6 +349,7 @@ summary['Pipeline Version'] = workflow.manifest.version
346349
summary['Run Name'] = custom_runName ?: workflow.runName
347350
summary['Reads'] = params.reads
348351
summary['Fasta Ref'] = params.fasta
352+
summary['BAM Index Type'] = (params.size == "") ? 'BAI' : 'CSI'
349353
if(params.bwa_index) summary['BWA Index'] = params.bwa_index
350354
summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End'
351355
summary['Max Memory'] = params.max_memory
@@ -649,7 +653,7 @@ process bwa {
649653

650654
output:
651655
file "*.sorted.bam" into ch_mapped_reads_idxstats,ch_mapped_reads_filter,ch_mapped_reads_preseq, ch_mapped_reads_damageprofiler
652-
file "*.csi" into ch_bam_index_for_damageprofiler
656+
file "*.{bai,csi}" into ch_bam_index_for_damageprofiler
653657

654658

655659
script:
@@ -658,7 +662,7 @@ process bwa {
658662
"""
659663
bwa aln -t ${task.cpus} $fasta $reads -n ${params.bwaalnn} -l ${params.bwaalnl} -k ${params.bwaalnk} -f "${reads.baseName}.sai"
660664
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads | samtools sort -@ ${task.cpus} -O bam - > "${prefix}".sorted.bam
661-
samtools index -c "${prefix}".sorted.bam
665+
samtools index ${params.size} "${prefix}".sorted.bam
662666
"""
663667
}
664668

@@ -703,7 +707,7 @@ process circularmapper{
703707

704708
output:
705709
file "*.sorted.bam" into ch_mapped_reads_idxstats_cm,ch_mapped_reads_filter_cm,ch_mapped_reads_preseq_cm, ch_mapped_reads_damageprofiler_cm
706-
file "*.csi"
710+
file "*.{bai,csi}"
707711

708712
script:
709713
filter = "${params.circularfilter}" ? '' : '-f true -x false'
@@ -715,7 +719,7 @@ process circularmapper{
715719
bwa samse -r "@RG\\tID:ILLUMINA-${prefix}\\tSM:${prefix}\\tPL:illumina" $fasta "${reads.baseName}".sai $reads > tmp.out
716720
realignsamfile -e ${params.circularextension} -i tmp.out -r $fasta $filter
717721
samtools sort -@ ${task.cpus} -O bam tmp_realigned.bam > "${prefix}".sorted.bam
718-
samtools index -c "${prefix}".sorted.bam
722+
samtools index ${params.size} "${prefix}".sorted.bam
719723
"""
720724
}
721725

@@ -731,7 +735,7 @@ process bwamem {
731735

732736
output:
733737
file "*.sorted.bam" into ch_bwamem_mapped_reads_idxstats,ch_bwamem_mapped_reads_filter,ch_bwamem_mapped_reads_preseq, ch_bwamem_mapped_reads_damageprofiler
734-
file "*.csi"
738+
file "*.{bai,csi}"
735739

736740

737741
script:
@@ -786,38 +790,38 @@ process samtools_filter {
786790
file "*filtered.bam" into ch_bam_filtered_qualimap, ch_bam_filtered_dedup, ch_bam_filtered_markdup, ch_bam_filtered_pmdtools, ch_bam_filtered_angsd, ch_bam_filtered_gatk
787791
file "*.fastq.gz" optional true
788792
file "*.unmapped.bam" optional true
789-
file "*.csi"
793+
file "*.{bai,csi}"
790794

791795
script:
792796
prefix="$bam" - ~/(\.bam)?/
793797

794798
if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "discard"){
795799
"""
796800
samtools view -h -b $bam -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam
797-
samtools index -c ${prefix}.filtered.bam
801+
samtools index ${params.size} ${prefix}.filtered.bam
798802
"""
799803
} else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "bam"){
800804
"""
801805
samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam)
802-
samtools index -c ${prefix}.filtered.bam
806+
samtools index ${params.size} ${prefix}.filtered.bam
803807
"""
804808
} else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "fastq"){
805809
"""
806810
samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam)
807-
samtools index -c ${prefix}.filtered.bam
811+
samtools index ${params.size} ${prefix}.filtered.bam
808812
samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz
809813
rm ${prefix}.unmapped.bam
810814
"""
811815
} else if("${params.bam_discard_unmapped}" && "${params.bam_unmapped_type}" == "both"){
812816
"""
813817
samtools view -h $bam | tee >(samtools view - -@ ${task.cpus} -f4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.unmapped.bam) >(samtools view - -@ ${task.cpus} -F4 -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam)
814-
samtools index -c ${prefix}.filtered.bam
818+
samtools index ${params.size} ${prefix}.filtered.bam
815819
samtools fastq -tn ${prefix}.unmapped.bam | pigz -p ${task.cpus} > ${prefix}.unmapped.fastq.gz
816820
"""
817821
} else { //Only apply quality filtering, default
818822
"""
819823
samtools view -h -b $bam -@ ${task.cpus} -q ${params.bam_mapping_quality_threshold} -o ${prefix}.filtered.bam
820-
samtools index -c ${prefix}.filtered.bam
824+
samtools index ${params.size} ${prefix}.filtered.bam
821825
"""
822826
}
823827
}
@@ -841,25 +845,25 @@ process dedup{
841845
file "*.hist" into ch_hist_for_preseq
842846
file "*.log" into ch_dedup_results_for_multiqc
843847
file "${prefix}.sorted.bam" into ch_dedup_bam
844-
file "*.csi"
848+
file "*.{bai,csi}"
845849

846850
script:
847851
prefix="${bam.baseName}"
848852
treat_merged="${params.dedup_all_merged}" ? '-m' : ''
849-
853+
850854
if(params.singleEnd) {
851855
"""
852856
dedup -i $bam $treat_merged -o . -u
853857
mv *.log dedup.log
854858
samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam
855-
samtools index -c "$prefix".sorted.bam
859+
samtools index ${params.size} "$prefix".sorted.bam
856860
"""
857861
} else {
858862
"""
859863
dedup -i $bam $treat_merged -o . -u
860864
mv *.log dedup.log
861865
samtools sort -@ ${task.cpus} "$prefix"_rmdup.bam -o "$prefix".sorted.bam
862-
samtools index -c "$prefix".sorted.bam
866+
samtools index ${params.size} "$prefix".sorted.bam
863867
"""
864868
}
865869
}
@@ -1037,15 +1041,15 @@ process bam_trim {
10371041

10381042
output:
10391043
file "*.trimmed.bam" into ch_trimmed_bam_for_genotyping
1040-
file "*.csi"
1044+
file "*.{bai,csi}"
10411045

10421046
script:
10431047
prefix="${bam.baseName}"
10441048
softclip = "${params.bamutils_softclip}" ? '-c' : ''
10451049
"""
10461050
bam trimBam $bam tmp.bam -L ${params.bamutils_clip_left} -R ${params.bamutils_clip_right} ${softclip}
10471051
samtools sort -@ ${task.cpus} tmp.bam -o ${prefix}.trimmed.bam
1048-
samtools index -c ${prefix}.trimmed.bam
1052+
samtools index ${params.size} ${prefix}.trimmed.bam
10491053
"""
10501054
}
10511055

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ params {
2323
tracedir = "${params.outdir}/pipeline_info"
2424
readPaths = false
2525
bam = false
26+
size = ""
2627

2728
//More defaults
2829
complexity_filter = false

0 commit comments

Comments
 (0)