Skip to content

Commit 094a650

Browse files
authored
Merge branch 'dev' into dev
2 parents 54ca8f3 + d0cbcfa commit 094a650

66 files changed

Lines changed: 30237 additions & 8837 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/markdownlint.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,5 @@ no-inline-html:
88
- img
99
- p
1010
- kbd
11+
- details
12+
- summary

.github/workflows/ci.yml

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,13 @@ jobs:
5252
git clone --single-branch --branch eager https://github.com/nf-core/test-datasets.git data
5353
- name: BASIC Run the basic pipeline with directly supplied single-end FASTQ
5454
run: |
55-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --input 'data/testdata/Mammoth/fastq/*_R1_*.fq.gz' --single_end
55+
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --input 'data/testdata/Mammoth/fastq/*_R1_*.fq.gz' --single_end
5656
- name: BASIC Run the basic pipeline with directly supplied paired-end FASTQ
5757
run: |
58-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --input 'data/testdata/Mammoth/fastq/*_{R1,R2}_*tengrand.fq.gz'
58+
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --input 'data/testdata/Mammoth/fastq/*_{R1,R2}_*tengrand.fq.gz'
5959
- name: BASIC Run the basic pipeline with supplied --input BAM
6060
run: |
61-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --input 'data/testdata/Mammoth/bam/*_R1_*.bam' --bam --single_end
61+
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --input 'data/testdata/Mammoth/bam/*_R1_*.bam' --bam --single_end
6262
- name: BASIC Run the basic pipeline with the test profile with, PE/SE, bwa aln
6363
run: |
6464
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --save_reference
@@ -107,30 +107,30 @@ jobs:
107107
- name: BAM_FILTERING Run basic mapping pipeline with mapping quality filtering, and unmapped export
108108
run: |
109109
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_mapping_quality_threshold 37 --bam_discard_unmapped --bam_unmapped_type 'fastq'
110-
- name: DEDUPLICATION Test with markduplicates
110+
- name: DEDUPLICATION Test with dedup
111111
run: |
112-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --dedupper 'markduplicates'
112+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --dedupper 'dedup'
113113
- name: GENOTYPING_HC Test running GATK HaplotypeCaller
114114
run: |
115-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_fna,docker --dedupper 'dedup' --run_genotyping --genotyping_tool 'hc' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_hc_emitrefconf 'BP_RESOLUTION'
115+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_fna,docker --run_genotyping --genotyping_tool 'hc' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_hc_emitrefconf 'BP_RESOLUTION'
116116
- name: GENOTYPING_FB Test running FreeBayes
117117
run: |
118-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --dedupper 'dedup' --run_genotyping --genotyping_tool 'freebayes'
118+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'freebayes'
119119
- name: GENOTYPING_PC Test running pileupCaller
120120
run: |
121-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --dedupper 'dedup' --run_genotyping --genotyping_tool 'pileupcaller'
121+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --run_genotyping --genotyping_tool 'pileupcaller'
122122
- name: GENOTYPING_ANGSD Test running ANGSD genotype likelihood calculation
123123
run: |
124-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --dedupper 'dedup' --run_genotyping --genotyping_tool 'angsd'
124+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --run_genotyping --genotyping_tool 'angsd'
125125
- name: SKIPPING Test checking all skip steps work i.e. input bam, skipping straight to genotyping
126126
run: |
127127
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes'
128128
- name: TRIMBAM Test bamutils works alone
129129
run: |
130-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --dedupper 'dedup' --run_trim_bam
130+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_trim_bam
131131
- name: TRIMBAM Test PMDtools works alone
132132
run: |
133-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --dedupper 'dedup' --run_pmdtools
133+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_pmdtools
134134
- name: GATK 3.5 Download resource files
135135
run: |
136136
mkdir -p jars/gatk_3_5
@@ -140,13 +140,13 @@ jobs:
140140
GATK_JAR=$(readlink -f jars/gatk_3_5/GenomeAnalysisTK.jar)
141141
- name: GENOTYPING_UG AND MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
142142
run: |
143-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --dedupper 'dedup' --run_genotyping --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
143+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
144144
- name: COMPLEX LANE/LIBRARY MERGING Test running lane and library merging prior to GATK UnifiedGenotyper and running MultiVCFAnalyzer
145145
run: |
146-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --dedupper 'dedup' --run_genotyping --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
146+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
147147
- name: GENOTYPING_UG ON TRIMMED BAM Test
148148
run: |
149-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --dedupper 'dedup' --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
149+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --gatk_ug_jar '/home/runner/work/eager/eager/jars/gatk_3_5/GenomeAnalysisTK.jar' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
150150
- name: BAM_INPUT Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
151151
run: |
152152
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval --run_convertinputbam

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
2828
* [#504] Removed sexdeterrmine-snps plot from MultiQC report.
2929
* Nuclear contamination results are now shown in the MultiQC report.
3030
* Nuclear contamination is now reported with the correct library names.
31+
* Tutorial on how to use profiles for reproducible science (i.e. parameter sharing between different groups)
3132

3233
### `Fixed`
3334

@@ -43,6 +44,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
4344
* [#444](https://github.com/nf-core/eager/issues/444) - Provide option for preserving realigned bam + index
4445
* Increase MultiQC process memory requirements to ensure enough memory for large runs
4546
* Fixed deduplication output logic. Will now pass along only the post-rmdup bams if duplicate removal is not skipped, instead of both the post-rmdup and pre-rmdup bams.
47+
* [#497](https://github.com/nf-core/eager/issues/497) - Simplifies number of parameters required to run bam filtering
48+
* [#501](https://github.com/nf-core/eager/issues/501) - Adds additional validation checks for MALT/MaltExtract database input files
49+
* [#508](https://github.com/nf-core/eager/issues/508) - Made Markduplicates default dedupper due to narrower context specificity of dedup
50+
* [#516](https://github.com/nf-core/eager/issues/516) - Made bedtools not report out of memory exit code when warning of inconsistent FASTA/Bed entry names
4651

4752
### `Dependencies`
4853

@@ -60,6 +65,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
6065
* Latest version of xopen (0.9.0)
6166
* Added Bowtie 2 (2.4.1)
6267
* Latest version of Sex.DetERRmine (1.1.2)
68+
* Latest version of endorS.py (0.3)
6369

6470
## [2.1.0] - 2020-03-05 - "Ravensburg"
6571

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ By default the pipeline currently performs the following:
4545

4646
Additional functionality contained by the pipeline currently includes:
4747

48+
#### Input
49+
50+
* Automatic merging of complex sequencing setups (e.g. multiple lanes, sequencing configurations, library types)
51+
4852
#### Preprocessing
4953

5054
* Illumina two-coloured sequencer poly-G tail removal (`fastp`)

assets/dummy.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Empty dummy file for processes that have optional input but whose channels still need to be filled.

assets/dummy_postfilterflagstat.stats

Lines changed: 0 additions & 1 deletion
This file was deleted.

assets/multiqc_config.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ top_modules:
9393
- 'mtnucratio'
9494
- 'qualimap'
9595
- 'sexdeterrmine'
96-
- 'gatk'
9796
- 'multivcfanalyzer':
9897
path_filters:
9998
- '*MultiVCFAnalyzer.json'

bin/endorS.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
'''))
2424
parser.add_argument('samtoolsfiles', metavar='<samplefile>.stats', type=str, nargs='+',
2525
help='output of samtools flagstat in a txt file (at least one required). If two files are supplied, the mapped reads of the second file is divided by the total reads in the first, since it assumes that the <samplefile.stats> are related to the same sample. Useful after BAM filtering')
26-
parser.add_argument('-v','--version', action='version', version='%(prog)s 0.2')
26+
parser.add_argument('-v','--version', action='version', version='%(prog)s 0.3')
2727
parser.add_argument('--output', '-o', nargs='?', help='specify a file format for an output file. Options: <json> for a MultiQC json output. Default: none')
2828
parser.add_argument('--name', '-n', nargs='?', help='specify name for the output file. Default: extracted from the first samtools flagstat file provided')
2929
args = parser.parse_args()
@@ -37,7 +37,14 @@
3737
#Extract number of mapped reads pre-quality filtering:
3838
mappedPre = float((re.findall(r'([0-9]+) \+ [0-9]+ mapped ',contentsPre))[0])
3939
#Calculation of endogenous DNA pre-quality filtering:
40-
endogenousPre = float("{0:.2f}".format(round((mappedPre / totalReads * 100), 2)))
40+
if totalReads == 0.0:
41+
endogenousPre = 0.000000
42+
print("WARNING: no reads in the fastq input, Endogenous DNA raw (%) set to 0.000000")
43+
elif mappedPre == 0.0:
44+
endogenousPre = 0.000000
45+
print("WARNING: no mapped reads, Endogenous DNA raw (%) set to 0.000000")
46+
else:
47+
endogenousPre = float("{0:.6f}".format(round((mappedPre / totalReads * 100), 6)))
4148
except:
4249
print("Incorrect input, please provide at least a samtools flag stats as input\nRun:\npython endorS.py --help \nfor more information on how to run this script")
4350
sys.exit()
@@ -49,7 +56,14 @@
4956
#Extract number of mapped reads post-quality filtering:
5057
mappedPost = float((re.findall(r'([0-9]+) \+ [0-9]+ mapped',contentsPost))[0])
5158
#Calculation of endogenous DNA post-quality filtering:
52-
endogenousPost = float("{0:.2f}".format(round((mappedPost / totalReads * 100),2)))
59+
if totalReads == 0.0:
60+
endogenousPost = 0.000000
61+
print("WARNING: no reads in the fastq input, Endogenous DNA modified (%) set to 0.000000")
62+
elif mappedPost == 0.0:
63+
endogenousPost = 0.000000
64+
print("WARNING: no mapped reads, Endogenous DNA modified (%) set to 0.000000")
65+
else:
66+
endogenousPost = float("{0:.6f}".format(round((mappedPost / totalReads * 100),6)))
5367
except:
5468
print("Only one samtools flagstat file provided")
5569
#Set the number of reads post-quality filtering to 0 if samtools
@@ -68,9 +82,10 @@
6882
if mappedPost == "NA":
6983
#Creating the json file
7084
jsonOutput={
85+
"id": "endorS.py ",
7186
"plot_type": "generalstats",
7287
"pconfig": {
73-
"endogenous_dna": { "max": 100, "min": 0, "title": "Endogenous DNA (%)"}
88+
"endogenous_dna": { "max": 100, "min": 0, "title": "Endogenous DNA (%)", "format": '{:,.2f}'}
7489
},
7590
"data": {
7691
name : { "endogenous_dna": endogenousPre}
@@ -79,14 +94,15 @@
7994
else:
8095
#Creating the json file
8196
jsonOutput={
97+
"id": "endorS.py ",
8298
"plot_type": "generalstats",
8399
"pconfig": {
84-
"endogenous_dna": { "max": 100, "min": 0, "title": "Endogenous DNA (%)"},
85-
"endogenous_dna_post": { "max": 100, "min": 0, "title": "Endogenous DNA Post (%)"}
100+
"endogenous_dna": { "max": 100, "min": 0, "title": "Endogenous DNA (%)", "format": '{:,.2f}'},
101+
"endogenous_dna_post": { "max": 100, "min": 0, "title": "Endogenous DNA Post (%)", "format": '{:,.2f}'}
86102
},
87103
"data": {
88104
name : { "endogenous_dna": endogenousPre, "endogenous_dna_post": endogenousPost}
89-
}
105+
},
90106
}
91107
#Checking for print to screen argument:
92108
if args.output is not None:

conf/base.config

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,22 +82,22 @@ process {
8282

8383
// Add 141 ignore due to unclean pipe closing by pmdtools https://github.com/pontussk/PMDtools/issues/7
8484
withName: pmdtools {
85-
errorStrategy = { task.exitStatus in [141] ? 'ignore' : 'retry' }
85+
errorStrategy = { task.exitStatus in [143,137,104,134,139,141] ? 'ignore' : 'retry' }
8686
}
8787

8888
// Add 1 retry for certain Java tools, as Java "not enough heap space" errors give exit code 1
8989
withName: dedup {
90-
errorStrategy = { task.exitStatus in [1] ? 'retry' : 'finish' }
90+
errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' }
9191
}
9292

9393
// Add 1 retry as not enough heapspace java error gives exit code 1
9494
withName: malt {
95-
errorStrategy = { task.exitStatus in [1] ? 'retry' : 'finish' }
95+
errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' }
9696
}
9797

9898
// other process specific exit statuses
9999
withName: nuclear_contamination {
100-
errorStrategy = { task.exitStatus in [134] ? 'ignore' : 'retry' }
100+
errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'ignore' : 'retry' }
101101
}
102102

103103
withName: multiqc {

conf/test.config

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@ params {
1717
genome = false
1818
//Input data
1919
single_end = false
20-
readPaths = [['JK2782_TGGCCGATCAACGA_L008', ['https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/fastq/JK2782_TGGCCGATCAACGA_L008_R1_001.fastq.gz.tengrand.fq.gz','https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/fastq/JK2782_TGGCCGATCAACGA_L008_R2_001.fastq.gz.tengrand.fq.gz']],
21-
['JK2802_AGAATAACCTACCA_L008', ['https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/fastq/JK2802_AGAATAACCTACCA_L008_R1_001.fastq.gz.tengrand.fq.gz','https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/fastq/JK2802_AGAATAACCTACCA_L008_R2_001.fastq.gz.tengrand.fq.gz']],
22-
]
2320
// Genome references
2421
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Mammoth/Mammoth_MT_Krause.fasta'
2522
}

0 commit comments

Comments
 (0)