nf-core · apeltzer · Sep 30, 2020 · Jul 29, 2020 · Sep 14, 2020 · Sep 15, 2020
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -103,9 +103,9 @@ jobs:
       - name: MAPPER_BT2 Test running with BowTie2
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --mapper 'bowtie2' --bt2_alignmode 'local' --bt2_sensitivity 'sensitive' --bt2n 1 --bt2l 16 --bt2_trim5 1 --bt2_trim3 1
-      - name: STRIP_FASTQ Run the basic pipeline with output unmapped reads as fastq
+      - name: HOST_REMOVAL_FASTQ Run the basic pipeline with output unmapped reads as fastq
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --strip_input_fastq
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --hostremoval_input_fastq
       - name: BAM_FILTERING Run basic mapping pipeline with mapping quality filtering, and unmapped export
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_mapping_quality_threshold 37  --bam_unmapped_type 'fastq'

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -55,6 +55,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 * [#516](https://github.com/nf-core/eager/issues/516) - Made bedtools not report out of memory exit code when warning of inconsistant FASTA/Bed entry names
 * [#504](https://github.com/nf-core/eager/issues/504) - Removed uninformative sexdeterrmine-snps plot from MultiQC report.
 * Nuclear contamination is now reported with the correct library names.
+* [#531](https://github.com/nf-core/eager/pull/531) - Renamed 'FASTQ stripping' to 'host removal'
 * Merged all tutorials and FAQs into `usage.md` for display on nf-co.re
 
 ### `Dependencies`

diff --git a/bin/extract_map_reads.py b/bin/extract_map_reads.py
@@ -36,8 +36,8 @@ def _get_args():
     parser.add_argument(
         '-m',
         dest='mode',
-        default='strip',
-        help='Read removal mode: remove reads (strip) or replace sequence by N (replace)'
+        default='remove',
+        help='Read removal mode: remove reads (remove) or replace sequence by N (replace)'
     )
     parser.add_argument(
         '-p',
@@ -179,27 +179,27 @@ def difference(list1, list2):
     return(fqd)
 
 
-def write_fq(fq_dict, fname, write_mode, strip_mode, proc):
+def write_fq(fq_dict, fname, write_mode, remove_mode, proc):
     """Write to fastq file
     Args:
         fq_dict(dict): fq_dict with unmapped read names as keys,
             unmapped/mapped (u|m), seq, and quality as values in a list
         fname(string) Path to output fastq file
         write_mode (str): 'rb' or 'r'
-        strip_mode (str): strip (remove read) or replace (replace read sequence) by Ns
+        remove_mode (str): remove (remove read) or replace (replace read sequence) by Ns
         proc(int) number of processes
     """
     fq_dict_keys = list(fq_dict.keys())
     if write_mode == 'wb':
         with xopen(fname, mode='wb', threads=proc) as fw:
             for fq_dict_key in fq_dict_keys:
                 wstring = ""
-                if strip_mode == 'strip':
+                if remove_mode == 'remove':
                     if fq_dict[fq_dict_key][0] == 'u':
                         wstring += f"@{fq_dict_key+fq_dict[fq_dict_key][1]}\n"
                         for i in fq_dict[fq_dict_key][2:]:
                             wstring += f"{i}\n"
-                elif strip_mode == 'replace':
+                elif remove_mode == 'replace':
                     # if unmapped, write all the read lines
                     if fq_dict[fq_dict_key][0] == 'u':
                         wstring += f"@{fq_dict_key+fq_dict[fq_dict_key][1]}\n"
@@ -217,12 +217,12 @@ def write_fq(fq_dict, fname, write_mode, strip_mode, proc):
         with open(fname, 'w') as fw:
             for fq_dict_key in fq_dict_keys:
                 wstring = ""
-                if strip_mode == 'strip':
+                if remove_mode == 'remove':
                     if fq_dict[fq_dict_key][0] == 'u':
                         wstring += f"@{fq_dict_key+fq_dict[fq_dict_key][1]}\n"
                         for i in fq_dict[fq_dict_key][2:]:
                             wstring += f"{i}\n"
-                elif strip_mode == 'replace':
+                elif remove_mode == 'replace':
                     # if unmapped, write all the read lines
                     if fq_dict[fq_dict_key][0] == 'u':
                         wstring += f"@{fq_dict_key+fq_dict[fq_dict_key][1]}\n"
@@ -238,8 +238,8 @@ def write_fq(fq_dict, fname, write_mode, strip_mode, proc):
                 fw.write(wstring)
 
 
-def check_strip_mode(mode):
-    if mode.lower() not in ['replace', 'strip']:
+def check_remove_mode(mode):
+    if mode.lower() not in ['replace', 'remove']:
         print(f"Mode must be {' or '.join(mode)}")
     return(mode.lower())
 
@@ -257,7 +257,7 @@ def check_strip_mode(mode):
     else:
         write_mode = "w"
 
-    strip_mode = check_strip_mode(MODE)
+    remove_mode = check_remove_mode(MODE)
     BAMFILE = pysam.AlignmentFile(BAM, 'r')
 
    # FORWARD OR SE FILE
@@ -270,7 +270,7 @@ def check_strip_mode(mode):
     # print(fq_dict_fwd)
     print(f"- Writing forward fq to {out_fwd}")
     write_fq(fq_dict=fq_dict_fwd, fname=out_fwd,
-             write_mode=write_mode, strip_mode=strip_mode, proc=PROC)
+             write_mode=write_mode, remove_mode=remove_mode, proc=PROC)
 
     # REVERSE FILE
     if IN_REV:
@@ -284,4 +284,4 @@ def check_strip_mode(mode):
         fq_dict_rev = get_mapped_reads(fqd_rev, mapped_reads)
         print(f"- Writing reverse fq to {out_rev}")
         write_fq(fq_dict=fq_dict_rev, fname=out_rev,
-                 write_mode=write_mode, strip_mode=strip_mode, proc=PROC)
+                 write_mode=write_mode, remove_mode=remove_mode, proc=PROC)
diff --git a/docs/usage.md b/docs/usage.md
@@ -62,6 +62,8 @@
       - [`--skip_deduplication`](#--skip_deduplication)
       - [`--skip_damage_calculation`](#--skip_damage_calculation)
       - [`--skip_qualimap`](#--skip_qualimap)
+    - [BAM Conversion Options](#bam-conversion-options)
+      - [`--run_convertinputbam`](#--run_convertinputbam)
     - [Complexity Filtering Options](#complexity-filtering-options)
       - [`--complexity_filter_poly_g`](#--complexity_filter_poly_g)
       - [`--complexity_filter_poly_g_min`](#--complexity_filter_poly_g_min)
@@ -106,14 +108,7 @@
     - [Library Complexity Estimation Parameters](#library-complexity-estimation-parameters)
       - [`--preseq_step_size`](#--preseq_step_size)
     - [DNA Damage Assessment Parameters](#dna-damage-assessment-parameters)
-      - [`--damageprofiler_length`](#--damageprofiler_length)
-      - [`--damageprofiler_threshold`](#--damageprofiler_threshold)
-      - [`--damageprofiler_yaxis`](#--damageprofiler_yaxis)
-      - [`--run_pmdtools`](#--run_pmdtools)
-      - [`--pmdtools_range`](#--pmdtools_range)
-      - [`--pmdtools_threshold`](#--pmdtools_threshold)
-      - [`--pmdtools_reference_mask`](#--pmdtools_reference_mask)
-      - [`--pmdtools_max_reads`](#--pmdtools_max_reads)
+      - [`--udg_type`](#--udg_type)
     - [Feature Annotation Statistics](#feature-annotation-statistics)
       - [`--run_bedtools_coverage`](#--run_bedtools_coverage)
       - [`--anno_file`](#--anno_file)
@@ -1303,7 +1298,7 @@ when left-over sequencing artefacts of in-line barcodes present Default: 0
 Number of bases to trim of 3' (right) end of read prior alignment. Maybe useful
 when left-over sequencing artefacts of in-line barcodes present Default: 0.
 
-### Mapped Reads Stripping
+### Removal of Host-Mapped Reads
 
 These parameters are used for removing mapped reads from the original input
 FASTQ files, usually in the context of uploading the original FASTQ files to a
@@ -1318,17 +1313,16 @@ your data to apply their own adapter removal/read merging procedures, while
 maintaining anonyminity for sample donors - for example with microbiome
 research.
 
-If using TSV input, stripping is performed library, i.e. after lane merging.
+If using TSV input, mapped read removal is performed per library, i.e. after lane merging.
 
-#### `--strip_input_fastq`
+#### `--hostremoval_input_fastq`
 
 Create pre-Adapter Removal FASTQ files without reads that mapped to reference
 (e.g. for public upload of privacy sensitive non-host data)
 
-#### `--strip_mode`
+#### `--hostremoval_mode`
 
-Read removal mode. Strip mapped reads completely (`'strip'`) or just replace
-mapped reads sequence by N (`'replace'`)
+Read removal mode. Completely remove mapped reads from the file(s) (`'remove'`) or just replace mapped reads sequence by N (`'replace'`)
 
 ### Read Filtering and Conversion Parameters
 

diff --git a/main.nf b/main.nf
@@ -95,9 +95,9 @@ def helpMessage() {
       --bt2_trim5 [num]          Specify number of bases to trim off from 5' (left) end of read before alignment. Default: ${params.bt2_trim5}
       --bt2_trim3 [num]          Specify number of bases to trim off from 3' (right) end of read before alignment. Default: ${params.bt2_trim3}
 
-    Stripping
-      --strip_input_fastq [bool]  Turn on creation of per-library pre-Adapter Removal FASTQ files without reads that mapped to reference (e.g. for public upload of privacy sensitive non-host data).
-      --strip_mode [str]          Stripping mode. Remove mapped reads completely from FASTQ (strip) or just mask mapped reads sequence by N (replace). Default: '${params.strip_mode}'
+    Host removal
+      --hostremoval_input_fastq [bool]    Turn on creating pre-Adapter Removal FASTQ files without reads that mapped to reference (e.g. for public upload of privacy sensitive non-host data)
+      --hostremoval_mode [str]            Host DNA Removal mode. Remove mapped reads completely from FASTQ (remove) or just mask mapped reads sequence by N (replace). Default: '${params.hostremoval_mode}'
 
     BAM Filtering
       --run_bam_filtering [bool]               Turn on filtering of mapping quality, read lengths, or unmapped reads of BAM files.
@@ -359,10 +359,10 @@ if (!has_extension(params.input, "tsv") && params.skip_collapse  && params.singl
     exit 1, "[nf-core/eager] error: --skip_collapse can only be set for paired_end samples."
 }
 
-// Strip mode validation
-if (params.strip_input_fastq){
-    if (!(['strip','replace'].contains(params.strip_mode))) {
-        exit 1, "[nf-core/eager] error: --strip_mode can only be set to strip or replace."
+// Host removal mode validation
+if (params.hostremoval_input_fastq){
+    if (!(['remove','replace'].contains(params.hostremoval_mode))) {
+        exit 1, "[nf-core/eager] error: --hostremoval_mode can only be set to remove or replace."
     }
 }
 
@@ -654,9 +654,9 @@ ch_input_for_convertbam = Channel.empty()
 ch_bam_channel
   .into { ch_input_for_convertbam; ch_input_for_indexbam; }
 
-// Also need to send raw files for lane merging, if we want to strip fastq
+// Also need to send raw files for lane merging, if we want to host removed fastq
 ch_fastq_channel
-  .into { ch_input_for_skipconvertbam; ch_input_for_lanemerge_stripfastq }
+  .into { ch_input_for_skipconvertbam; ch_input_for_lanemerge_hostremovalfastq }
 
 ///////////////////////////////////////////////////
 /* --             HEADER LOG INFO             -- */
@@ -683,9 +683,9 @@ summary['Running BAM filtering'] = params.run_bam_filtering ? 'Yes' : 'No'
 if (params.run_bam_filtering) {
   summary['Skip Read Merging'] = params.bam_unmapped_type
 }
-summary['Run Fastq Stripping'] = params.strip_input_fastq ? 'Yes' : 'No'
-if (params.strip_input_fastq){
-    summary['Strip mode'] = params.strip_mode
+summary['Run Fastq Host Removal'] = params.hostremoval_input_fastq ? 'Yes' : 'No'
+if (params.hostremoval_input_fastq){
+    summary['Host removal mode'] = params.hostremoval_mode
 }
 summary['Skipping Preseq?'] = params.skip_preseq ? 'Yes' : 'No'
 summary['Skipping Deduplication?'] = params.skip_deduplication ? 'Yes' : 'No'
@@ -1330,21 +1330,21 @@ if ( ( params.skip_collapse || params.skip_adapterremoval ) ) {
     .into { ch_lanemerge_for_skipmap; ch_lanemerge_for_bwa; ch_lanemerge_for_cm; ch_lanemerge_for_bwamem; ch_lanemerge_for_bt2 }
 }
 
-// ENA upload doesn't do separate lanes, so merge raw FASTQs for mapped-reads stripping 
+// ENA upload doesn't do separate lanes, so merge raw FASTQs for mapped-reads removal 
 
 // Per-library lane grouping done within process
-process lanemerge_stripfastq {
+process lanemerge_hostremoval_fastq {
   label 'sc_tiny'
   tag "${libraryid}"
 
   when: 
-  params.strip_input_fastq
+  params.hostremoval_input_fastq
 
   input:
-  tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_input_for_lanemerge_stripfastq.groupTuple(by: [0,1,3,4,5,6,7])
+  tuple samplename, libraryid, lane, colour, seqtype, organism, strandedness, udg, file(r1), file(r2) from ch_input_for_lanemerge_hostremovalfastq.groupTuple(by: [0,1,3,4,5,6,7])
 
   output:
-  tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.fq.gz") into ch_fastqlanemerge_for_stripfastq
+  tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, file("*.fq.gz") into ch_fastqlanemerge_for_hostremovalfastq
 
   script:
   if ( seqtype == 'PE' ){
@@ -1620,10 +1620,10 @@ process bowtie2 {
 
 // Gather all mapped BAMs from all possible mappers into common channels to send downstream
 ch_output_from_bwa.mix(ch_output_from_bwamem, ch_output_from_cm, ch_indexbam_for_filtering, ch_output_from_bt2)
-  .into { ch_mapping_for_stripfastq; ch_mapping_for_seqtype_merging }
+  .into { ch_mapping_for_hostremovalfastq; ch_mapping_for_seqtype_merging }
 
 // Synchronise the mapped input FASTQ and input non-remapped BAM channels
-ch_fastqlanemerge_for_stripfastq
+ch_fastqlanemerge_for_hostremovalfastq
     .map {
         def samplename = it[0]
         def libraryid  = it[1]
@@ -1638,7 +1638,7 @@ ch_fastqlanemerge_for_stripfastq
         [ samplename, libraryid, lane, seqtype, organism, strandedness, udg, r1, r2 ]
 
     }
-    .mix(ch_mapping_for_stripfastq)
+    .mix(ch_mapping_for_hostremovalfastq)
     .groupTuple(by: [0,1,3,4,5,6])
     .map {
         def samplename = it[0]
@@ -1657,38 +1657,37 @@ ch_fastqlanemerge_for_stripfastq
 
     }
     .filter{ it[8] != null }
-    .dump(tag: "StripFastq Input")
-    .set { ch_synced_for_stripfastq }
+    .set { ch_synced_for_hostremovalfastq }
 
 // Remove mapped reads from original (lane merged) input FASTQ e.g. for sensitive host data when running metagenomic data
 
-process strip_input_fastq {
+process hostremoval_input_fastq {
     label 'mc_medium'
     tag "${libraryid}"
-    publishDir "${params.outdir}/stripped_fastq", mode: params.publish_dir_mode
+    publishDir "${params.outdir}/hostremoved_fastq", mode: params.publish_dir_mode
 
     when: 
-    params.strip_input_fastq
+    params.hostremoval_input_fastq
 
     input: 
-    tuple samplename, libraryid, seqtype, organism, strandedness, udg, path(r1), path(r2), file(bam), file(bai) from ch_synced_for_stripfastq
+    tuple samplename, libraryid, seqtype, organism, strandedness, udg, file(r1), file(r2), file(bam), file(bai) from ch_synced_for_hostremovalfastq
 
     output:
-    tuple samplename, libraryid, seqtype, organism, strandedness, udg, file("*.fq.gz") into ch_output_from_stripfastq
+    tuple samplename, libraryid, seqtype, organism, strandedness, udg, file("*.fq.gz") into ch_output_from_hostremovalfastq
 
     script:
     if ( seqtype == 'SE' ) {
-        out_fwd = bam.baseName+'.stripped.fq.gz'
+        out_fwd = bam.baseName+'.hostremoved.fq.gz'
         """
         samtools index $bam
-        extract_map_reads.py $bam ${r1} -m ${params.strip_mode} -of $out_fwd -p ${task.cpus}
+        extract_map_reads.py $bam ${r1} -m ${params.hostremoval_mode} -of $out_fwd -p ${task.cpus}
         """
     } else {
-        out_fwd = bam.baseName+'.stripped.fwd.fq.gz'
-        out_rev = bam.baseName+'.stripped.rev.fq.gz'
+        out_fwd = bam.baseName+'.hostremoved.fwd.fq.gz'
+        out_rev = bam.baseName+'.hostremoved.rev.fq.gz'
         """
         samtools index $bam
-        extract_map_reads.py $bam ${r1} -rev ${r2} -m  ${params.strip_mode} -of $out_fwd -or $out_rev -p ${task.cpus}
+        extract_map_reads.py $bam ${r1} -rev ${r2} -m  ${params.hostremoval_mode} -of $out_fwd -or $out_rev -p ${task.cpus}
         """ 
     }
 

diff --git a/nextflow.config b/nextflow.config
@@ -77,9 +77,9 @@ params {
   bt2_trim5 = 0
   bt2_trim3 = 0
 
-  //Mapped read stripping from input FASTQ
-  strip_input_fastq = false
-  strip_mode = 'strip'
+  //Mapped read removal from input FASTQ
+  hostremoval_input_fastq = false
+  hostremoval_mode = 'remove'
 
   //BAM Filtering steps (default = discard unmapped reads)
   run_bam_filtering = false
@@ -117,7 +117,7 @@ params {
   bamutils_clip_half_udg_right = 1 
   bamutils_clip_none_udg_left = 1 
   bamutils_clip_none_udg_right = 1 
-  bamutils_softclip = false 
+  bamutils_softclip = false
 
   //Genotyping options
   run_genotyping = false

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -570,24 +570,24 @@
             },
             "fa_icon": "fas fa-layer-group"
         },
-        "stripping": {
-            "title": "Stripping",
+        "host_removal": {
+            "title": "Removal of Host-Mapped Reads",
             "type": "object",
             "description": "Options for production of host-read removed FASTQ files for privacy reasons.",
             "default": "",
             "properties": {
-                "strip_input_fastq": {
+                "hostremoval_input_fastq": {
                     "type": "boolean",
                     "description": "Turn on per-library creation pre-Adapter Removal FASTQ files without reads that mapped to reference (e.g. for public upload of privacy sensitive non-host data)",
                     "fa_icon": "fas fa-power-off",
                     "help_text": "Create pre-Adapter Removal FASTQ files without reads that mapped to reference (e.g. for public upload of privacy sensitive non-host data)\n"
                 },
-                "strip_mode": {
+                "hostremoval_mode": {
                     "type": "string",
-                    "default": "strip",
-                    "description": "Stripping mode. Remove mapped reads completely from FASTQ (strip) or just mask mapped reads sequence by N (replace).",
+                    "default": "remove",
+                    "description": "Host removal mode. Remove mapped reads completely from FASTQ (remove) or just mask mapped reads sequence by N (replace).",
                     "fa_icon": "fas fa-mask",
-                    "help_text": "Read removal mode. Strip mapped reads completely (`'strip'`) or just replace mapped reads sequence by N (`'replace'`)\n"
+                    "help_text": "Read removal mode. Remove mapped reads completely (`'remove'`) or just replace mapped reads sequence by N (`'replace'`)\n"
                 }
             },
             "fa_icon": "fas fa-user-shield"
@@ -1374,7 +1374,7 @@
             "$ref": "#/definitions/mapping"
         },
         {
-            "$ref": "#/definitions/stripping"
+            "$ref": "#/definitions/host_removal"
         },
         {
             "$ref": "#/definitions/bam_filtering"
@@ -1419,4 +1419,4 @@
             "$ref": "#/definitions/metagenomic_authentication"
         }
     ]
-}
+}