forked from nf-core/rnaseq
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.nf
More file actions
89 lines (77 loc) · 3.7 KB
/
main.nf
File metadata and controls
89 lines (77 loc) · 3.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// Opt in to Nextflow's preview type syntax, which the typed process inputs
// (`name: Type`) and the `record` declaration in this file are written in.
nextflow.preview.types = true
// Bundle of the six merged tables written by RSEM_MERGE_COUNTS.
// Each field holds the path of one output file in the task directory.
record RsemMergedResult {
// rsem.merged.gene_counts.tsv: gene id columns plus one column-5 (expected_count) column per sample
counts_gene: Path
// rsem.merged.gene_tpm.tsv: gene id columns plus one column-6 (TPM) column per sample
tpm_gene: Path
// rsem.merged.transcript_counts.tsv: transcript id columns plus one column-5 column per sample
counts_transcript: Path
// rsem.merged.transcript_tpm.tsv: transcript id columns plus one column-6 column per sample
tpm_transcript: Path
// rsem.merged.genes_long.tsv: every gene row of every sample, prefixed with the sample name
genes_long: Path
// rsem.merged.isoforms_long.tsv: every isoform row of every sample, prefixed with the sample name
isoforms_long: Path
}
/*
 * RSEM_MERGE_COUNTS
 *
 * Merges per-sample RSEM result tables into cohort-wide tables.
 *
 * Inputs
 *   genes    - per-sample gene tables; the script reads them from ./genes/* and
 *              derives the sample name by stripping the ".genes.results" suffix.
 *   isoforms - per-sample isoform tables; read from ./isoforms/*, sample name
 *              derived by stripping ".isoforms.results".
 *
 * Outputs
 *   A record with six files:
 *     - wide matrices: the first two columns of the first input file, followed by
 *       one column per sample taken from column 5 (counts) or column 6 (TPM);
 *     - long tables: all data rows of all samples, each prefixed with the sample name.
 *   A (process name, 'sed', sed version) tuple sent to the `versions` topic.
 *
 * The wide matrices are built with `paste`, so every input table is expected to list
 * the same features in the same row order as the first file.
 */
process RSEM_MERGE_COUNTS {
    label "process_medium"

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
        'nf-core/ubuntu:20.04' }"

    input:
    // NOTE(review): the script below reads ./genes/* and ./isoforms/*, i.e. it relies on the
    // staging patterns shown in the trailing comments. No staging declaration is visible in
    // this process - confirm the inputs really are staged under genes/ and isoforms/.
    genes: Path // path ('genes/*')
    isoforms: Path // path ('isoforms/*')

    output:
    record(
        counts_gene: file("rsem.merged.gene_counts.tsv"),
        tpm_gene: file("rsem.merged.gene_tpm.tsv"),
        counts_transcript: file("rsem.merged.transcript_counts.tsv"),
        tpm_transcript: file("rsem.merged.transcript_tpm.tsv"),
        genes_long: file("rsem.merged.genes_long.tsv"),
        isoforms_long: file("rsem.merged.isoforms_long.tsv")
    )
    // NOTE(review): this line uses the qualifier-style output syntax (tuple/val/eval/topic:)
    // next to the typed record output above - confirm both are accepted together.
    tuple val("${task.process}"), val('sed'), eval('echo $(sed --version 2>&1) | sed "s/^.*GNU sed) //; s/ .*$//"'), topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    """
    # Collect the staged inputs with globs instead of parsing 'ls' output,
    # so that paths are never word-split.
    gene_files=(./genes/*)
    isoform_files=(./isoforms/*)

    mkdir -p tmp/genes
    # Feature id columns come from the first file only.
    cut -f 1,2 "\${gene_files[0]}" > gene_ids.txt
    for fileid in "\${gene_files[@]}"; do
        # basename strips the suffix literally (no regex wildcards).
        samplename=\$(basename "\$fileid" .genes.results)
        # Each per-sample column file starts with the sample name as its header.
        echo "\$samplename" > "tmp/genes/\${samplename}.counts.txt"
        cut -f 5 "\$fileid" | tail -n+2 >> "tmp/genes/\${samplename}.counts.txt"
        echo "\$samplename" > "tmp/genes/\${samplename}.tpm.txt"
        cut -f 6 "\$fileid" | tail -n+2 >> "tmp/genes/\${samplename}.tpm.txt"
    done

    mkdir -p tmp/isoforms
    cut -f 1,2 "\${isoform_files[0]}" > transcript_ids.txt
    for fileid in "\${isoform_files[@]}"; do
        samplename=\$(basename "\$fileid" .isoforms.results)
        echo "\$samplename" > "tmp/isoforms/\${samplename}.counts.txt"
        cut -f 5 "\$fileid" | tail -n+2 >> "tmp/isoforms/\${samplename}.counts.txt"
        echo "\$samplename" > "tmp/isoforms/\${samplename}.tpm.txt"
        cut -f 6 "\$fileid" | tail -n+2 >> "tmp/isoforms/\${samplename}.tpm.txt"
    done

    # Wide matrices: id columns followed by one column per sample.
    paste gene_ids.txt tmp/genes/*.counts.txt > rsem.merged.gene_counts.tsv
    paste gene_ids.txt tmp/genes/*.tpm.txt > rsem.merged.gene_tpm.tsv
    paste transcript_ids.txt tmp/isoforms/*.counts.txt > rsem.merged.transcript_counts.tsv
    paste transcript_ids.txt tmp/isoforms/*.tpm.txt > rsem.merged.transcript_tpm.tsv

    # Create long format for genes (idx=1-4, concat columns 5-7)
    echo -e "sample_name\tgene_id\ttranscript_id(s)\tlength\teffective_length\texpected_count\tTPM\tFPKM" > rsem.merged.genes_long.tsv
    for fileid in "\${gene_files[@]}"; do
        samplename=\$(basename "\$fileid" .genes.results)
        tail -n+2 "\$fileid" | awk -v sample="\$samplename" 'BEGIN{OFS="\t"}{print sample,\$1,\$2,\$3,\$4,\$5,\$6,\$7}' >> rsem.merged.genes_long.tsv
    done

    # Create long format for isoforms (idx=1-4, concat columns 5-8)
    echo -e "sample_name\ttranscript_id\tgene_id\tlength\teffective_length\texpected_count\tTPM\tFPKM\tIsoPct" > rsem.merged.isoforms_long.tsv
    for fileid in "\${isoform_files[@]}"; do
        samplename=\$(basename "\$fileid" .isoforms.results)
        tail -n+2 "\$fileid" | awk -v sample="\$samplename" 'BEGIN{OFS="\t"}{print sample,\$1,\$2,\$3,\$4,\$5,\$6,\$7,\$8}' >> rsem.merged.isoforms_long.tsv
    done
    """

    stub:
    """
    touch rsem.merged.gene_counts.tsv
    touch rsem.merged.gene_tpm.tsv
    touch rsem.merged.transcript_counts.tsv
    touch rsem.merged.transcript_tpm.tsv
    touch rsem.merged.genes_long.tsv
    touch rsem.merged.isoforms_long.tsv
    """
}