-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreferences.yml
More file actions
122 lines (114 loc) · 3.28 KB
/
references.yml
File metadata and controls
122 lines (114 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# Reference datasets, keyed by name. Each dataset is a list of resources:
#   - file resources, given as `url` plus a `type` (gtf, fasta, tx.fasta, bed,
#     path, dir, archive, localdir), and
#   - `type: pipeline` resources, each listing the stage(s) under `stages`.
# NOTE(review): the stage names (index_*) and the resource types are defined
# outside this file -- confirm they exist before adding or renaming entries.
references:
  # Human reference used for read and contig filtering.
  # Sources: GENCODE human release 38 (GRCh38 primary assembly) plus the
  # RSeQC hg38 GENCODE v38 BED file.
  hg38g:
    - url: http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/gencode.v38.annotation.gtf.gz
      type: gtf
    - url: http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/GRCh38.primary_assembly.genome.fa.gz
      type: fasta
    - url: http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/gencode.v38.transcripts.fa.gz
      type: tx.fasta
    - url: https://sourceforge.net/projects/rseqc/files/BED/Human_Homo_sapiens/hg38_GENCODE.v38.bed.gz
      type: bed
    - type: pipeline
      stages:
        - index_star
    - type: pipeline
      stages:
        - index_rsem
    - type: pipeline
      stages:
        - index_salmon_decoyG
    - type: pipeline
      stages:
        - index_bowtie2
    - type: pipeline
      stages:
        - index_blast
    - type: pipeline
      stages:
        - index_fasta
    - type: pipeline
      stages:
        - index_tx_fasta

  # Human reference for test data (contains only start of chr1).
  # NOTE(review): this entry uses `index_salmonG` where hg38g and mm39g use
  # `index_salmon_decoyG`, and it has no index_fasta / index_tx_fasta stages
  # -- presumably intentional for the small test set; confirm.
  hg38gtest:
    - type: gtf
      url: test_data/test.gtf.gz
    - type: fasta
      url: test_data/test.fasta.gz
    - type: tx.fasta
      url: test_data/test.tx.fasta.gz
    - type: pipeline
      stages:
        - index_star
    - type: pipeline
      stages:
        - index_rsem
    - type: pipeline
      stages:
        - index_salmonG
    - type: pipeline
      stages:
        - index_bowtie2
    - type: pipeline
      stages:
        - index_blast

  # Mouse reference: GENCODE mouse release M27 (GRCm39 primary assembly).
  # Same pipeline stages as hg38g; no BED resource is listed for mouse.
  mm39g:
    - url: http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M27/gencode.vM27.annotation.gtf.gz
      type: gtf
    - url: http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M27/GRCm39.primary_assembly.genome.fa.gz
      type: fasta
    - url: http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M27/gencode.vM27.transcripts.fa.gz
      type: tx.fasta
    - type: pipeline
      stages:
        - index_star
    - type: pipeline
      stages:
        - index_rsem
    - type: pipeline
      stages:
        - index_salmon_decoyG
    - type: pipeline
      stages:
        - index_bowtie2
    - type: pipeline
      stages:
        - index_blast
    - type: pipeline
      stages:
        - index_fasta
    - type: pipeline
      stages:
        - index_tx_fasta

  # NCBI Nucleotide database for Blast
  NT:
    # `!workdir` is a custom (non-standard) YAML tag; it must be handled by
    # the loader that reads this file.
    # NOTE(review): presumably resolves the path against the working
    # directory -- confirm.
    - url: !workdir databases/nt
      type: path
      # File name patterns; the named group `sample` captures everything
      # before the first dot. Single-quoted so backslashes and brackets stay
      # literal.
      match:
        # <name>.<ext> for nal/not/nto/ntf/nos/ndb, or numbered volumes
        # <name>.<digits>.<ext> for nin/nhr/nsq/nsd/nog.
        - '(?P<sample>[^.]+)\.((nal|not|nto|ntf|nos|ndb)|[0-9]+\.(nin|nhr|nsq|nsd|nog))'
        # Unnumbered <name>.<ext> database files.
        - '(?P<sample>[^.]+)\.(ndb|nhr|nin|nog|nos|not|nsq|ntf|nto)'

  # Subset of NT for testing
  NT_test:
    - url: test_data/nt_test.tar.bz2
      type: dir
      files: [nt.ndb, nt.nhr, nt.nin, nt.nog, nt.nos, nt.not, nt.nsq, nt.ntf, nt.nto]
      id: nt

  # NCBI Taxonomy dump for classification
  NcbiTaxonomy:
    - url: http://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz
      type: archive
      # NOTE(review): presumably maps a local name (key) to the member inside
      # the archive (value) -- confirm the direction against the loader.
      files:
        ALL.NCBI.nodes.dmp: nodes.dmp
        ALL.NCBI.names.dmp: names.dmp
        ALL.NCBI.merged.dmp: merged.dmp
    - type: pipeline
      stages:
        - index_taxonomy_vp

  # VirusHostDB from genome.jp
  VHDB:
    - url: https://www.genome.jp/ftp/db/virushostdb/
      # Alternative FTP source:
      #   url: ftp://www.genome.jp/db/virushostdb/
      type: localdir
      files:
        virushostdb.tsv: virushostdb.tsv