-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathsimulate_sample.R
More file actions
27 lines (23 loc) · 980 Bytes
/
simulate_sample.R
File metadata and controls
27 lines (23 loc) · 980 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
library(polyester)
library(Biostrings)
# Simulate paired-end RNA-Seq reads with polyester, using the per-transcript
# read counts from a Salmon quantification as the expression profile.

# Empirical counts (generated by Salmon). The first column of the file becomes
# the row names; the remaining four columns are named explicitly below.
empirical_counts <- read.table(
  "data/quant_bias_corrected.sf",
  sep = "\t",
  stringsAsFactors = FALSE,
  row.names = 1
)
colnames(empirical_counts) <- c("Length", "TPM", "FPKM", "NumReads")

# Transcript order from the reference FASTA. Only the first 15 characters of
# each record name are used as the lookup ID.
# NOTE(review): presumably this strips a version suffix / description from the
# FASTA header -- confirm against the headers in select_transcripts.fa.
transcripts <- readDNAStringSet("data/select_transcripts.fa")
transcript_names <- names(transcripts)
transcript_ids <- substr(transcript_names, 1, 15)

# Empirical counts in reference order, rounded to whole reads.
# NOTE(review): character row indexing on a data.frame partially matches row
# names, so a truncated ID can still resolve to a longer row name.
count_mat <- round(empirical_counts[transcript_ids, ]$NumReads)

# An ID absent from the Salmon table yields NA here; fail early with a clear
# message instead of handing NA read counts to the simulator.
missing_ids <- transcript_ids[is.na(count_mat)]
if (length(missing_ids) > 0) {
  stop(
    "No usable NumReads value for ", length(missing_ids),
    " transcript(s) from data/select_transcripts.fa, e.g.: ",
    paste(head(missing_ids, 5), collapse = ", "),
    call. = FALSE
  )
}

# Simulate RNA-Seq samples. `readmat` has one row per transcript; the count
# vector is recycled to fill both columns, so the two columns are identical.
simulate_experiment_countmat(
  fasta = "data/select_transcripts.fa",
  num_reps = 1,
  distr = "empirical",
  readlen = 100,
  readmat = matrix(count_mat, ncol = 2, nrow = length(count_mat)),
  paired = TRUE,
  error_model = "illumina5",
  outdir = "data"
)