Skip to content

Commit e53bbae

Browse files
authored
Merge pull request #257 from qbic-pipelines/dev
Release 2.5
2 parents ffbf84a + 64d0e3d commit e53bbae

14 files changed

Lines changed: 124 additions & 47 deletions

File tree

.github/workflows/ci.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,14 @@ jobs:
3939
environment.yml
4040
- name: Build new docker image
4141
if: env.MATCHED_FILES
42-
run: docker build --no-cache . -t ghcr.io/qbic-pipelines/rnadeseq:2.4
42+
run: docker build --no-cache . -t ghcr.io/qbic-pipelines/rnadeseq:2.5
4343

4444
# Change the version above and the third version below before/after release
4545
- name: Pull docker image
4646
if: ${{ !env.MATCHED_FILES }}
4747
run: |
4848
docker pull ghcr.io/qbic-pipelines/rnadeseq:dev
49-
docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.4
49+
docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.5
5050
5151
- name: Install Nextflow
5252
uses: nf-core/setup-nextflow@v1
@@ -93,14 +93,14 @@ jobs:
9393
environment.yml
9494
- name: Build new docker image
9595
if: env.MATCHED_FILES
96-
run: docker build --no-cache . -t ghcr.io/qbic-pipelines/rnadeseq:2.4
96+
run: docker build --no-cache . -t ghcr.io/qbic-pipelines/rnadeseq:2.5
9797

9898
# Change the version above and the third version below before/after release
9999
- name: Pull docker image
100100
if: ${{ !env.MATCHED_FILES }}
101101
run: |
102102
docker pull ghcr.io/qbic-pipelines/rnadeseq:dev
103-
docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.4
103+
docker tag ghcr.io/qbic-pipelines/rnadeseq:dev ghcr.io/qbic-pipelines/rnadeseq:2.5
104104
105105
- name: Install Nextflow
106106
uses: nf-core/setup-nextflow@v1
@@ -129,7 +129,7 @@ jobs:
129129
130130
- name: Upload logs on failure
131131
if: failure()
132-
uses: actions/upload-artifact@v2
132+
uses: actions/upload-artifact@v4
133133
with:
134134
name: logs-${{ matrix.profile }}
135135
path: |

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,31 @@
33
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
44
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
55

6+
## 2.5 - The Potato Eaters
7+
8+
### Added
9+
10+
- [#256](https://github.com/qbic-pipelines/rnadeseq/pull/256) Add trycatch to pathway enrichment plots so they are skipped when too large instead of throwing an error
11+
- [#255](https://github.com/qbic-pipelines/rnadeseq/pull/255) Add usage documentation for datasources, heatmaps_cluster_rows/cols and pathway_adj_pval_threshold params
12+
- [#251](https://github.com/qbic-pipelines/rnadeseq/pull/251) Get raw gene count tables from either Salmon or RSEM analysis
13+
- [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Added clearer error message for incorrect contrast_pairs
14+
15+
### Changed
16+
17+
- [#260](https://github.com/qbic-pipelines/rnadeseq/pull/260) Release 2.5
18+
- [#259](https://github.com/qbic-pipelines/rnadeseq/pull/259) Bump versions for release 2.5
19+
20+
### Fixed
21+
22+
- [#258](https://github.com/qbic-pipelines/rnadeseq/pull/258) Fixed some comments for release (removed excess checks for pathway_adj_pval_threshold, added default explanation of that param to Execute_report.R, fixed some whitespace)
23+
- [#252](https://github.com/qbic-pipelines/rnadeseq/pull/252) Fixed github CI bug by updating actions/upload-artifact
24+
- [#250](https://github.com/qbic-pipelines/rnadeseq/pull/250) Fixed incorrect reading and indexing of contrast_pairs
25+
626
## 2.4 - A Pair of Shoes
727

828
### Added
929

30+
- [#253](https://github.com/qbic-pipelines/rnadeseq/pull/253) Added separate param for adjusted p-value threshold for gprofiler
1031
- [#245](https://github.com/qbic-pipelines/rnadeseq/pull/245) Added background gene list to pathway analysis output
1132

1233
### Changed

Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ LABEL org.opencontainers.image.authors="Gisela Gabernet, Alexander Peltzer, Oska
55
LABEL org.opencontainers.image.licenses=MIT
66
COPY environment.yml /
77
#RUN conda install -c conda-forge mamba
8-
RUN mamba env create --file /environment.yml -p /opt/conda/envs/qbic-pipelines-rnadeseq-2.4 && \
8+
RUN mamba env create --file /environment.yml -p /opt/conda/envs/qbic-pipelines-rnadeseq-2.5 && \
99
mamba clean --all --yes
1010
RUN apt-get update -qq && \
1111
apt-get install -y zip procps ghostscript
1212
# Add conda installation dir to PATH
13-
ENV PATH /opt/conda/envs/qbic-pipelines-rnadeseq-2.4/bin:$PATH
13+
ENV PATH /opt/conda/envs/qbic-pipelines-rnadeseq-2.5/bin:$PATH
1414
# Dump the details of the installed packages to a file for posterity
15-
RUN mamba env export --name qbic-pipelines-rnadeseq-2.4 > qbic-pipelines-rnadeseq-2.4.yml
15+
RUN mamba env export --name qbic-pipelines-rnadeseq-2.5 > qbic-pipelines-rnadeseq-2.5.yml
1616
# Instruct R processes to use these empty files instead of clashing with a local config
1717
RUN touch .Rprofile
1818
RUN touch .Renviron

assets/RNAseq_report.Rmd

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ params:
4646
datasources: ''
4747
heatmaps_cluster_rows: ''
4848
heatmaps_cluster_cols: ''
49+
pathway_adj_pval_threshold: ''
4950

5051
#Additional args for the report
5152
path_proj_summary: ''
@@ -456,7 +457,7 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) {
456457
457458
# Write raw counts to file
458459
count_table_names <- merge(x=gene_names, y=count.table, by.x = "Ensembl_ID", by.y="row.names")
459-
write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv",sep=""), append = FALSE, quote = FALSE, sep = "\t",eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double"))
460+
write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv", sep=""), append = FALSE, quote = FALSE, sep = "\t", eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double"))
460461
}
461462
462463
# to get all possible pairwise comparisons, make a combined factor
@@ -591,6 +592,10 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) {
591592
592593
#dds from SummarizedExperiment <se>, then run DESeq
593594
cds <- DESeqDataSet(se, design = as.formula(eval(parse(text=as.character(design[[1]])))))
595+
# get raw counts
596+
count_table_names <- merge(x=gene_names, y=assay(cds), by.x = "Ensembl_ID", by.y="row.names")
597+
count_table_names <- count_table_names[order(count_table_names$Ensembl_ID),]
598+
write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv", sep=""), append = FALSE, quote = FALSE, sep = "\t", eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double"))
594599
595600
# Load salmon count files
596601
} else if (params$input_type == "salmon") {
@@ -624,6 +629,10 @@ if (params$input_type %in% c("featurecounts", "smrnaseq")) {
624629
coldata$combfactor <- metadata$combfactor
625630
rownames(coldata) <- qbicCodes
626631
cds <- DESeqDataSetFromTximport(txi=txi.salmon, colData =coldata, design = eval(parse(text=as.character(design[[1]]))))
632+
# get raw counts
633+
count_table_names <- merge(x=gene_names, y=assay(cds), by.x = "Ensembl_ID", by.y="row.names")
634+
count_table_names <- count_table_names[order(count_table_names$Ensembl_ID),]
635+
write.table(count_table_names, paste("differential_gene_expression/gene_counts_tables/raw_gene_counts.tsv", sep=""), append = FALSE, quote = FALSE, sep = "\t", eol = "\n", na = "NA", dec = ".", row.names = F, qmethod = c("escape", "double"))
627636
}
628637
} else {
629638
stop(paste0("Invalid input type: ", params$input_type, "! Input type must be one of: featurecounts, rsem, salmon, smrnaseq!"))
@@ -1526,19 +1535,21 @@ if (isProvided(params$path_contrast_list)) {
15261535
contrast_names <- append(contrast_names, contname)
15271536
}
15281537
}
1529-
15301538
if (isProvided(params$path_contrast_pairs)) {
1531-
contrasts <- read.table(path_contrast_pairs, sep="\t", header = T, colClasses = "character")
1539+
contrasts <- read.table(params$path_contrast_pairs, sep="\t", header = T, colClasses = "character")
15321540
write.table(contrasts, file="differential_gene_expression/metadata/contrast_pairs.tsv", sep="\t", quote=F, col.names = T, row.names = F)
15331541
15341542
# Contrast calculation for contrast pairs
15351543
for (i in c(1:nrow(contrasts))) {
15361544
cont <- as.character(contrasts[i,])
1537-
contname <- cont[0]
1538-
if (!(cont[2] %in% coefficients & cont[3] %in% coefficients)){
1539-
stop(paste("Provided contrast name is invalid, it needs to be contained in", coefficients))
1545+
contname <- cont[1]
1546+
if (!(cont[2] %in% coefficients)){
1547+
stop(paste0("Provided contrast name ", cont[2], " is invalid, it needs to be contained in ", paste(coefficients, collapse=", ")))
1548+
}
1549+
if (!(cont[3] %in% coefficients)){
1550+
stop(paste0("Provided contrast name ", cont[3], " is invalid, it needs to be contained in ", paste(coefficients, collapse=", ")))
15401551
}
1541-
results_DEseq_contrast <- results(cds, contrast=list(cont[1],cont[2]))
1552+
results_DEseq_contrast <- results(cds, contrast=list(cont[2],cont[3]))
15421553
results_DEseq_contrast <- as.data.frame(results_DEseq_contrast)
15431554
print("Analyzing contrast:")
15441555
print(contname)
@@ -2056,6 +2067,7 @@ if (!isProvided(params$datasources)) {
20562067
# ------------------
20572068
# Set default params
20582069
# ------------------
2070+
pathway_pval_text <- format(params$pathway_adj_pval_threshold, scientific=F)
20592071
20602072
# Set theme for graphs
20612073
theme_set(theme_classic())
@@ -2094,7 +2106,7 @@ for (file in contrast_files){
20942106
correction_method="fdr",
20952107
sources=datasources,
20962108
evcodes=TRUE,
2097-
user_threshold=params$adj_pval_threshold,
2109+
user_threshold=params$pathway_adj_pval_threshold,
20982110
custom_bg=custom_background,
20992111
domain_scope="custom_annotated"
21002112
)
@@ -2110,7 +2122,7 @@ for (file in contrast_files){
21102122
correction_method="fdr",
21112123
sources=datasources,
21122124
evcodes=TRUE,
2113-
user_threshold=params$adj_pval_threshold,
2125+
user_threshold=params$pathway_adj_pval_threshold,
21142126
domain_scope="annotated"
21152127
)
21162128
pathway_gostres_nobg <- gostres_nobg$result
@@ -2124,7 +2136,7 @@ for (file in contrast_files){
21242136
correction_method="fdr",
21252137
sources=datasources,
21262138
evcodes=TRUE,
2127-
user_threshold=params$adj_pval_threshold,
2139+
user_threshold=params$pathway_adj_pval_threshold,
21282140
domain_scope="annotated"
21292141
)
21302142
}
@@ -2214,9 +2226,32 @@ for (file in contrast_files){
22142226
scale_fill_continuous(high = "#132B43", low = "#56B1F7") +
22152227
ggtitle("Enriched pathways") +
22162228
xlab("") + ylab("Gene fraction (DE genes / Pathway size)")
2217-
ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.pdf"), device = "pdf", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F)
2218-
ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.png"), device = "png", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F)
2219-
ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.svg"), device = "svg", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F)
2229+
2230+
# If the plots are huge ggsave will throw an error even if limitsize=T, so I'm leaving limitsize=F and instead using trycatch
2231+
tryCatch(
2232+
{
2233+
ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.pdf"), device = "pdf", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F)
2234+
},
2235+
error=function(e) {
2236+
print(paste0("Could not save pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.pdf because of the following error:\n", e))
2237+
}
2238+
)
2239+
tryCatch(
2240+
{
2241+
ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.png"), device = "png", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F)
2242+
},
2243+
error=function(e) {
2244+
print(paste0("Could not save pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.png because of the following error:\n", e))
2245+
}
2246+
)
2247+
tryCatch(
2248+
{
2249+
ggsave(p, filename = paste0("pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.svg"), device = "svg", height = 5+0.5*nrow(df_subset), units = "cm", limitsize=F)
2250+
},
2251+
error=function(e) {
2252+
print(paste0("Could not save pathway_analysis", "/", fname, "/enrichment_plots/", make.names(db_source), "_pathway_enrichment_plot.svg because of the following error:\n", e))
2253+
}
2254+
)
22202255
22212256
# Plotting heatmaps and KEGG pathways for all pathways
22222257
print("Plotting heatmaps...")
@@ -2399,7 +2434,7 @@ Inside the pathway analysis results folder, a subfolder for each contrast used f
23992434
- `*_gost_pathway_venn_diagram.pdf/png`
24002435
- Venn diagrams showing the numbers of enriched pathways when using a background gene list vs when not using a bg list.
24012436
- `enrichment_plots/*_pathway_enrichment_plot.{pdf/png/svg}`
2402-
- Barplots showing the proportion of differentially expressed genes in the pathway for a certain pathway database.
2437+
- Barplots showing the proportion of differentially expressed genes in the pathway for a certain pathway database (might be missing if too many pathways were enriched for fitting into a plot).
24032438
- `gost_pathway_gostplot.{pdf/png/svg}`
24042439
- Manhattan plots displaying all enriched pathways.
24052440
- `KEGG_pathways/`
@@ -2411,7 +2446,7 @@ gost_text,
24112446
24122447
"\n
24132448
## Enriched pathways
2414-
The plot below summarizes the pathways that were found significantly enriched in DE genes for each contrast (padj value <= ", pval_text, ").
2449+
The plot below summarizes the pathways that were found significantly enriched in DE genes for each contrast (padj value <= ", pathway_pval_text, ").
24152450
Only contrasts for which an enriched pathway was found are shown.
24162451
Hover over the dots to reveal the pathway names. The table below provides more detail on all enriched pathways."))
24172452
```
@@ -2447,7 +2482,7 @@ if (length(q_list) > 0) {
24472482
significant=T,
24482483
correction_method="fdr",
24492484
sources=datasources,
2450-
user_threshold=params$adj_pval_threshold,
2485+
user_threshold=params$pathway_adj_pval_threshold,
24512486
custom_bg=custom_background,
24522487
domain_scope="custom_annotated"
24532488
)
@@ -2457,7 +2492,7 @@ if (length(q_list) > 0) {
24572492
significant=T,
24582493
correction_method="fdr",
24592494
sources=datasources,
2460-
user_threshold=params$adj_pval_threshold,
2495+
user_threshold=params$pathway_adj_pval_threshold,
24612496
domain_scope="annotated"
24622497
)
24632498
}
@@ -2470,7 +2505,7 @@ if (length(q_list) > 0) {
24702505
24712506
if (nrow(path_enrich) > 0){
24722507
pg2 <- gostplot(gostres, capped=T, interactive=T)
2473-
pg2[['x']][['layout']][['annotations']][[1]][['x']] <- -params$adj_pval_threshold
2508+
pg2[['x']][['layout']][['annotations']][[1]][['x']] <- -params$pathway_adj_pval_threshold
24742509
24752510
# limit gostplot y maximum dynamically for all subplots
24762511
for (counter in c(1:length(contrast_files))) {
@@ -2691,7 +2726,7 @@ For pathway analysis, the R packages `gprofiler2 v", version_gprofiler2,
26912726
" `, `AnnotationDbi v", version_annotation,
26922727
"` and `", name_species, " v", version_annotation,
26932728
"` were used. ", database_string, ".\n",
2694-
"Pathways were classified as enriched for those genes with an adjusted p-value <= ", pval_text, "."
2729+
"Pathways were classified as enriched for those genes with an adjusted p-value <= ", pathway_pval_text, "."
26952730
))
26962731
```
26972732

bin/Execute_report.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ option_list = list(
3737
make_option("--datasources", type="character", default=NULL, help="Which datasources to use for pathway analysis.", metavar="character"),
3838
make_option("--heatmaps_cluster_rows", action="store_true", default=FALSE, help="Whether to activate row clustering when generating heatmaps of gene expression in enriched pathways."),
3939
make_option("--heatmaps_cluster_cols", action="store_true", default=FALSE, help="Whether to activate column clustering when generating heatmaps of gene expression in enriched pathways."),
40+
make_option("--pathway_adj_pval_threshold", type="double", default=-1, help="Which adjusted p value threshold to use for pathway analysis. Will by default use the same value as the value of --adj_pval_threshold (default 0.05)."),
4041

4142
make_option(c("-s", "--proj_summary"), type="character", default=NULL, help="Project summary file", metavar="character"),
4243
make_option(c("--path_quote"), type="character", default=NULL, help="Path to the quote PDF", metavar="character"),
@@ -89,6 +90,7 @@ rmarkdown::render(opt$report, output_file = opt$output, knit_root_dir = wd, outp
8990
datasources = opt$datasources,
9091
heatmaps_cluster_rows = opt$heatmaps_cluster_rows,
9192
heatmaps_cluster_cols = opt$heatmaps_cluster_cols,
93+
pathway_adj_pval_threshold = opt$pathway_adj_pval_threshold,
9294

9395
path_proj_summary = opt$proj_summary,
9496
path_quote = opt$path_quote,

conf/test.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ params {
2828
software_versions = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/software_versions.csv'
2929
multiqc = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/MultiQC.zip'
3030
run_pathway_analysis = true
31+
pathway_adj_pval_threshold = 0.0004
3132
datasources = 'KEGG,REAC'
3233
genome = 'GRCm38'
3334
quote = 'https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/dev/testdata/offer_example.pdf'

0 commit comments

Comments
 (0)