Skip to content

Commit 9c3a950

Browse files
Copilot and osorensen committed
Optimize data frame construction to avoid inefficient rbind in loops
Co-authored-by: osorensen <21175639+osorensen@users.noreply.github.com>
1 parent aa92844 commit 9c3a950

File tree

1 file changed

+14
-9
lines changed

1 file changed

+14
-9
lines changed

R/plot.R

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ plot_alpha_smc <- function(x) {
9595

9696
# Create a data frame for plotting
9797
# Repeat each alpha value according to its weight to create weighted histogram
98-
plot_data <- data.frame()
98+
plot_data_list <- vector("list", n_clusters)
9999

100100
for (cluster in seq_len(n_clusters)) {
101101
alpha_vals <- alpha_matrix[cluster, ]
@@ -108,13 +108,14 @@ plot_alpha_smc <- function(x) {
108108
replace = TRUE, prob = sample_probs)
109109
sampled_alpha <- alpha_vals[sampled_indices]
110110

111-
cluster_data <- data.frame(
111+
plot_data_list[[cluster]] <- data.frame(
112112
value = sampled_alpha,
113113
cluster = if (n_clusters > 1) paste0("Cluster ", cluster) else "All"
114114
)
115-
plot_data <- rbind(plot_data, cluster_data)
116115
}
117116

117+
plot_data <- do.call(rbind, plot_data_list)
118+
118119
# Create histogram
119120
p <- ggplot2::ggplot(plot_data, ggplot2::aes(x = value)) +
120121
ggplot2::geom_histogram(bins = 30, fill = "steelblue", color = "white") +
@@ -143,7 +144,7 @@ plot_tau_smc <- function(x) {
143144
n_particles <- ncol(tau_matrix)
144145

145146
# Create a data frame for plotting
146-
plot_data <- data.frame()
147+
plot_data_list <- vector("list", n_clusters)
147148

148149
for (cluster in seq_len(n_clusters)) {
149150
tau_vals <- tau_matrix[cluster, ]
@@ -155,13 +156,14 @@ plot_tau_smc <- function(x) {
155156
replace = TRUE, prob = sample_probs)
156157
sampled_tau <- tau_vals[sampled_indices]
157158

158-
cluster_data <- data.frame(
159+
plot_data_list[[cluster]] <- data.frame(
159160
value = sampled_tau,
160161
cluster = if (n_clusters > 1) paste0("Cluster ", cluster) else "All"
161162
)
162-
plot_data <- rbind(plot_data, cluster_data)
163163
}
164164

165+
plot_data <- do.call(rbind, plot_data_list)
166+
165167
# Create histogram
166168
p <- ggplot2::ggplot(plot_data, ggplot2::aes(x = value)) +
167169
ggplot2::geom_histogram(bins = 30, fill = "steelblue", color = "white") +
@@ -208,7 +210,8 @@ plot_rho_smc <- function(x, items = NULL) {
208210
}
209211

210212
# Create data frame for plotting
211-
plot_data <- data.frame()
213+
plot_data_list <- vector("list", length(items) * n_clusters * n_items)
214+
idx <- 1
212215

213216
for (item_idx in items) {
214217
for (cluster in seq_len(n_clusters)) {
@@ -220,17 +223,19 @@ plot_rho_smc <- function(x, items = NULL) {
220223
# Weight of particles where this item has this rank
221224
prob <- sum(weights[rankings == rank]) / sum(weights)
222225

223-
cluster_data <- data.frame(
226+
plot_data_list[[idx]] <- data.frame(
224227
item = paste0("Item ", item_idx),
225228
cluster = if (n_clusters > 1) paste0("Cluster ", cluster) else "All",
226229
rank = rank,
227230
probability = prob
228231
)
229-
plot_data <- rbind(plot_data, cluster_data)
232+
idx <- idx + 1
230233
}
231234
}
232235
}
233236

237+
plot_data <- do.call(rbind, plot_data_list)
238+
234239
# Create bar chart
235240
p <- ggplot2::ggplot(plot_data, ggplot2::aes(x = factor(rank), y = probability)) +
236241
ggplot2::geom_col(fill = "steelblue") +

0 commit comments

Comments
 (0)