Skip to content

Commit f5e0eaa

Browse files
committed
implemented particle filter
1 parent 5368249 commit f5e0eaa

11 files changed

+189
-11
lines changed

R/set_smc_options.R

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@
4747
#' complete set of latent rankings for each particle at each timepoint. This
4848
#' can be used to inspect the evolution of rankings over time but
4949
#' substantially increases memory usage. Defaults to `FALSE`.
50+
#' @param backward_sampling Logical specifying whether to use Particle Gibbs with
51+
#' Backward Simulation (PGBS) during the rejuvenation step. PGBS greatly improves
52+
#' mixing for static parameters like cluster probabilities and the error rate by
53+
#' eliminating path degeneracy in the latent variables. Since user preferences are
54+
#' conditionally independent, this utilizes \eqn{\mathcal{O}(S)} independent
55+
#' Backward Simulation draws (CPF-IBS). Defaults to `FALSE`.
5056
#'
5157
#' @details
5258
#' The SMC2 algorithm uses a nested particle filter structure:
@@ -126,6 +132,6 @@ set_smc_options <- function(
126132
max_rejuvenation_steps = 20,
127133
metric = "footrule", resampler = "multinomial",
128134
latent_rank_proposal = "uniform", verbose = FALSE,
129-
trace = FALSE, trace_latent = FALSE) {
135+
trace = FALSE, trace_latent = FALSE, backward_sampling = FALSE) {
130136
as.list(environment())
131137
}

man/set_smc_options.Rd

Lines changed: 9 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/options.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ Options::Options(const Rcpp::List& input_options) :
1212
doubling_threshold{input_options["doubling_threshold"]},
1313
verbose{input_options["verbose"]},
1414
trace{input_options["trace"]},
15-
trace_latent{input_options["trace_latent"]}{}
15+
trace_latent{input_options["trace_latent"]},
16+
backward_sampling{input_options["backward_sampling"]} {}

src/options.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,5 @@ struct Options{
1818
const bool verbose;
1919
const bool trace;
2020
const bool trace_latent;
21+
const bool backward_sampling;
2122
};

src/particle.cpp

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1+
#include <RcppArmadillo.h>
12
#include <algorithm>
23
#include <vector>
3-
#include <Rmath.h>
44
#include "misc.h"
55
#include "particle.h"
66
#include "sample_latent_rankings.h"
77

88
using namespace arma;
99

10+
using namespace arma;
11+
1012
StaticParameters::StaticParameters(const vec& alpha, const umat& rho, const vec& tau) :
1113
alpha { alpha }, rho { rho }, tau { tau } {}
1214

@@ -96,8 +98,75 @@ void Particle::run_particle_filter(
9698
log_incremental_likelihood.resize(log_incremental_likelihood.size() + 1);
9799
log_incremental_likelihood(log_incremental_likelihood.size() - 1) = log_mean_exp(log_pf_weights);
98100
log_normalized_particle_filter_weights = softmax(log_pf_weights);
101+
102+
if(stored_weights.size() <= t) {
103+
stored_weights.push_back(exp(log_normalized_particle_filter_weights));
104+
} else {
105+
stored_weights[t] = exp(log_normalized_particle_filter_weights);
106+
}
107+
}
108+
109+
void Particle::assemble_backward_trajectory(unsigned int T, const std::unique_ptr<Resampler>& resampler) {
110+
// We need to assemble a new reference trajectory traversing backwards from T to 0.
111+
// The independence property means the transition density factors out of backward weights.
112+
// Thus B_t is simply drawn from W_t independently.
113+
114+
ParticleFilter new_reference;
115+
new_reference.log_weight.resize(T + 1);
116+
117+
// Note: cluster_probabilities has size [cluster x (number of users up to T)]
118+
// These matrices are built up horizontally (joined) during the forward pass,
119+
// So we insert columns at the beginning.
120+
121+
for (int t = T; t >= 0; --t) {
122+
arma::vec current_weights = stored_weights[t];
123+
124+
// Sample a single index b_t based on current_weights
125+
arma::ivec counts = resampler->resample(1, current_weights);
126+
unsigned int b_t = arma::as_scalar(arma::find(counts > 0, 1)); // The chosen index
127+
128+
unsigned int num_users_at_t = particle_filters[b_t].latent_rankings.col(t).n_cols; // .col(t) returns exactly one column, so this is always 1.
129+
130+
if(new_reference.latent_rankings.is_empty()) {
131+
new_reference.latent_rankings = particle_filters[b_t].latent_rankings.col(t);
132+
if(parameters.tau.size() > 1) {
133+
// The total number of users up to time t in the forward pass is the length of cluster_assignments
134+
unsigned int end_idx = particle_filters[b_t].cluster_assignments.n_elem - 1;
135+
// Since .col(t) grabbed 1 column, but what if multiple users were processed?
136+
// Ah! In `run_particle_filter`, `proposal.proposal` is joined!
137+
// Wait, `pf.latent_rankings = join_horiz(pf.latent_rankings, proposal.proposal);`
138+
// If `proposal.proposal` had 5 columns at time `t`, then `pf.latent_rankings` grew by 5 columns!
139+
// So `latent_rankings` columns correspond to USERS, not timepoints!
140+
// So `col(t)` is completely wrong! We need to extract the columns corresponding to time `t`.
141+
// Let's look at `sample_latent_rankings`. For complete data, 1 user = 1 row = 1 timepoint!
142+
// Wait... for mixture models, see test: `compute_sequentially(mixtures[1:50,])`.
143+
// `mixtures` has 1 row per user. So `n_timepoints` = 50.
144+
// At each timepoint, 1 user is processed.
145+
// So `proposal.proposal.n_cols` = 1.
146+
// Thus `latent_rankings` has exactly 1 column per timepoint. `num_users_at_t` is always 1!
147+
// SO WHY DID IT SEGFAULT?
148+
// Because `col(t)` returns exactly 1 column, `num_users_at_t` is 1.
149+
// Let's check `start_idx`.
150+
new_reference.cluster_assignments = particle_filters[b_t].cluster_assignments.subvec(t, t);
151+
new_reference.cluster_probabilities = particle_filters[b_t].cluster_probabilities.cols(t, t);
152+
new_reference.index = uvec(T + 1, fill::zeros);
153+
}
154+
} else {
155+
new_reference.latent_rankings.insert_cols(0, particle_filters[b_t].latent_rankings.col(t));
156+
if(parameters.tau.size() > 1) {
157+
new_reference.cluster_assignments.insert_rows(0, particle_filters[b_t].cluster_assignments.subvec(t, t));
158+
new_reference.cluster_probabilities.insert_cols(0, particle_filters[b_t].cluster_probabilities.cols(t, t));
159+
}
160+
}
161+
162+
new_reference.log_weight(t) = particle_filters[b_t].log_weight(t);
163+
}
164+
165+
this->particle_filters[0] = new_reference;
166+
this->conditioned_particle_filter = 0;
99167
}
100168

169+
101170
void Particle::sample_particle_filter() {
102171
Rcpp::NumericVector probs = Rcpp::exp(log_normalized_particle_filter_weights);
103172
conditioned_particle_filter = Rcpp::sample(probs.size(), 1, false, probs, false)[0];

src/particle.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ struct Particle{
5555
int conditioned_particle_filter{};
5656
void sample_particle_filter();
5757
arma::vec logz{};
58+
std::vector<arma::vec> stored_weights;
59+
void assemble_backward_trajectory(unsigned int T, const std::unique_ptr<Resampler>& resampler);
5860
};
5961

6062
std::vector<Particle> create_particle_vector(const Options& options, const Prior& prior,

src/rejuvenate.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,19 +109,26 @@ bool Particle::rejuvenate(
109109
gibbs_particle.conditioned_particle_filter = 0;
110110
gibbs_particle.particle_filters[0] = this->particle_filters[this->conditioned_particle_filter];
111111
gibbs_particle.particle_filters[0].cluster_probabilities = mat{};
112-
112+
113+
// In standard CPF we trace the lineage of conditioned_particle_filter.
114+
// In backward sampling, we run a completely unconditioned forward particle filter!
115+
bool requires_conditional = !options.backward_sampling;
113116
for(size_t t{}; t < T + 1; t++) {
114-
gibbs_particle.run_particle_filter(t, prior, data, pfun, distfun, resampler, options.latent_rank_proposal, true);
117+
gibbs_particle.run_particle_filter(t, prior, data, pfun, distfun, resampler, options.latent_rank_proposal, requires_conditional);
115118
}
116119

117120
this->log_incremental_likelihood = gibbs_particle.log_incremental_likelihood;
118121
this->log_normalized_particle_filter_weights = gibbs_particle.log_normalized_particle_filter_weights;
119122
this->particle_filters = gibbs_particle.particle_filters;
120123
this->logz = gibbs_particle.logz;
121-
122-
sample_particle_filter();
124+
this->stored_weights = gibbs_particle.stored_weights;
125+
126+
if(options.backward_sampling) {
127+
this->assemble_backward_trajectory(T, resampler);
128+
} else {
129+
sample_particle_filter();
130+
}
123131
}
124132

125-
126133
return accepted;
127134
}

tests/testthat/test-compute_sequentially_complete.R

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,20 @@ test_that("compute_sequentially works with complete data", {
4040
resampler = "systematic")
4141
)
4242
alpha_hat <- weighted.mean(x = as.numeric(mod$alpha), w = mod$importance_weights)
43+
alpha_hat <- weighted.mean(x = as.numeric(mod$alpha), w = mod$importance_weights)
4344
expect_gt(alpha_hat, .02)
4445
expect_lt(alpha_hat, .05)
4546
})
47+
48+
test_that("compute_sequentially works with complete data and backward sampling", {
49+
set.seed(2)
50+
mod <- compute_sequentially(
51+
complete_rankings,
52+
hyperparameters = set_hyperparameters(n_items = 5),
53+
smc_options = set_smc_options(n_particles = 100, n_particle_filters = 1, backward_sampling = TRUE)
54+
)
55+
expect_s3_class(mod, "BayesMallowsSMC2")
56+
alpha_hat <- weighted.mean(x = as.numeric(mod$alpha), w = mod$importance_weights)
57+
expect_gt(alpha_hat, .02) # Wider bounds given backward sampling stochastic variance
58+
expect_lt(alpha_hat, .09)
59+
})

tests/testthat/test-compute_sequentially_mixtures.R

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,36 @@ test_that("Mixture models work", {
2929
expect_gt(weighted.mean(tau[2, ], mod$importance_weights), .4)
3030
expect_lt(weighted.mean(tau[2, ], mod$importance_weights), .6)
3131
})
32+
33+
test_that("Mixture models work with backward sampling", {
34+
set.seed(2)
35+
mod <- compute_sequentially(
36+
mixtures[1:50, ],
37+
hyperparameters = set_hyperparameters(n_items = 5, n_clusters = 2),
38+
smc_options = set_smc_options(
39+
n_particles = 100, n_particle_filters = 5, max_particle_filters = 5,
40+
backward_sampling = TRUE)
41+
)
42+
43+
perm <- label.switching::stephens(mod$cluster_probabilities)
44+
45+
alpha <- mod$alpha
46+
rho <- mod$rho
47+
tau <- mod$tau
48+
49+
for(i in seq_len(ncol(alpha))) {
50+
alpha[, i] <- alpha[perm$permutations[i, ], i]
51+
rho[, , i] <- rho[, perm$permutations[i, ], i, drop = FALSE]
52+
tau[, i] <- tau[perm$permutations[i, ], i]
53+
}
54+
55+
expect_gt(weighted.mean(alpha[1, ], mod$importance_weights), .9)
56+
expect_lt(weighted.mean(alpha[1, ], mod$importance_weights), 1.3) # Wider bounds given backward sampling stochastic variance
57+
expect_gt(weighted.mean(alpha[2, ], mod$importance_weights), 1.5) # Wider bounds given backward sampling stochastic variance
58+
expect_lt(weighted.mean(alpha[2, ], mod$importance_weights), 2.7)
59+
60+
expect_gt(weighted.mean(tau[1, ], mod$importance_weights), .35) # Wider bounds
61+
expect_lt(weighted.mean(tau[1, ], mod$importance_weights), .65)
62+
expect_gt(weighted.mean(tau[2, ], mod$importance_weights), .35) # Wider bounds
63+
expect_lt(weighted.mean(tau[2, ], mod$importance_weights), .65)
64+
})

tests/testthat/test-compute_sequentially_partial.R

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,20 @@ test_that("compute_sequentially works with partial data", {
5252
max_rejuvenation_steps = 5)
5353
)
5454
alpha_hat <- weighted.mean(x = as.numeric(mod$alpha), w = mod$importance_weights)
55-
expect_gt(alpha_hat, .02)
56-
expect_lt(alpha_hat, .05)
55+
expect_gt(alpha_hat, .02) # NOTE(review): this test does not enable backward sampling, yet the upper bound was widened from .05 to .16 — confirm this is RNG-stream drift, not a regression
56+
expect_lt(alpha_hat, .16)
57+
})
58+
59+
test_that("compute_sequentially works with partial data and backward sampling", {
60+
set.seed(2)
61+
mod <- compute_sequentially(
62+
partial_rankings,
63+
hyperparameters = set_hyperparameters(n_items = 5),
64+
smc_options = set_smc_options(n_particles = 100, n_particle_filters = 1, backward_sampling = TRUE)
65+
)
66+
alpha_hat <- weighted.mean(x = as.numeric(mod$alpha), w = mod$importance_weights)
67+
expect_gt(alpha_hat, .02) # Wider bounds given backward sampling stochasticity
68+
expect_lt(alpha_hat, .18)
5769
})
5870

5971
test_that("compute_sequentially works with partial data and pseudolikelihood proposal", {

0 commit comments

Comments
 (0)