Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@
^Makefile$
^Dockerfile$
^CRAN-SUBMISSION$
^agents\.md$
^.*\.code-workspace$
^dev$
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ parameter_trace
*.Rcheck
*.tar.gz
README.html
_codeql_detected_source_root
_codeql_detected_source_root
dev/
agents.md
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: BayesMallowsSMC2
Type: Package
Title: Nested Sequential Monte Carlo for the Bayesian Mallows Model
Version: 0.2.1
Version: 0.3.0
Authors@R: c(person("Oystein", "Sorensen",
email = "oystein.sorensen.1985@gmail.com",
role = c("aut", "cre"),
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# BayesMallowsSMC2 version 0.3.0

## New features

* Implemented Conditional Particle Filter with Independent Backward Simulation (CPF-IBS) to mitigate path degeneracy during the rejuvenation step. This can be enabled via `backward_sampling = TRUE` in `set_smc_options()`.

# BayesMallowsSMC2 version 0.2.1

## Bug fixes
Expand Down
42 changes: 24 additions & 18 deletions R/compute_sequentially.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,16 @@
#' algorithm automatically performs resampling and rejuvenation steps when the
#' effective sample size drops below the specified threshold.
#'
#' Rejuvenation steps can optionally use Particle Gibbs with Backward Simulation
#' \insertCite{Whiteley2010Discussion,Lindsten2013Backward}{BayesMallowsSMC2}, enabled via the `backward_sampling` argument in [set_smc_options()].
#'
#' The returned object has S3 methods for printing ([print.BayesMallowsSMC2]),
#' summarizing ([summary.BayesMallowsSMC2]), and plotting ([plot.BayesMallowsSMC2]).
#' For visualization of parameter evolution over time, see [trace_plot()].
#'
#' @references
#' \insertAllCited{}
#'
#' \insertRef{10.1214/25-BA1564}{BayesMallowsSMC2}
#'
#' @export
Expand All @@ -100,31 +105,31 @@
#' plot(mod, parameter = "alpha")
#'
compute_sequentially <- function(
data,
hyperparameters = set_hyperparameters(),
smc_options = set_smc_options(),
topological_sorts = NULL
){
data,
hyperparameters = set_hyperparameters(),
smc_options = set_smc_options(),
topological_sorts = NULL
) {
rank_columns <- grepl("item[0-9]+", colnames(data))
preference_columns <- grepl("top\\_item|bottom\\_item", colnames(data))

if(any(rank_columns)) {
input_timeseries <- split(data, f = ~ timepoint) |>
lapply(split, f = ~ user) |>
if (any(rank_columns)) {
input_timeseries <- split(data, f = ~timepoint) |>
lapply(split, f = ~user) |>
lapply(function(x) lapply(x, function(y) as.numeric(y[rank_columns])))

if(any(is.na(data[rank_columns]))) {
if (any(is.na(data[rank_columns]))) {
attr(input_timeseries, "type") <- "partial rankings"
} else {
attr(input_timeseries, "type") <- "complete rankings"
}
sort_matrices <- sort_counts <- list()
} else if(sum(preference_columns) == 2) {
if(is.null(topological_sorts)) {
} else if (sum(preference_columns) == 2) {
if (is.null(topological_sorts)) {
stop("topological_sorts must be provided with preference data.")
}
input_timeseries <- split(data, f = ~ timepoint) |>
lapply(split, f = ~ user) |>
input_timeseries <- split(data, f = ~timepoint) |>
lapply(split, f = ~user) |>
lapply(function(x) lapply(x, function(y) as.matrix(y[preference_columns])))
attr(input_timeseries, "type") <- "pairwise preferences"

Expand All @@ -139,15 +144,16 @@ compute_sequentially <- function(
stop("Something wrong with data")
}

if(max(table(data$user)) > 1 &&
attr(input_timeseries, "type") != "pairwise preferences") {
if (max(table(data$user)) > 1 &&
attr(input_timeseries, "type") != "pairwise preferences") {
stop("Updated users not supported.")
}

ret <- run_smc(input_timeseries, hyperparameters, smc_options,
sort_matrices, sort_counts)
ret <- run_smc(
input_timeseries, hyperparameters, smc_options,
sort_matrices, sort_counts
)

class(ret) <- "BayesMallowsSMC2"
ret
}

18 changes: 12 additions & 6 deletions R/set_smc_options.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
#' complete set of latent rankings for each particle at each timepoint. This
#' can be used to inspect the evolution of rankings over time but
#' substantially increases memory usage. Defaults to `FALSE`.
#' @param backward_sampling Logical specifying whether to use Particle Gibbs with
#' Backward Simulation (PGBS) \insertCite{Whiteley2010Discussion,Lindsten2013Backward}{BayesMallowsSMC2} during the rejuvenation step. Defaults to `FALSE`.
#'
#' @details
#' The SMC2 algorithm uses a nested particle filter structure:
Expand All @@ -73,6 +75,9 @@
#'
#' @seealso [compute_sequentially()], [set_hyperparameters()]
#'
#' @references
#' \insertAllCited{}
#'
#' @export
#'
#' @examples
Expand Down Expand Up @@ -121,11 +126,12 @@
#' )
#'
set_smc_options <- function(
n_particles = 1000, n_particle_filters = 50, max_particle_filters = 10000,
resampling_threshold = n_particles / 2, doubling_threshold = .2,
max_rejuvenation_steps = 20,
metric = "footrule", resampler = "multinomial",
latent_rank_proposal = "uniform", verbose = FALSE,
trace = FALSE, trace_latent = FALSE) {
n_particles = 1000, n_particle_filters = 50, max_particle_filters = 10000,
resampling_threshold = n_particles / 2, doubling_threshold = .2,
max_rejuvenation_steps = 20,
metric = "footrule", resampler = "multinomial",
latent_rank_proposal = "uniform", verbose = FALSE,
trace = FALSE, trace_latent = FALSE, backward_sampling = FALSE
) {
as.list(environment())
}
26 changes: 26 additions & 0 deletions agents.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Role and Identity
You are an expert C++ and RcppArmadillo developer specializing in Bayesian computation, Sequential Monte Carlo (SMC2), and Particle Markov Chain Monte Carlo (PMCMC) methods. You are working on the `BayesMallowsSMC2` R package.

# Project Context
The `BayesMallowsSMC2` package implements sequential inference for the Bayesian Mallows Model using an SMC2 algorithm. Currently, the package uses a standard Conditional Particle Filter (CPF) for the Particle Gibbs rejuvenation step. This standard approach suffers from path degeneracy because it relies on deterministic forward ancestral tracing, which severely limits the mixing of latent variables for early users.

# Objective
Your goal is to upgrade the rejuvenation step by implementing **Conditional Particle Filter with Independent Backward Simulation (CPF-IBS)**. Because cross-sectional user batches are conditionally independent given the static parameters, the standard O(S^2) backward simulation simplifies exactly to O(S) independent categorical draws from the marginal forward filtering weights.

# Mathematical Specification of CPF-IBS
1. **Forward Pass:** The conditional particle filter processes users at time t = 1 ... T. It computes normalized weights W_t and latent states x_t for particles s = 1 ... S.
*Crucial Change:* We no longer need to track or store the forward ancestor indices (a_t).
2. **Backward Pass:** Instead of recursive pointer lookups (e.g. b_{t-1} = a_{t-1}^{b_t}), we assemble the new reference trajectory by looping backward from t = T down to 1.
3. **Sampling:** At each timestep t, sample an index b_t in {1, ..., S} independently with probabilities W_t.
4. **Extraction:** Extract the latent variables for time t from the sampled particle b_t.

# Codebase Rules & Architectural Constraints (RcppArmadillo)
1. **Memory Optimization:** Locate the T x S matrix of ancestor variables (likely an `arma::umat a` matrix inside the conditional particle filter / SMC structures) and completely remove it. This will drastically reduce the memory footprint. You only need to track the weights W_t.
2. **Weighted Sampling in C++:** When sampling the index b_t ~ W_t during trajectory assembly, do not use `arma::randi`: it only draws uniformly distributed integers and cannot take custom probabilities. Instead, sample b_t independently with a weighted sampler that takes the normalized W_t as its probability vector, e.g. `Rcpp::sample()` or `Rcpp::RcppArmadillo::sample()` (from `RcppArmadilloExtensions/sample.h`). Both draw from R's RNG, so `set.seed()` from R remains perfectly reproducible.
3. **Clean Integration:** Modify the Rejuvenation functions (Algorithm 4 / S3) so that trajectory assembly happens *after* the forward pass completes, iterating in reverse.
4. **Testing:** Ensure that the R package still compiles (`Rcpp::compileAttributes()`, `devtools::document()`, `devtools::load_all()`) and passes all existing tests (`devtools::test()`).

# Interaction Guidelines
- Before writing code, use search tools to find the exact C++ files handling the Conditional Particle Filter and Rejuvenation steps in the `src/` directory.
- Explain your planned modifications to the C++ logic before executing them.
- Keep performance in mind: cache locality and avoiding deep copies of Armadillo matrices inside the t-loop are critical.
Comment on lines +1 to +26
9 changes: 3 additions & 6 deletions cran-comments.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,9 @@

## Submission notes

This is a resubmission to fix test failures reported on CRAN checks for the BayesMallowsSMC2 package.
This release introduces major algorithmic improvements for the SMC2 rejuvenation step.

### Changes in this version

* Adjusted numerical tolerance in `test-compute_sequentially_partial.R` (line 11) to account for platform-specific differences in Monte Carlo results. The test was failing on r-oldrel-macos and noLD platforms due to slight variations in the computed alpha_hat value (0.046 vs expected > 0.06). The tolerance has been relaxed from 0.06 to 0.04 to accommodate these platform differences while still ensuring the test validates the expected behavior.

## Previous submission

This package was initially released to CRAN as version 0.2.0. The test failures appeared after release on specific platforms (r-oldrel-macos and noLD) due to numerical differences in stochastic computations.
* Implemented Conditional Particle Filter with Independent Backward Simulation (CPF-IBS) for the rejuvenation step. This mitigates path degeneracy in latent variables during Particle Gibbs and improves mixing. It is opt-in via the `backward_sampling` argument in `set_smc_options()`, which defaults to `FALSE`.
* Retained numerical tolerance fixes from 0.2.1 that accommodated platform-specific variations on r-oldrel-macos and noLD.
20 changes: 20 additions & 0 deletions inst/REFERENCES.bib
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,23 @@ @article{10.1214/25-BA1564
doi = {10.1214/25-BA1564},
URL = {https://doi.org/10.1214/25-BA1564}
}

@article{Whiteley2010Discussion,
author = {Whiteley, N.},
title = {{Discussion on 'Particle Markov chain Monte Carlo methods'}},
journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
volume = {72},
number = {3},
pages = {306--307},
year = {2010}
}
@article{Lindsten2013Backward,
title={{Backward simulation methods for Monte Carlo statistical inference}},
author={Lindsten, Fredrik and Sch{\"o}n, Thomas B.},
journal={Foundations and Trends{\textregistered} in Machine Learning},
volume={6},
number={1},
pages={1--143},
year={2013},
publisher={Now Publishers, Inc.}
}
5 changes: 5 additions & 0 deletions man/compute_sequentially.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion man/set_smc_options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion src/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ Options::Options(const Rcpp::List& input_options) :
doubling_threshold{input_options["doubling_threshold"]},
verbose{input_options["verbose"]},
trace{input_options["trace"]},
trace_latent{input_options["trace_latent"]}{}
trace_latent{input_options["trace_latent"]},
backward_sampling{input_options["backward_sampling"]} {}
1 change: 1 addition & 0 deletions src/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ struct Options{
const bool verbose;
const bool trace;
const bool trace_latent;
const bool backward_sampling;
};
Loading
Loading