Skip to content

Commit ab38a60

Browse files
authored
Merge pull request #39 from osorensen/improve-docs
Improve docs
2 parents 349fa99 + 14b69ec commit ab38a60

File tree

4 files changed

+439
-36
lines changed

4 files changed

+439
-36
lines changed

R/compute_sequentially.R

Lines changed: 112 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,33 @@
11
#' Compute the Bayesian Mallows model sequentially
22
#'
3+
#' This function implements the nested sequential Monte Carlo (SMC2) algorithm
4+
#' for sequential learning of rank and preference data using the Bayesian
5+
#' Mallows model. The algorithm processes data sequentially over time,
6+
#' maintaining a particle approximation to the posterior distribution.
7+
#'
8+
#' @details
9+
#' The nested SMC2 algorithm consists of two levels of sequential Monte Carlo:
10+
#'
11+
#' **Outer SMC (Parameter Level)**: Maintains particles representing samples
12+
#' from the posterior distribution of static parameters (alpha, rho, tau).
13+
#' Each particle contains its own set of parameter values.
14+
#'
15+
#' **Inner SMC (Latent State Level)**: For each parameter particle, maintains
16+
#' multiple particle filters that track the evolution of latent rankings and
17+
#' cluster assignments over time. This nested structure allows the algorithm
18+
#' to handle the complex dependency structure between parameters and latent
19+
#' states.
20+
#'
21+
#' At each timepoint, the algorithm:
22+
#' 1. Propagates each parameter particle forward using MCMC moves
23+
#' 2. For each parameter particle, runs multiple particle filters to sample
24+
#' latent rankings and cluster assignments
25+
#' 3. Computes importance weights based on the likelihood of observed data
26+
#' 4. Resamples particles when effective sample size drops below threshold
27+
#' 5. Applies rejuvenation moves to maintain particle diversity
28+
#'
29+
#' The nested structure is essential for maintaining proper uncertainty
30+
#' quantification in the joint parameter-latent state space.
331
#'
432
#' @param data A dataframe containing partial rankings or pairwise preferences.
533
#' If `data` contains complete or partial rankings, it must have the following
@@ -24,12 +52,93 @@
2452
#' }
2553
#'
2654
#' @param hyperparameters A list returned from [set_hyperparameters()].
27-
#' @param smc_options A list returned from [set_smc_options()]
55+
#' @param smc_options A list returned from [set_smc_options()]. Controls the
56+
#' nested SMC2 algorithm parameters including number of parameter particles,
57+
#' number of particle filters per parameter particle, and MCMC move parameters.
2858
#' @param topological_sorts A list returned from
2959
#' [precompute_topological_sorts()]. Only used with preference data, and
30-
#' defaults to `NULL`.
60+
#' defaults to `NULL`. Contains precomputed topological sorts for efficient
61+
#' sampling from constrained ranking spaces.
62+
#'
63+
#' @return An object of class BayesMallowsSMC2 containing posterior samples
64+
#' and algorithm diagnostics.
65+
#'
66+
#' @references
67+
#' Sørensen, Ø., Stein, A., Netto, W. L., & Leslie, D. S. (2025).
68+
#' Sequential Rank and Preference Learning with the Bayesian Mallows Model.
69+
#' \emph{Bayesian Analysis}. DOI: 10.1214/25-BA1564.
70+
#'
71+
#' @seealso [set_hyperparameters()], [set_smc_options()], [precompute_topological_sorts()]
72+
#'
73+
#' @examples
74+
#' # Example with complete rankings
75+
#' library(BayesMallowsSMC2)
76+
#'
77+
#' # Generate synthetic ranking data
78+
#' set.seed(123)
79+
#' n_items <- 5
80+
#' n_users <- 20
81+
#' n_timepoints <- 10
82+
#'
83+
#' # Create synthetic data
84+
#' data <- expand.grid(
85+
#' timepoint = 1:n_timepoints,
86+
#' user = 1:n_users
87+
#' )
88+
#'
89+
#' # Add a random complete ranking (a permutation of 1:n_items) for each row
90+
#' rankings <- t(replicate(nrow(data), sample(n_items)))
91+
#' colnames(rankings) <- paste0("item", seq_len(n_items))
92+
#' data <- cbind(data, rankings)
93+
#'
94+
#' # Set hyperparameters
95+
#' hyperparams <- set_hyperparameters(
96+
#' n_items = n_items,
97+
#' alpha_shape = 2,
98+
#' alpha_rate = 1,
99+
#' n_clusters = 2
100+
#' )
101+
#'
102+
#' # Set SMC options
103+
#' smc_opts <- set_smc_options(
104+
#' n_particles = 100,
105+
#' n_particle_filters = 20,
106+
#' metric = "kendall",
107+
#' verbose = TRUE
108+
#' )
109+
#'
110+
#' # Run sequential computation
111+
#' result <- compute_sequentially(
112+
#' data = data,
113+
#' hyperparameters = hyperparams,
114+
#' smc_options = smc_opts
115+
#' )
116+
#'
117+
#' # Example with pairwise preferences
118+
#' # First precompute topological sorts
119+
#' prefs_matrix <- matrix(c(1, 2, 2, 3, 1, 3), ncol = 2, byrow = TRUE)
120+
#' topo_sorts <- precompute_topological_sorts(
121+
#' prefs = prefs_matrix,
122+
#' n_items = 3,
123+
#' save_frac = 0.1
124+
#' )
125+
#'
126+
#' # Create preference data
127+
#' pref_data <- data.frame(
128+
#' timepoint = c(1, 1, 2, 2),
129+
#' user = c(1, 2, 1, 2),
130+
#' top_item = c(1, 2, 3, 1),
131+
#' bottom_item = c(2, 3, 1, 3)
132+
#' )
133+
#'
134+
#' # Run with preferences
135+
#' result_prefs <- compute_sequentially(
136+
#' data = pref_data,
137+
#' hyperparameters = set_hyperparameters(n_items = 3),
138+
#' smc_options = set_smc_options(n_particles = 50),
139+
#' topological_sorts = topo_sorts
140+
#' )
31141
#'
32-
#' @return An object of class BayesMallowsSMC2.
33142
#' @export
34143
#'
35144
compute_sequentially <- function(

R/set_hyperparameters.R

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,83 @@
1-
#' Set hyperparameters
1+
#' Set hyperparameters for Bayesian Mallows model
22
#'
3-
#' @param n_items Integer defining the number of items.
4-
#' @param alpha_shape Shape of gamma prior for alpha.
5-
#' @param alpha_rate Rate of gamma prior for alpha.
6-
#' @param cluster_concentration Concentration parameter of Dirichlet distribution for cluster probabilities.
7-
#' @param n_clusters Integer defining the number of clusters.
3+
#' Configure prior distributions and model structure for the Bayesian Mallows
4+
#' model used in sequential estimation. This function sets hyperparameters
5+
#' for the precision parameters, modal rankings, and cluster structure.
6+
#'
7+
#' @details
8+
#' The Bayesian Mallows model assumes:
9+
#'
10+
#' **Precision Parameters**: Each cluster k has a precision parameter alpha_k
11+
#' that controls the concentration around the modal ranking. Higher values
12+
#' indicate stronger agreement. The prior is Gamma(alpha_shape, alpha_rate).
13+
#'
14+
#' **Modal Rankings**: Each cluster has a modal ranking rho_k that represents
15+
#' the "consensus" ranking for that cluster. These are sampled uniformly
16+
#' from the space of permutations.
17+
#'
18+
#' **Cluster Probabilities**: The probability of assignment to each cluster
19+
#' follows a Dirichlet distribution with concentration parameter
20+
#' cluster_concentration.
21+
#'
22+
#' **Cluster Structure**: The number of clusters must be specified a priori.
23+
#' Model selection can be performed by comparing models with different
24+
#' numbers of clusters.
25+
#'
26+
#' @param n_items Integer. Number of items being ranked. Must be provided
27+
#' and determines the dimensionality of the ranking space.
28+
#' @param alpha_shape Numeric. Shape parameter of the Gamma prior distribution
29+
#' for precision parameters alpha_k. Higher values concentrate the prior
30+
#' around higher precision values. Default is 1 (exponential prior).
31+
#' @param alpha_rate Numeric. Rate parameter of the Gamma prior distribution
32+
#' for precision parameters alpha_k. Higher values favor lower precision
33+
#' (more dispersed rankings). Default is 0.5.
34+
#' @param cluster_concentration Numeric. Concentration parameter of the
35+
#' Dirichlet prior for cluster assignment probabilities. Higher values
36+
#' favor more equal cluster sizes, while lower values allow more unbalanced
37+
#' clusters. Default is 10.
38+
#' @param n_clusters Integer. Number of mixture components (clusters) in the
39+
#' model. Each cluster has its own modal ranking and precision parameter.
40+
#' Default is 1 (single-cluster model).
41+
#'
42+
#' @return A list containing all hyperparameter values for use in
43+
#' [compute_sequentially()].
44+
#'
45+
#' @references
46+
#' Sørensen, Ø., Stein, A., Netto, W. L., & Leslie, D. S. (2025).
47+
#' Sequential Rank and Preference Learning with the Bayesian Mallows Model.
48+
#' \emph{Bayesian Analysis}. DOI: 10.1214/25-BA1564.
49+
#'
50+
#' @seealso [compute_sequentially()], [set_smc_options()]
51+
#'
52+
#' @examples
53+
#' # Basic hyperparameters for 5 items, single cluster
54+
#' basic_hyper <- set_hyperparameters(n_items = 5)
55+
#'
56+
#' # Multiple clusters with informative priors
57+
#' multi_cluster <- set_hyperparameters(
58+
#' n_items = 10,
59+
#' alpha_shape = 2, # More concentrated precision prior
60+
#' alpha_rate = 1, # Moderate precision values
61+
#' n_clusters = 3, # Three mixture components
62+
#' cluster_concentration = 5 # Allow unbalanced clusters
63+
#' )
64+
#'
65+
#' # High-precision scenario (strong agreement expected)
66+
#' high_precision <- set_hyperparameters(
67+
#' n_items = 8,
68+
#' alpha_shape = 5, # Strong prior for high precision
69+
#' alpha_rate = 0.5, # Favor high alpha values
70+
#' n_clusters = 2
71+
#' )
72+
#'
73+
#' # Low-precision scenario (weak agreement expected)
74+
#' low_precision <- set_hyperparameters(
75+
#' n_items = 6,
76+
#' alpha_shape = 1, # Weak prior
77+
#' alpha_rate = 2, # Favor low alpha values
78+
#' n_clusters = 1
79+
#' )
880
#'
9-
#' @return A list
1081
#' @export
1182
#'
1283
set_hyperparameters <- function(

R/set_smc_options.R

Lines changed: 99 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,102 @@
1-
#' Set SMC options
2-
#'
3-
#' @param n_particles Number of particles
4-
#' @param n_particle_filters Initial number of particle filters for each
5-
#' particle
6-
#' @param max_particle_filters Maximum number of particle filters.
7-
#' @param resampling_threshold Effective sample size threshold for resampling
8-
#' @param doubling_threshold Threshold for particle filter doubling. If the
9-
#' acceptance rate of the rejuvenation step falls below this threshold, the
10-
#' number of particle filters is doubled. Defaults to 0.2.
11-
#' @param max_rejuvenation_steps Maximum number of rejuvenation steps. If the
12-
#' number of unique particles has not exceeded half the number of particles
13-
#' after this many steps, the rejuvenation is still stopped.
14-
#' @param metric Metric
15-
#' @param resampler resampler
16-
#' @param latent_rank_proposal latent rank proposal
17-
#' @param verbose Boolean
18-
#' @param trace Logical specifying whether to save static parameters at each
19-
#' timestep.
20-
#' @param trace_latent Logical specifying whether to sample and save one
21-
#' complete set of latent rankings for each particle and each timepoint.
22-
#'
23-
#' @return A list
1+
#' Set SMC options for nested sequential Monte Carlo algorithm
2+
#'
3+
#' Configure parameters for the nested SMC2 algorithm used in sequential
4+
#' Bayesian Mallows model estimation. This function sets both outer-level
5+
#' (parameter particle) and inner-level (latent state particle filter)
6+
#' algorithm parameters.
7+
#'
8+
#' @details
9+
#' The nested SMC2 algorithm requires careful tuning of both levels:
10+
#'
11+
#' **Outer SMC Level**: Controls the number of parameter particles and their
12+
#' rejuvenation through MCMC moves. More particles provide better approximation
13+
#' but increase computational cost.
14+
#'
15+
#' **Inner SMC Level**: Each parameter particle maintains multiple particle
16+
#' filters for latent rankings. More filters improve latent state estimation
17+
#' but multiply computational cost by the number of parameter particles.
18+
#'
19+
#' @param n_particles Integer. Number of parameter particles in the outer SMC
20+
#' sampler. Each particle represents a sample from the posterior distribution
21+
#' of static parameters (alpha, rho, tau). Larger values improve posterior
22+
#' approximation accuracy but increase computational cost linearly.
23+
#' @param n_particle_filters Integer. Initial number of particle filters
24+
#' maintained by each parameter particle for sampling latent rankings and
25+
#' cluster assignments. This creates the nested structure where each of the
26+
#' `n_particles` parameter particles runs `n_particle_filters` inner particle
27+
#' filters.
28+
#' @param max_particle_filters Integer. Maximum number of particle filters
29+
#' allowed per parameter particle. The algorithm may double the number of
30+
#' filters when rejuvenation acceptance rates are low, up to this limit.
31+
#' @param resampling_threshold Numeric. Effective sample size threshold for
32+
#' triggering resampling of parameter particles. When ESS falls below this
33+
#' value, particles are resampled using `resampler`. Default is `n_particles / 2`.
34+
#' @param doubling_threshold Numeric. Acceptance rate threshold for particle
35+
#' filter doubling during rejuvenation. If the acceptance rate of MCMC
36+
#' rejuvenation moves falls below this threshold, the number of particle
37+
#' filters is doubled to improve mixing. Defaults to 0.2.
38+
#' @param max_rejuvenation_steps Integer. Maximum number of MCMC rejuvenation
39+
#' steps applied to parameter particles. Rejuvenation stops early if the
40+
#' number of unique particles exceeds half the total number of particles,
41+
#' indicating sufficient diversity.
42+
#' @param metric Character. Distance metric for the Mallows model. Options
43+
#' include "kendall", "cayley", "hamming", "footrule", "spearman", and "ulam".
44+
#' Different metrics capture different aspects of ranking disagreement.
45+
#' @param resampler Character. Resampling algorithm for parameter particles.
46+
#' Options include "multinomial", "residual", "stratified", and "systematic".
47+
#' Systematic resampling often provides better performance.
48+
#' @param latent_rank_proposal Character. Proposal mechanism for sampling
49+
#' latent rankings in the inner particle filters. Options include "uniform"
50+
#' and other problem-specific proposals.
51+
#' @param verbose Logical. Whether to print algorithm progress and diagnostics
52+
#' during execution. Useful for monitoring convergence and performance.
53+
#' @param trace Logical. Whether to save static parameter values (alpha, rho,
54+
#' tau) from all particles at each timestep. Enables detailed posterior
55+
#' analysis but increases memory usage.
56+
#' @param trace_latent Logical. Whether to sample and save one complete set
57+
#' of latent rankings for each parameter particle at each timepoint. Provides
58+
#' full posterior samples of latent states but significantly increases memory
59+
#' requirements.
60+
#'
61+
#' @return A list containing all SMC2 algorithm parameters for use in
62+
#' [compute_sequentially()].
63+
#'
64+
#' @references
65+
#' Sørensen, Ø., Stein, A., Netto, W. L., & Leslie, D. S. (2025).
66+
#' Sequential Rank and Preference Learning with the Bayesian Mallows Model.
67+
#' \emph{Bayesian Analysis}. DOI: 10.1214/25-BA1564.
68+
#'
69+
#' @seealso [compute_sequentially()], [set_hyperparameters()]
70+
#'
71+
#' @examples
72+
#' # Basic SMC options for small problems
73+
#' basic_opts <- set_smc_options(
74+
#' n_particles = 100,
75+
#' n_particle_filters = 20,
76+
#' metric = "kendall"
77+
#' )
78+
#'
79+
#' # High-precision options for larger problems
80+
#' precise_opts <- set_smc_options(
81+
#' n_particles = 1000,
82+
#' n_particle_filters = 100,
83+
#' max_particle_filters = 500,
84+
#' resampling_threshold = 500,
85+
#' metric = "footrule",
86+
#' resampler = "systematic",
87+
#' verbose = TRUE,
88+
#' trace = TRUE
89+
#' )
90+
#'
91+
#' # Memory-efficient options
92+
#' efficient_opts <- set_smc_options(
93+
#' n_particles = 200,
94+
#' n_particle_filters = 30,
95+
#' trace = FALSE,
96+
#' trace_latent = FALSE,
97+
#' verbose = FALSE
98+
#' )
99+
#'
24100
#' @export
25101
#'
26102
set_smc_options <- function(

0 commit comments

Comments
 (0)