Skip to content

Commit 65518a8

Browse files
authored
Merge pull request #44 from osorensen/update-docs
Update docs
2 parents 55d2530 + da21639 commit 65518a8

File tree

10 files changed

+367
-110
lines changed

10 files changed

+367
-110
lines changed

DESCRIPTION

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ LinkingTo:
2121
Rcpp,
2222
RcppArmadillo
2323
Imports:
24-
Rcpp
24+
Rcpp,
25+
Rdpack
26+
RdMacros: Rdpack
2527
Depends:
2628
R (>= 4.1.0)
2729
Suggests:

R/compute_sequentially.R

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,72 @@
11
#' Compute the Bayesian Mallows model sequentially
22
#'
3+
#' This function performs sequential Bayesian inference for the Mallows model
4+
#' using the SMC² (Sequential Monte Carlo squared) algorithm. It can handle
5+
#' both complete/partial rankings and pairwise preference data that arrive
6+
#' sequentially over time.
37
#'
4-
#' @param data A dataframe containing partial rankings or pairwise preferences.
5-
#' If `data` contains complete or partial rankings, it must have the following
6-
#' columns:
8+
#' @param data A data frame containing ranking or preference data with temporal
9+
#' structure. The data frame must include `timepoint` and `user` columns.
710
#'
8-
#' \itemize{
9-
#' \item `timepoint`: a numeric vector denoting the timepoint, starting at 1.
10-
#' \item `user`: a vector identifying the user.
11-
#' \item `item1`: ranking of item 1.
12-
#' \item `item2`: ranking of item 2.
13-
#' \item etc.
14-
#' }
11+
#' For complete or partial rankings, the columns should be:
12+
#' \itemize{
13+
#' \item `timepoint`: Numeric vector denoting the timepoint, starting at 1.
14+
#' \item `user`: Vector identifying the user providing the ranking.
15+
#' \item `item1`, `item2`, etc.: Rankings of items (use NA for missing items
16+
#' in partial rankings).
17+
#' }
1518
#'
16-
#' If data contains pairwise preferences, it must have the following
17-
#' structure:
19+
#' For pairwise preferences, the structure should be:
20+
#' \itemize{
21+
#' \item `timepoint`: Numeric vector denoting the timepoint, starting at 1.
22+
#' \item `user`: Vector identifying the user providing the preference.
23+
#' \item `top_item`: Identifier for the preferred item.
24+
#' \item `bottom_item`: Identifier for the less preferred item.
25+
#' }
1826
#'
19-
#' \itemize{
20-
#' \item `timepoint`: a numeric vector denoting the timepoint, starting at 1.
21-
#' \item `user`: a vector identifying the user.
22-
#' \item `top_item`: identifier for the preferred item.
23-
#' \item `bottom_item`: identifier for the dispreferred item.
24-
#' }
25-
#'
26-
#' @param hyperparameters A list returned from [set_hyperparameters()].
27-
#' @param smc_options A list returned from [set_smc_options()]
27+
#' @param hyperparameters A list of hyperparameters returned from
28+
#' \code{\link{set_hyperparameters}}. Defines the prior distributions for
29+
#' model parameters.
30+
#' @param smc_options A list of SMC algorithm options returned from
31+
#' \code{\link{set_smc_options}}. Controls the behavior of the particle
32+
#' filtering algorithm.
2833
#' @param topological_sorts A list returned from
29-
#' [precompute_topological_sorts()]. Only used with preference data, and
30-
#' defaults to `NULL`.
34+
#' \code{\link{precompute_topological_sorts}}. Required when using pairwise
35+
#' preference data, otherwise should be \code{NULL} (default).
36+
#'
37+
#' @return An object of class \code{BayesMallowsSMC2} containing the results
38+
#' of the sequential inference, including parameter traces, log marginal
39+
#' likelihood estimates, and other algorithm diagnostics.
40+
#'
41+
#' @examples
42+
#' # Example with complete rankings
43+
#' set.seed(123)
44+
#' n_items <- 4
45+
#'
46+
#' # Create synthetic ranking data
47+
#' ranking_data <- data.frame(
48+
#' timepoint = c(1, 1, 2, 2),
49+
#' user = c(1, 2, 3, 4),
50+
#' item1 = c(1, 2, 1, 3),
51+
#' item2 = c(2, 1, 3, 1),
52+
#' item3 = c(3, 4, 2, 4),
53+
#' item4 = c(4, 3, 4, 2)
54+
#' )
55+
#'
56+
#' # Set up hyperparameters and options
57+
#' hyper <- set_hyperparameters(n_items = n_items)
58+
#' opts <- set_smc_options(n_particles = 100, verbose = FALSE)
59+
#'
60+
#' # Run sequential inference
61+
#' result <- compute_sequentially(
62+
#' data = ranking_data,
63+
#' hyperparameters = hyper,
64+
#' smc_options = opts
65+
#' )
66+
#'
67+
#' @references
68+
#' \insertRef{sorensen2025sequential}{BayesMallowsSMC2}
3169
#'
32-
#' @return An object of class BayesMallowsSMC2.
3370
#' @export
3471
#'
3572
compute_sequentially <- function(
@@ -76,7 +113,7 @@ compute_sequentially <- function(
76113
user_timepoint_combinations <- unique(data[c("user", "timepoint")])
77114
if(max(table(user_timepoint_combinations$user)) > 1) {
78115
stop("Each user can only enter the pool once. Users appearing at multiple timepoints: ",
79-
paste(names(table(user_timepoint_combinations$user))[table(user_timepoint_combinations$user) > 1],
116+
paste(names(table(user_timepoint_combinations$user))[table(user_timepoint_combinations$user) > 1],
80117
collapse = ", "))
81118
}
82119

R/set_hyperparameters.R

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,41 @@
1-
#' Set hyperparameters
1+
#' Set hyperparameters for Bayesian Mallows model
22
#'
3-
#' @param n_items Integer defining the number of items.
4-
#' @param alpha_shape Shape of gamma prior for alpha.
5-
#' @param alpha_rate Rate of gamma prior for alpha.
6-
#' @param cluster_concentration Concentration parameter of Dirichlet distribution for cluster probabilities.
7-
#' @param n_clusters Integer defining the number of clusters.
3+
#' This function creates a list of hyperparameters for the Bayesian Mallows model
4+
#' used in sequential Monte Carlo inference. The hyperparameters define the prior
5+
#' distributions for the model parameters.
86
#'
9-
#' @return A list
7+
#' @param n_items Integer defining the number of items to be ranked. Must be a
8+
#' positive integer.
9+
#' @param alpha_shape Positive numeric value specifying the shape parameter of
10+
#' the gamma prior distribution for the scale parameter alpha. Default is 1.
11+
#' @param alpha_rate Positive numeric value specifying the rate parameter of
12+
#' the gamma prior distribution for the scale parameter alpha. Default is 0.5.
13+
#' @param cluster_concentration Positive numeric value specifying the
14+
#' concentration parameter of the Dirichlet distribution for cluster
15+
#' probabilities. Default is 10.
16+
#' @param n_clusters Positive integer defining the number of clusters in the
17+
#' mixture model. Default is 1.
18+
#'
19+
#' @return A list containing the hyperparameter values with elements:
20+
#' \item{n_items}{Number of items}
21+
#' \item{alpha_shape}{Shape parameter for alpha prior}
22+
#' \item{alpha_rate}{Rate parameter for alpha prior}
23+
#' \item{cluster_concentration}{Concentration parameter for cluster probabilities}
24+
#' \item{n_clusters}{Number of clusters}
25+
#'
26+
#' @examples
27+
#' # Basic hyperparameters for 5 items
28+
#' hyper <- set_hyperparameters(n_items = 5)
29+
#'
30+
#' # Custom hyperparameters with multiple clusters
31+
#' hyper <- set_hyperparameters(
32+
#' n_items = 10,
33+
#' alpha_shape = 2,
34+
#' alpha_rate = 1,
35+
#' cluster_concentration = 5,
36+
#' n_clusters = 3
37+
#' )
38+
#'
1039
#' @export
1140
#'
1241
set_hyperparameters <- function(

R/set_smc_options.R

Lines changed: 57 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,62 @@
1-
#' Set SMC options
1+
#' Set SMC options for sequential inference
22
#'
3-
#' @param n_particles Number of particles
4-
#' @param n_particle_filters Initial number of particle filters for each
5-
#' particle
6-
#' @param max_particle_filters Maximum number of particle filters.
7-
#' @param resampling_threshold Effective sample size threshold for resampling
8-
#' @param doubling_threshold Threshold for particle filter doubling. If the
9-
#' acceptance rate of the rejuvenation step falls below this threshold, the
10-
#' number of particle filters is doubled. Defaults to 0.2.
11-
#' @param max_rejuvenation_steps Maximum number of rejuvenation steps. If the
12-
#' number of unique particles has not exceeded half the number of particles
13-
#' after this many steps, the rejuvenation is still stopped.
14-
#' @param metric Metric
15-
#' @param resampler resampler
16-
#' @param latent_rank_proposal latent rank proposal
17-
#' @param verbose Boolean
18-
#' @param trace Logical specifying whether to save static parameters at each
19-
#' timestep.
20-
#' @param trace_latent Logical specifying whether to sample and save one
21-
#' complete set of latent rankings for each particle and each timepoint.
3+
#' Creates a list of options for the Sequential Monte Carlo squared (SMC²)
4+
#' algorithm used in Bayesian inference for the Mallows model. These options
5+
#' control the behavior of the particle filtering and resampling procedures.
226
#'
23-
#' @return A list
7+
#' @param n_particles Positive integer specifying the number of particles to use
8+
#' in the SMC algorithm. Default is 1000.
9+
#' @param n_particle_filters Positive integer specifying the initial number of
10+
#' particle filters for each particle. Default is 50.
11+
#' @param max_particle_filters Positive integer specifying the maximum number
12+
#' of particle filters allowed. Default is 10000.
13+
#' @param resampling_threshold Positive numeric value specifying the effective
14+
#' sample size threshold for triggering resampling. Default is n_particles/2.
15+
#' @param doubling_threshold Numeric value between 0 and 1 specifying the
16+
#' threshold for particle filter doubling. If the acceptance rate of the
17+
#' rejuvenation step falls below this threshold, the number of particle
18+
#' filters is doubled. Default is 0.2.
19+
#' @param max_rejuvenation_steps Positive integer specifying the maximum number
20+
#' of rejuvenation steps. If the number of unique particles has not exceeded
21+
#' half the number of particles after this many steps, the rejuvenation is
22+
#' stopped. Default is 20.
23+
#' @param metric Character string specifying the distance metric to use.
24+
#' Options include "footrule", "kendall", "spearman", "cayley", "hamming",
25+
#' and "ulam". Default is "footrule".
26+
#' @param resampler Character string specifying the resampling method.
27+
#' Options include "multinomial", "residual", "stratified", and "systematic".
28+
#' Default is "multinomial".
29+
#' @param latent_rank_proposal Character string specifying the proposal
30+
#' distribution for latent rankings. Default is "uniform".
31+
#' @param verbose Logical value indicating whether to print progress messages
32+
#' during computation. Default is FALSE.
33+
#' @param trace Logical value specifying whether to save static parameters at
34+
#' each timestep. Default is FALSE.
35+
#' @param trace_latent Logical value specifying whether to sample and save one
36+
#' complete set of latent rankings for each particle and each timepoint.
37+
#' Default is FALSE.
38+
#'
39+
#' @return A list containing all the SMC options with the specified values.
40+
#'
41+
#' @examples
42+
#' # Default SMC options
43+
#' opts <- set_smc_options()
44+
#'
45+
#' # Custom SMC options with fewer particles and Kendall distance
46+
#' opts <- set_smc_options(
47+
#' n_particles = 500,
48+
#' n_particle_filters = 25,
49+
#' metric = "kendall",
50+
#' verbose = TRUE
51+
#' )
52+
#'
53+
#' # Options for tracing parameters
54+
#' opts <- set_smc_options(
55+
#' n_particles = 100,
56+
#' trace = TRUE,
57+
#' trace_latent = TRUE
58+
#' )
59+
#'
2460
#' @export
2561
#'
2662
set_smc_options <- function(

README.Rmd

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22
output: github_document
3+
bibliography: inst/REFERENCES.bib
34
---
45

56
<!-- README.md is generated from README.Rmd. Please edit that file -->
@@ -19,7 +20,9 @@ knitr::opts_chunk$set(
1920
[![R-CMD-check](https://github.com/osorensen/BayesMallowsSMC2/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/osorensen/BayesMallowsSMC2/actions/workflows/R-CMD-check.yaml)
2021
<!-- badges: end -->
2122

22-
BayesMallowsSMC2 provides functions for performing sequential inference in the Bayesian Mallows model using the SMC$^{2}$ algorithm.
23+
BayesMallowsSMC2 provides functions for performing sequential inference in the Bayesian Mallows model using the SMC$^{2}$ algorithm. The package implements the methods described in @sorensen2025sequential.
24+
25+
The Bayesian Mallows model is a probabilistic framework for analyzing ranking data, and this package extends it to handle sequential learning scenarios where rankings arrive over time. The SMC$^{2}$ (Sequential Monte Carlo squared) algorithm enables efficient Bayesian inference by combining particle filtering with MCMC methods.
2326

2427
## Installation
2528

@@ -32,6 +35,17 @@ devtools::install_github("osorensen/BayesMallowsSMC2")
3235

3336
## Usage
3437

35-
This package is under development, and is not yet well documented. For examples on how to use it, see the code in the OSF repository https://osf.io/pquk4/.
38+
This package implements sequential Bayesian inference for ranking data using the Mallows model. The main function is `compute_sequentially()`, which performs SMC$^{2}$ inference as rankings arrive over time.
39+
40+
```r
41+
library(BayesMallowsSMC2)
42+
43+
# Example usage (see vignettes for detailed examples)
44+
# result <- compute_sequentially(data, hyperparameters, smc_options)
45+
```
46+
47+
For detailed examples and reproducible code, see the OSF repository at https://osf.io/pquk4/.
48+
49+
## References
3650

3751

README.md

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,15 @@
99
<!-- badges: end -->
1010

1111
BayesMallowsSMC2 provides functions for performing sequential inference
12-
in the Bayesian Mallows model using the SMC$^{2}$ algorithm.
12+
in the Bayesian Mallows model using the SMC$^{2}$ algorithm. The package
13+
implements the methods described in Sørensen, Frigessi, and Scheel
14+
(2025).
15+
16+
The Bayesian Mallows model is a probabilistic framework for analyzing
17+
ranking data, and this package extends it to handle sequential learning
18+
scenarios where rankings arrive over time. The SMC$^{2}$ (Sequential
19+
Monte Carlo squared) algorithm enables efficient Bayesian inference by
20+
combining particle filtering with MCMC methods.
1321

1422
## Installation
1523

@@ -23,6 +31,31 @@ devtools::install_github("osorensen/BayesMallowsSMC2")
2331

2432
## Usage
2533

26-
This package is under development, and is not yet well documented. For
27-
examples on how to use it, see the code in the OSF repository
34+
This package implements sequential Bayesian inference for ranking data
35+
using the Mallows model. The main function is `compute_sequentially()`,
36+
which performs SMC$^{2}$ inference as rankings arrive over time.
37+
38+
``` r
39+
library(BayesMallowsSMC2)
40+
41+
# Example usage (see vignettes for detailed examples)
42+
# result <- compute_sequentially(data, hyperparameters, smc_options)
43+
```
44+
45+
For detailed examples and reproducible code, see the OSF repository at
2846
<https://osf.io/pquk4/>.
47+
48+
## References
49+
50+
<div id="refs" class="references csl-bib-body hanging-indent"
51+
entry-spacing="0">
52+
53+
<div id="ref-sorensen2025sequential" class="csl-entry">
54+
55+
Sørensen, Øystein, Arnoldo Frigessi, and Ida Scheel. 2025. “Sequential
56+
Rank and Preference Learning with the Bayesian Mallows Model.” *Bayesian
57+
Analysis*. <https://doi.org/10.1214/25-BA1564>.
58+
59+
</div>
60+
61+
</div>

inst/REFERENCES.bib

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
@article{sorensen2025sequential,
2+
title={Sequential Rank and Preference Learning with the Bayesian Mallows Model},
3+
author={S{\o}rensen, {\O}ystein and Frigessi, Arnoldo and Scheel, Ida},
4+
journal={Bayesian Analysis},
5+
year={2025},
6+
publisher={International Society for Bayesian Analysis},
7+
doi={10.1214/25-BA1564},
8+
url={https://projecteuclid.org/journals/bayesian-analysis/advance-publication/Sequential-Rank-and-Preference-Learning-with-the-Bayesian-Mallows-Model/10.1214/25-BA1564.full}
9+
}

0 commit comments

Comments
 (0)