diff --git a/DESCRIPTION b/DESCRIPTION
index edcc18c..51e137e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -21,7 +21,9 @@ LinkingTo:
Rcpp,
RcppArmadillo
Imports:
- Rcpp
+ Rcpp,
+ Rdpack
+RdMacros: Rdpack
Depends:
R (>= 4.1.0)
Suggests:
diff --git a/R/compute_sequentially.R b/R/compute_sequentially.R
index 14123fc..4ae8b92 100644
--- a/R/compute_sequentially.R
+++ b/R/compute_sequentially.R
@@ -1,35 +1,72 @@
#' Compute the Bayesian Mallows model sequentially
#'
+#' This function performs sequential Bayesian inference for the Mallows model
+#' using the SMC² (Sequential Monte Carlo squared) algorithm. It can handle
+#' both complete/partial rankings and pairwise preference data that arrive
+#' sequentially over time.
#'
-#' @param data A dataframe containing partial rankings or pairwise preferences.
-#' If `data` contains complete or partial rankings, it must have the following
-#' columns:
+#' @param data A data frame containing ranking or preference data with temporal
+#' structure. The data frame must include `timepoint` and `user` columns.
#'
-#' \itemize{
-#' \item `timepoint`: a numeric vector denoting the timepoint, starting at 1.
-#' \item `user`: a vector identifying the user.
-#' \item `item1`: ranking of item 1.
-#' \item `item2`: ranking of item 2.
-#' \item etc.
-#' }
+#' For complete or partial rankings, the columns should be:
+#' \itemize{
+#' \item `timepoint`: Numeric vector denoting the timepoint, starting at 1.
+#' \item `user`: Vector identifying the user providing the ranking.
+#' \item `item1`, `item2`, etc.: Rankings of items (use NA for missing items
+#' in partial rankings).
+#' }
#'
-#' If data contains pairwise preferences, it must have the following
-#' structure:
+#' For pairwise preferences, the structure should be:
+#' \itemize{
+#' \item `timepoint`: Numeric vector denoting the timepoint, starting at 1.
+#' \item `user`: Vector identifying the user providing the preference.
+#' \item `top_item`: Identifier for the preferred item.
+#' \item `bottom_item`: Identifier for the less preferred item.
+#' }
#'
-#' \itemize{
-#' \item `timepoint`: a numeric vector denoting the timepoint, starting at 1.
-#' \item `user`: a vector identifying the user.
-#' \item `top_item`: identifier for the preferred item.
-#' \item `bottom_item`: identifier for the dispreferred item.
-#' }
-#'
-#' @param hyperparameters A list returned from [set_hyperparameters()].
-#' @param smc_options A list returned from [set_smc_options()]
+#' @param hyperparameters A list of hyperparameters returned from
+#' \code{\link{set_hyperparameters}}. Defines the prior distributions for
+#' model parameters.
+#' @param smc_options A list of SMC algorithm options returned from
+#' \code{\link{set_smc_options}}. Controls the behavior of the particle
+#' filtering algorithm.
#' @param topological_sorts A list returned from
-#' [precompute_topological_sorts()]. Only used with preference data, and
-#' defaults to `NULL`.
+#' \code{\link{precompute_topological_sorts}}. Required when using pairwise
+#' preference data, otherwise should be \code{NULL} (default).
+#'
+#' @return An object of class \code{BayesMallowsSMC2} containing the results
+#' of the sequential inference, including parameter traces, log marginal
+#' likelihood estimates, and other algorithm diagnostics.
+#'
+#' @examples
+#' # Example with complete rankings
+#' set.seed(123)
+#' n_items <- 4
+#'
+#' # Create synthetic ranking data
+#' ranking_data <- data.frame(
+#' timepoint = c(1, 1, 2, 2),
+#' user = c(1, 2, 3, 4),
+#' item1 = c(1, 2, 1, 3),
+#' item2 = c(2, 1, 3, 1),
+#' item3 = c(3, 4, 2, 4),
+#' item4 = c(4, 3, 4, 2)
+#' )
+#'
+#' # Set up hyperparameters and options
+#' hyper <- set_hyperparameters(n_items = n_items)
+#' opts <- set_smc_options(n_particles = 100, verbose = FALSE)
+#'
+#' # Run sequential inference
+#' result <- compute_sequentially(
+#' data = ranking_data,
+#' hyperparameters = hyper,
+#' smc_options = opts
+#' )
+#'
+#' @references
+#' \insertRef{sorensen2025sequential}{BayesMallowsSMC2}
#'
-#' @return An object of class BayesMallowsSMC2.
#' @export
#'
compute_sequentially <- function(
@@ -76,7 +113,7 @@ compute_sequentially <- function(
user_timepoint_combinations <- unique(data[c("user", "timepoint")])
if(max(table(user_timepoint_combinations$user)) > 1) {
stop("Each user can only enter the pool once. Users appearing at multiple timepoints: ",
- paste(names(table(user_timepoint_combinations$user))[table(user_timepoint_combinations$user) > 1],
+ paste(names(table(user_timepoint_combinations$user))[table(user_timepoint_combinations$user) > 1],
collapse = ", "))
}
diff --git a/R/set_hyperparameters.R b/R/set_hyperparameters.R
index 9425524..ea53422 100644
--- a/R/set_hyperparameters.R
+++ b/R/set_hyperparameters.R
@@ -1,12 +1,41 @@
-#' Set hyperparameters
+#' Set hyperparameters for Bayesian Mallows model
#'
-#' @param n_items Integer defining the number of items.
-#' @param alpha_shape Shape of gamma prior for alpha.
-#' @param alpha_rate Rate of gamma prior for alpha.
-#' @param cluster_concentration Concentration parameter of Dirichlet distribution for cluster probabilities.
-#' @param n_clusters Integer defining the number of clusters.
+#' This function creates a list of hyperparameters for the Bayesian Mallows model
+#' used in sequential Monte Carlo inference. The hyperparameters define the prior
+#' distributions for the model parameters.
#'
-#' @return A list
+#' @param n_items Integer defining the number of items to be ranked. Must be a
+#' positive integer.
+#' @param alpha_shape Positive numeric value specifying the shape parameter of
+#' the gamma prior distribution for the scale parameter alpha. Default is 1.
+#' @param alpha_rate Positive numeric value specifying the rate parameter of
+#' the gamma prior distribution for the scale parameter alpha. Default is 0.5.
+#' @param cluster_concentration Positive numeric value specifying the
+#' concentration parameter of the Dirichlet distribution for cluster
+#' probabilities. Default is 10.
+#' @param n_clusters Positive integer defining the number of clusters in the
+#' mixture model. Default is 1.
+#'
+#' @return A list containing the hyperparameter values with elements:
+#' \item{n_items}{Number of items}
+#' \item{alpha_shape}{Shape parameter for alpha prior}
+#' \item{alpha_rate}{Rate parameter for alpha prior}
+#' \item{cluster_concentration}{Concentration parameter for cluster probabilities}
+#' \item{n_clusters}{Number of clusters}
+#'
+#' @examples
+#' # Basic hyperparameters for 5 items
+#' hyper <- set_hyperparameters(n_items = 5)
+#'
+#' # Custom hyperparameters with multiple clusters
+#' hyper <- set_hyperparameters(
+#' n_items = 10,
+#' alpha_shape = 2,
+#' alpha_rate = 1,
+#' cluster_concentration = 5,
+#' n_clusters = 3
+#' )
+#'
#' @export
#'
set_hyperparameters <- function(
diff --git a/R/set_smc_options.R b/R/set_smc_options.R
index 425e7d8..01ebcda 100644
--- a/R/set_smc_options.R
+++ b/R/set_smc_options.R
@@ -1,26 +1,62 @@
-#' Set SMC options
+#' Set SMC options for sequential inference
#'
-#' @param n_particles Number of particles
-#' @param n_particle_filters Initial number of particle filters for each
-#' particle
-#' @param max_particle_filters Maximum number of particle filters.
-#' @param resampling_threshold Effective sample size threshold for resampling
-#' @param doubling_threshold Threshold for particle filter doubling. If the
-#' acceptance rate of the rejuvenation step falls below this threshold, the
-#' number of particle filters is doubled. Defaults to 0.2.
-#' @param max_rejuvenation_steps Maximum number of rejuvenation steps. If the
-#' number of unique particles has not exceeded half the number of particles
-#' after this many steps, the rejuvenation is still stopped.
-#' @param metric Metric
-#' @param resampler resampler
-#' @param latent_rank_proposal latent rank proposal
-#' @param verbose Boolean
-#' @param trace Logical specifying whether to save static parameters at each
-#' timestep.
-#' @param trace_latent Logical specifying whether to sample and save one
-#' complete set of latent rankings for each particle and each timepoint.
+#' Creates a list of options for the SMC² (Sequential Monte Carlo squared)
+#' algorithm used in Bayesian inference for the Mallows model. These options
+#' control the behavior of the particle filtering and resampling procedures.
#'
-#' @return A list
+#' @param n_particles Positive integer specifying the number of particles to use
+#' in the SMC algorithm. Default is 1000.
+#' @param n_particle_filters Positive integer specifying the initial number of
+#' particle filters for each particle. Default is 50.
+#' @param max_particle_filters Positive integer specifying the maximum number
+#' of particle filters allowed. Default is 10000.
+#' @param resampling_threshold Positive numeric value specifying the effective
+#' sample size threshold for triggering resampling. Default is n_particles/2.
+#' @param doubling_threshold Numeric value between 0 and 1 specifying the
+#' threshold for particle filter doubling. If the acceptance rate of the
+#' rejuvenation step falls below this threshold, the number of particle
+#' filters is doubled. Default is 0.2.
+#' @param max_rejuvenation_steps Positive integer specifying the maximum number
+#' of rejuvenation steps. If the number of unique particles has not exceeded
+#' half the number of particles after this many steps, the rejuvenation is
+#' stopped. Default is 20.
+#' @param metric Character string specifying the distance metric to use.
+#' Options include "footrule", "kendall", "spearman", "cayley", "hamming",
+#' and "ulam". Default is "footrule".
+#' @param resampler Character string specifying the resampling method.
+#' Options include "multinomial", "residual", "stratified", and "systematic".
+#' Default is "multinomial".
+#' @param latent_rank_proposal Character string specifying the proposal
+#' distribution for latent rankings. Default is "uniform".
+#' @param verbose Logical value indicating whether to print progress messages
+#' during computation. Default is FALSE.
+#' @param trace Logical value specifying whether to save static parameters at
+#' each timestep. Default is FALSE.
+#' @param trace_latent Logical value specifying whether to sample and save one
+#' complete set of latent rankings for each particle and each timepoint.
+#' Default is FALSE.
+#'
+#' @return A list containing all the SMC options with the specified values.
+#'
+#' @examples
+#' # Default SMC options
+#' opts <- set_smc_options()
+#'
+#' # Custom SMC options with fewer particles and Kendall distance
+#' opts <- set_smc_options(
+#' n_particles = 500,
+#' n_particle_filters = 25,
+#' metric = "kendall",
+#' verbose = TRUE
+#' )
+#'
+#' # Options for tracing parameters
+#' opts <- set_smc_options(
+#' n_particles = 100,
+#' trace = TRUE,
+#' trace_latent = TRUE
+#' )
+#'
#' @export
#'
set_smc_options <- function(
diff --git a/README.Rmd b/README.Rmd
index b02b155..1171598 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -1,5 +1,6 @@
---
output: github_document
+bibliography: inst/REFERENCES.bib
---
@@ -19,7 +20,9 @@ knitr::opts_chunk$set(
[](https://github.com/osorensen/BayesMallowsSMC2/actions/workflows/R-CMD-check.yaml)
-BayesMallowsSMC2 provides functions for performing sequential inference in the Bayesian Mallows model using the SMC$^{2}$ algorithm.
+BayesMallowsSMC2 provides functions for performing sequential inference in the Bayesian Mallows model using the SMC$^{2}$ algorithm. The package implements the methods described in @sorensen2025sequential.
+
+The Bayesian Mallows model is a probabilistic framework for analyzing ranking data, and this package extends it to handle sequential learning scenarios where rankings arrive over time. The SMC$^{2}$ (Sequential Monte Carlo squared) algorithm enables efficient Bayesian inference by combining particle filtering with MCMC methods.
## Installation
@@ -32,6 +35,17 @@ devtools::install_github("osorensen/BayesMallowsSMC2")
## Usage
-This package is under development, and is not yet well documented. For examples on how to use it, see the code in the OSF repository https://osf.io/pquk4/.
+This package implements sequential Bayesian inference for ranking data using the Mallows model. The main function is `compute_sequentially()`, which performs SMC$^{2}$ inference as rankings arrive over time.
+
+```r
+library(BayesMallowsSMC2)
+
+# Example usage (see vignettes for detailed examples)
+# result <- compute_sequentially(data, hyperparameters, smc_options)
+```
+
+For detailed examples and reproducible code, see the OSF repository at https://osf.io/pquk4/.
+
+## References
diff --git a/README.md b/README.md
index 935ceb4..6070770 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,15 @@
BayesMallowsSMC2 provides functions for performing sequential inference
-in the Bayesian Mallows model using the SMC$^{2}$ algorithm.
+in the Bayesian Mallows model using the SMC$^{2}$ algorithm. The package
+implements the methods described in Sørensen, Frigessi, and Scheel
+(2025).
+
+The Bayesian Mallows model is a probabilistic framework for analyzing
+ranking data, and this package extends it to handle sequential learning
+scenarios where rankings arrive over time. The SMC$^{2}$ (Sequential
+Monte Carlo squared) algorithm enables efficient Bayesian inference by
+combining particle filtering with MCMC methods.
## Installation
@@ -23,6 +31,31 @@ devtools::install_github("osorensen/BayesMallowsSMC2")
## Usage
-This package is under development, and is not yet well documented. For
-examples on how to use it, see the code in the OSF repository
+This package implements sequential Bayesian inference for ranking data
+using the Mallows model. The main function is `compute_sequentially()`,
+which performs SMC$^{2}$ inference as rankings arrive over time.
+
+``` r
+library(BayesMallowsSMC2)
+
+# Example usage (see vignettes for detailed examples)
+# result <- compute_sequentially(data, hyperparameters, smc_options)
+```
+
+For detailed examples and reproducible code, see the OSF repository at