|
1 | 1 | #' Compute the Bayesian Mallows model sequentially |
2 | 2 | #' |
| 3 | +#' This function performs sequential Bayesian inference for the Mallows model |
| 4 | +#' using the SMC² (Sequential Monte Carlo squared) algorithm. It can handle |
| 5 | +#' both complete/partial rankings and pairwise preference data that arrive |
| 6 | +#' sequentially over time. |
3 | 7 | #' |
4 | | -#' @param data A dataframe containing partial rankings or pairwise preferences. |
5 | | -#' If `data` contains complete or partial rankings, it must have the following |
6 | | -#' columns: |
| 8 | +#' @param data A data frame containing ranking or preference data with temporal |
| 9 | +#' structure. The data frame must include `timepoint` and `user` columns. |
7 | 10 | #' |
8 | | -#' \itemize{ |
9 | | -#' \item `timepoint`: a numeric vector denoting the timepoint, starting at 1. |
10 | | -#' \item `user`: a vector identifying the user. |
11 | | -#' \item `item1`: ranking of item 1. |
12 | | -#' \item `item2`: ranking of item 2. |
13 | | -#' \item etc. |
14 | | -#' } |
| 11 | +#' For complete or partial rankings, the data frame should have these columns: |
| 12 | +#' \itemize{ |
| 13 | +#' \item `timepoint`: Numeric vector denoting the timepoint, starting at 1. |
| 14 | +#' \item `user`: Vector identifying the user providing the ranking. |
| 15 | +#' \item `item1`, `item2`, etc.: Rankings of items (use NA for missing items |
| 16 | +#' in partial rankings). |
| 17 | +#' } |
15 | 18 | #' |
16 | | -#' If data contains pairwise preferences, it must have the following |
17 | | -#' structure: |
| 19 | +#' For pairwise preferences, the structure should be: |
| 20 | +#' \itemize{ |
| 21 | +#' \item `timepoint`: Numeric vector denoting the timepoint, starting at 1. |
| 22 | +#' \item `user`: Vector identifying the user providing the preference. |
| 23 | +#' \item `top_item`: Identifier for the preferred item. |
| 24 | +#' \item `bottom_item`: Identifier for the less preferred item. |
| 25 | +#' } |
18 | 26 | #' |
19 | | -#' \itemize{ |
20 | | -#' \item `timepoint`: a numeric vector denoting the timepoint, starting at 1. |
21 | | -#' \item `user`: a vector identifying the user. |
22 | | -#' \item `top_item`: identifier for the preferred item. |
23 | | -#' \item `bottom_item`: identifier for the dispreferred item. |
24 | | -#' } |
25 | | -#' |
26 | | -#' @param hyperparameters A list returned from [set_hyperparameters()]. |
27 | | -#' @param smc_options A list returned from [set_smc_options()] |
| 27 | +#' @param hyperparameters A list of hyperparameters returned from |
| 28 | +#' \code{\link{set_hyperparameters}}. Defines the prior distributions for |
| 29 | +#' model parameters. |
| 30 | +#' @param smc_options A list of SMC algorithm options returned from |
| 31 | +#' \code{\link{set_smc_options}}. Controls the behavior of the particle |
| 32 | +#' filtering algorithm. |
28 | 33 | #' @param topological_sorts A list returned from |
29 | | -#' [precompute_topological_sorts()]. Only used with preference data, and |
30 | | -#' defaults to `NULL`. |
| 34 | +#' \code{\link{precompute_topological_sorts}}. Required when using pairwise |
| 35 | +#' preference data; otherwise it should be left as \code{NULL} (the default). |
| 36 | +#' |
| 37 | +#' @return An object of class \code{BayesMallowsSMC2} containing the results |
| 38 | +#' of the sequential inference, including parameter traces, log marginal |
| 39 | +#' likelihood estimates, and other algorithm diagnostics. |
| 40 | +#' |
| 41 | +#' @examples |
| 42 | +#' # Example with complete rankings |
| 43 | +#' set.seed(123) |
| 44 | +#' n_items <- 4 |
| 45 | +#' |
| 46 | +#' # Create synthetic ranking data |
| 47 | +#' ranking_data <- data.frame( |
| 48 | +#' timepoint = c(1, 1, 2, 2), |
| 49 | +#' user = c(1, 2, 3, 4), |
| 50 | +#' item1 = c(1, 2, 1, 3), |
| 51 | +#' item2 = c(2, 1, 3, 1), |
| 52 | +#' item3 = c(3, 4, 2, 4), |
| 53 | +#' item4 = c(4, 3, 4, 2) |
| 54 | +#' ) |
| 55 | +#' |
| 56 | +#' # Set up hyperparameters and options |
| 57 | +#' hyper <- set_hyperparameters(n_items = n_items) |
| 58 | +#' opts <- set_smc_options(n_particles = 100, verbose = FALSE) |
| 59 | +#' |
| 60 | +#' # Run sequential inference |
| 61 | +#' result <- compute_sequentially( |
| 62 | +#' data = ranking_data, |
| 63 | +#' hyperparameters = hyper, |
| 64 | +#' smc_options = opts |
| 65 | +#' ) |
| 66 | +#' |
| 67 | +#' @references |
| 68 | +#' \insertRef{sorensen2025sequential}{BayesMallowsSMC2} |
31 | 69 | #' |
32 | | -#' @return An object of class BayesMallowsSMC2. |
33 | 70 | #' @export |
34 | 71 | #' |
35 | 72 | compute_sequentially <- function( |
@@ -76,7 +113,7 @@ compute_sequentially <- function( |
76 | 113 | user_timepoint_combinations <- unique(data[c("user", "timepoint")]) |
77 | 114 | if(max(table(user_timepoint_combinations$user)) > 1) { |
78 | 115 | stop("Each user can only enter the pool once. Users appearing at multiple timepoints: ", |
79 | | - paste(names(table(user_timepoint_combinations$user))[table(user_timepoint_combinations$user) > 1], |
| 116 | + paste(names(table(user_timepoint_combinations$user))[table(user_timepoint_combinations$user) > 1], |
80 | 117 | collapse = ", ")) |
81 | 118 | } |
82 | 119 |
|
|
0 commit comments