Skip to content

Commit 04e1022

Browse files
authored
Merge pull request #27 from osorensen/sorts-issue-26
Topological sorts in memory rather than in files
2 parents 7d4e897 + c5e38e3 commit 04e1022

15 files changed

+247
-170
lines changed

R/RcppExports.R

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,11 @@
1111
#' must have two columns, the first of which represents the preferred item
1212
#' and the second of which represents the disfavored item.
1313
#' @param n_items An integer specifying the number of items to sort.
14-
#' @param output_directory A string specifying the directory where the output files will be saved.
1514
#' @param save_frac Number between 0 and 1 specifying which fraction of sorts to save.
1615
#'
1716
#' @details
1817
#' The function generates all possible topological sorts for the provided preference matrix
19-
#' and saves approximately `save_frac` of the sorts as binary file in the specified output directory.
20-
#' The output files are named sequentially as `sort0.bin`, `sort1.bin`, and so on.
18+
#' and saves approximately `save_frac` of the sorts in a matrix which is returned.
2119
#'
2220
#' @return A list with elements `sort_count` (the number of topological sorts) and `sort_matrix` (a matrix holding the saved sorts).
2321
#'
@@ -27,19 +25,33 @@
2725
#' prefs <- pairwise_preferences[
2826
#' pairwise_preferences$user == 1, c("top_item", "bottom_item"), drop = FALSE]
2927
#'
30-
#' # Count the number of sorts without saving them.
31-
#' precompute_topological_sorts(
28+
#' # Generate all topological sorts, but don't save them:
29+
#' sorts <- precompute_topological_sorts(
3230
#' prefs = as.matrix(prefs),
3331
#' n_items = 5,
34-
#' output_directory = tempdir(),
3532
#' save_frac = 0
3633
#' )
34+
#' # Number of sorts
35+
#' sorts$sort_count
36+
#' # Empty matrix
37+
#' sorts$sort_matrix
3738
#'
38-
precompute_topological_sorts <- function(prefs, n_items, output_directory, save_frac) {
39-
.Call(`_BayesMallowsSMC2_precompute_topological_sorts`, prefs, n_items, output_directory, save_frac)
39+
#' # Generate all topological sorts and save them:
40+
#' sorts <- precompute_topological_sorts(
41+
#' prefs = as.matrix(prefs),
42+
#' n_items = 5,
43+
#' save_frac = 1
44+
#' )
45+
#' # Number of sorts
46+
#' sorts$sort_count
47+
#' # Matrix with all of them
48+
#' sorts$sort_matrix
49+
#'
50+
precompute_topological_sorts <- function(prefs, n_items, save_frac) {
51+
.Call(`_BayesMallowsSMC2_precompute_topological_sorts`, prefs, n_items, save_frac)
4052
}
4153

42-
run_smc <- function(input_timeseries, input_prior, input_options) {
43-
.Call(`_BayesMallowsSMC2_run_smc`, input_timeseries, input_prior, input_options)
54+
run_smc <- function(input_timeseries, input_prior, input_options, input_sort_matrices, input_sort_counts) {
55+
.Call(`_BayesMallowsSMC2_run_smc`, input_timeseries, input_prior, input_options, input_sort_matrices, input_sort_counts)
4456
}
4557

R/compute_sequentially.R

Lines changed: 17 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,9 @@
2525
#'
2626
#' @param hyperparameters A list returned from [set_hyperparameters()].
2727
#' @param smc_options A list returned from [set_smc_options()].
28-
#' @param topological_sorts_directory Path to a directory where precomputed
29-
#' topological sorts can be found. Must contain subdirectories with names
30-
#' `user1`, `user2`, etc., exactly matching the `user` column of `data`. See
31-
#' [precompute_topological_sorts()]. Defaults to `NULL`, but has to be
32-
#' provided when data contain pairwise preferences.
33-
#'
34-
#' @param num_topological_sorts Integer vector containing the number of
35-
#' topological sorts for each user.
36-
#' @param file_count Integer vector containing the number of files with
37-
#' topological sorts for each user.
28+
#' @param topological_sorts A two-level nested list whose innermost elements
29+
#' are lists returned from [precompute_topological_sorts()]. Only used with
30+
#' preference data, where it must be provided; defaults to `NULL`.
3831
#'
3932
#' @return A list, as returned by the internal `run_smc()` routine.
4033
#' @export
@@ -43,9 +36,7 @@ compute_sequentially <- function(
4336
data,
4437
hyperparameters = set_hyperparameters(),
4538
smc_options = set_smc_options(),
46-
topological_sorts_directory = NULL,
47-
num_topological_sorts = NULL,
48-
file_count = NULL
39+
topological_sorts = NULL
4940
){
5041
rank_columns <- grepl("item[0-9]+", colnames(data))
5142
preference_columns <- grepl("top\\_item|bottom\\_item", colnames(data))
@@ -60,34 +51,33 @@ compute_sequentially <- function(
6051
} else {
6152
attr(input_timeseries, "type") <- "complete rankings"
6253
}
54+
sort_matrices <- sort_counts <- list()
6355
} else if(sum(preference_columns) == 2) {
64-
if(is.null(topological_sorts_directory)) {
65-
stop("topological_sorts_directory must be provided with preference data.")
66-
}
67-
if(is.null(num_topological_sorts)) {
68-
stop("num_topological_sorts must be provided with preference data.")
69-
}
70-
if(is.null(file_count)) {
71-
stop("file_count must be provided with preference data.")
56+
if(is.null(topological_sorts)) {
57+
stop("topological_sorts must be provided with preference data.")
7258
}
7359
input_timeseries <- split(data, f = ~ timepoint) |>
7460
lapply(split, f = ~ user) |>
7561
lapply(function(x) lapply(x, function(y) as.matrix(y[preference_columns])))
7662
attr(input_timeseries, "type") <- "pairwise preferences"
77-
attr(input_timeseries, "topological_sorts_directory") <- topological_sorts_directory
78-
names(num_topological_sorts) <- names(file_count) <- as.character(lapply(input_timeseries, names))
79-
attr(input_timeseries, "num_topological_sorts") <- num_topological_sorts
80-
attr(input_timeseries, "file_count") <- file_count
63+
64+
sort_matrices <- lapply(topological_sorts, function(x) {
65+
lapply(x, function(y) y$sort_matrix)
66+
})
67+
68+
sort_counts <- lapply(topological_sorts, function(x) {
69+
lapply(x, function(y) y$sort_count)
70+
})
8171
} else {
8272
stop("Something wrong with data")
8373
}
8474

85-
attr(input_timeseries, "updated_users") <- FALSE
8675
if(max(table(data$user)) > 1 &&
8776
attr(input_timeseries, "type") != "pairwise preferences") {
8877
stop("Updated users not supported.")
8978
}
9079

91-
ret <- run_smc(input_timeseries, hyperparameters, smc_options)
80+
ret <- run_smc(input_timeseries, hyperparameters, smc_options,
81+
sort_matrices, sort_counts)
9282
}
9383

man/compute_sequentially.Rd

Lines changed: 4 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/precompute_topological_sorts.Rd

Lines changed: 19 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/RcppExports.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,36 +12,37 @@ Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
1212
#endif
1313

1414
// precompute_topological_sorts
15-
long long int precompute_topological_sorts(arma::umat prefs, int n_items, std::string output_directory, double save_frac);
16-
RcppExport SEXP _BayesMallowsSMC2_precompute_topological_sorts(SEXP prefsSEXP, SEXP n_itemsSEXP, SEXP output_directorySEXP, SEXP save_fracSEXP) {
15+
Rcpp::List precompute_topological_sorts(arma::umat prefs, int n_items, double save_frac);
16+
RcppExport SEXP _BayesMallowsSMC2_precompute_topological_sorts(SEXP prefsSEXP, SEXP n_itemsSEXP, SEXP save_fracSEXP) {
1717
BEGIN_RCPP
1818
Rcpp::RObject rcpp_result_gen;
1919
Rcpp::RNGScope rcpp_rngScope_gen;
2020
Rcpp::traits::input_parameter< arma::umat >::type prefs(prefsSEXP);
2121
Rcpp::traits::input_parameter< int >::type n_items(n_itemsSEXP);
22-
Rcpp::traits::input_parameter< std::string >::type output_directory(output_directorySEXP);
2322
Rcpp::traits::input_parameter< double >::type save_frac(save_fracSEXP);
24-
rcpp_result_gen = Rcpp::wrap(precompute_topological_sorts(prefs, n_items, output_directory, save_frac));
23+
rcpp_result_gen = Rcpp::wrap(precompute_topological_sorts(prefs, n_items, save_frac));
2524
return rcpp_result_gen;
2625
END_RCPP
2726
}
2827
// run_smc
29-
Rcpp::List run_smc(Rcpp::List input_timeseries, Rcpp::List input_prior, Rcpp::List input_options);
30-
RcppExport SEXP _BayesMallowsSMC2_run_smc(SEXP input_timeseriesSEXP, SEXP input_priorSEXP, SEXP input_optionsSEXP) {
28+
Rcpp::List run_smc(Rcpp::List input_timeseries, Rcpp::List input_prior, Rcpp::List input_options, Rcpp::List input_sort_matrices, Rcpp::List input_sort_counts);
29+
RcppExport SEXP _BayesMallowsSMC2_run_smc(SEXP input_timeseriesSEXP, SEXP input_priorSEXP, SEXP input_optionsSEXP, SEXP input_sort_matricesSEXP, SEXP input_sort_countsSEXP) {
3130
BEGIN_RCPP
3231
Rcpp::RObject rcpp_result_gen;
3332
Rcpp::RNGScope rcpp_rngScope_gen;
3433
Rcpp::traits::input_parameter< Rcpp::List >::type input_timeseries(input_timeseriesSEXP);
3534
Rcpp::traits::input_parameter< Rcpp::List >::type input_prior(input_priorSEXP);
3635
Rcpp::traits::input_parameter< Rcpp::List >::type input_options(input_optionsSEXP);
37-
rcpp_result_gen = Rcpp::wrap(run_smc(input_timeseries, input_prior, input_options));
36+
Rcpp::traits::input_parameter< Rcpp::List >::type input_sort_matrices(input_sort_matricesSEXP);
37+
Rcpp::traits::input_parameter< Rcpp::List >::type input_sort_counts(input_sort_countsSEXP);
38+
rcpp_result_gen = Rcpp::wrap(run_smc(input_timeseries, input_prior, input_options, input_sort_matrices, input_sort_counts));
3839
return rcpp_result_gen;
3940
END_RCPP
4041
}
4142

4243
static const R_CallMethodDef CallEntries[] = {
43-
{"_BayesMallowsSMC2_precompute_topological_sorts", (DL_FUNC) &_BayesMallowsSMC2_precompute_topological_sorts, 4},
44-
{"_BayesMallowsSMC2_run_smc", (DL_FUNC) &_BayesMallowsSMC2_run_smc, 3},
44+
{"_BayesMallowsSMC2_precompute_topological_sorts", (DL_FUNC) &_BayesMallowsSMC2_precompute_topological_sorts, 3},
45+
{"_BayesMallowsSMC2_run_smc", (DL_FUNC) &_BayesMallowsSMC2_run_smc, 5},
4546
{NULL, NULL, 0}
4647
};
4748

0 commit comments

Comments
 (0)