1+ #include " particle.h"
2+ #include " misc.h"
3+ #include " sample_latent_rankings.h"
14#include < RcppArmadillo.h>
25#include < algorithm>
36#include < vector>
4- #include " misc.h"
5- #include " particle.h"
6- #include " sample_latent_rankings.h"
77
88using namespace arma ;
99
1010using namespace arma ;
1111
12- StaticParameters::StaticParameters (const vec& alpha, const umat& rho, const vec& tau) :
13- alpha { alpha }, rho { rho }, tau { tau } {}
14-
15- StaticParameters::StaticParameters (const Prior& prior) :
16- alpha { Rcpp::rgamma (prior.n_clusters , prior.alpha_shape , 1 / prior.alpha_rate ) },
17- rho { umat (prior.n_items , prior.n_clusters ) },
18- tau { normalise (Rcpp::as<vec>(Rcpp::rgamma (prior.n_clusters , prior.cluster_concentration , 1 )), 1 ) }
19- {
20- rho.each_col ([&prior](uvec& a){
21- a = Rcpp::as<uvec>(Rcpp::sample (prior.n_items , prior.n_items , false ));
22- });
23- }
12+ StaticParameters::StaticParameters (const vec &alpha, const umat &rho,
13+ const vec &tau)
14+ : alpha{alpha}, rho{rho}, tau{tau} {}
15+
16+ StaticParameters::StaticParameters (const Prior &prior)
17+ : alpha{Rcpp::rgamma (prior.n_clusters , prior.alpha_shape ,
18+ 1 / prior.alpha_rate )},
19+ rho{umat (prior.n_items , prior.n_clusters )},
20+ tau{normalise (Rcpp::as<vec>(Rcpp::rgamma (prior.n_clusters ,
21+ prior.cluster_concentration , 1 )),
22+ 1 )} {
23+ rho.each_col ([&prior](uvec &a) {
24+ a = Rcpp::as<uvec>(Rcpp::sample (prior.n_items , prior.n_items , false ));
25+ });
26+ }
2427
25- Particle::Particle (const Options& options, const StaticParameters& parameters,
26- const std::unique_ptr<PartitionFunction>& pfun) :
27- parameters { parameters },
28- particle_filters (create_particle_filters(options)),
29- log_normalized_particle_filter_weights (
30- Rcpp::NumericVector (options.n_particle_filters, -log(options.n_particle_filters))
31- ){
32- logz = zeros (parameters.tau .size ());
33- for (size_t i{}; i < logz.size (); i++) {
34- logz (i) = pfun->logz (parameters.alpha (i));
35- }
28+ Particle::Particle (const Options &options, const StaticParameters ¶meters,
29+ const std::unique_ptr<PartitionFunction> &pfun)
30+ : parameters{parameters},
31+ particle_filters (create_particle_filters(options)),
32+ log_normalized_particle_filter_weights(Rcpp::NumericVector(
33+ options.n_particle_filters, -log(options.n_particle_filters))) {
34+ logz = zeros (parameters.tau .size ());
35+ for (size_t i{}; i < logz.size (); i++) {
36+ logz (i) = pfun->logz (parameters.alpha (i));
3637 }
38+ }
3739
3840void Particle::run_particle_filter (
39- unsigned int t, const Prior& prior,
40- const std::unique_ptr<Data>& data,
41- const std::unique_ptr<PartitionFunction>& pfun,
42- const std::unique_ptr<Distance>& distfun,
43- const std::unique_ptr<Resampler>& resampler,
44- std::string latent_rank_proposal,
45- bool conditional) {
46-
47- if (t > 0 ) {
41+ unsigned int t, const Prior &prior, const std::unique_ptr<Data> &data,
42+ const std::unique_ptr<PartitionFunction> &pfun,
43+ const std::unique_ptr<Distance> &distfun,
44+ const std::unique_ptr<Resampler> &resampler,
45+ std::string latent_rank_proposal, bool conditional) {
46+
47+ if (t > 0 ) {
4848 ivec new_counts = resampler->resample (
49- conditional ? particle_filters.size () - 1 : particle_filters.size (),
50- exp (log_normalized_particle_filter_weights));
51- if (conditional) new_counts (0 ) += 1 ;
49+ conditional ? particle_filters.size () - 1 : particle_filters.size (),
50+ exp (log_normalized_particle_filter_weights));
51+ if (conditional)
52+ new_counts (0 ) += 1 ;
5253 particle_filters = update_vector (new_counts, particle_filters);
5354 }
5455
5556 unsigned int pf_index{};
56- for (auto & pf : particle_filters) {
57- auto proposal = sample_latent_rankings (
58- data, t, prior, latent_rank_proposal, parameters, pfun, distfun);
57+ for (auto & pf : particle_filters) {
58+ auto proposal = sample_latent_rankings (data, t, prior, latent_rank_proposal,
59+ parameters, pfun, distfun);
5960
60- if (conditional && pf_index == 0 ) {
61+ if (conditional && pf_index == 0 ) {
6162 proposal.proposal = particle_filters[0 ].latent_rankings .col (t);
6263 }
6364
6465 double log_prob{};
6566
66- for (size_t i{}; i < proposal.proposal .n_cols ; i++) {
67+ for (size_t i{}; i < proposal.proposal .n_cols ; i++) {
6768 vec log_cluster_contribution (prior.n_clusters );
68- for (size_t c{}; c < prior.n_clusters ; c++) {
69- log_cluster_contribution (c) = log (parameters.tau (c)) - this ->logz (c) -
70- parameters.alpha (c) * distfun->d (proposal.proposal .col (i), parameters.rho .col (c));
69+ for (size_t c{}; c < prior.n_clusters ; c++) {
70+ log_cluster_contribution (c) =
71+ log (parameters.tau (c)) - this ->logz (c) -
72+ parameters.alpha (c) *
73+ distfun->d (proposal.proposal .col (i), parameters.rho .col (c));
7174 }
7275 log_prob += log_sum_exp (log_cluster_contribution);
7376 }
7477
75- pf.cluster_probabilities = join_horiz (
76- pf.cluster_probabilities , proposal.cluster_probabilities
77- );
78+ pf.cluster_probabilities =
79+ join_horiz (pf.cluster_probabilities , proposal.cluster_probabilities );
7880
79- if (!(conditional && pf_index == 0 )) {
80- if (prior.n_clusters > 1 ) {
81+ if (!(conditional && pf_index == 0 )) {
82+ if (prior.n_clusters > 1 ) {
8183 pf.index = join_cols (pf.index , uvec{pf_index});
8284 pf.cluster_assignments =
83- join_cols (pf.cluster_assignments , proposal.cluster_assignment );
85+ join_cols (pf.cluster_assignments , proposal.cluster_assignment );
8486 }
8587 pf.latent_rankings = join_horiz (pf.latent_rankings , proposal.proposal );
8688 }
@@ -91,147 +93,163 @@ void Particle::run_particle_filter(
9193 }
9294
9395 vec log_pf_weights (log_normalized_particle_filter_weights.size ());
94- std::transform (
95- particle_filters. cbegin (), particle_filters. cend (), log_pf_weights.begin (),
96- [t](const ParticleFilter& pf){ return pf.log_weight (t); });
96+ std::transform (particle_filters. cbegin (), particle_filters. cend (),
97+ log_pf_weights.begin (),
98+ [t](const ParticleFilter & pf) { return pf.log_weight (t); });
9799
98100 log_incremental_likelihood.resize (log_incremental_likelihood.size () + 1 );
99- log_incremental_likelihood (log_incremental_likelihood.size () - 1 ) = log_mean_exp (log_pf_weights);
101+ log_incremental_likelihood (log_incremental_likelihood.size () - 1 ) =
102+ log_mean_exp (log_pf_weights);
100103 log_normalized_particle_filter_weights = softmax (log_pf_weights);
101104
102- if (stored_weights.size () <= t) {
103- stored_weights.push_back (exp (log_normalized_particle_filter_weights));
105+ if (stored_weights.size () <= t) {
106+ stored_weights.push_back (exp (log_normalized_particle_filter_weights));
104107 } else {
105- stored_weights[t] = exp (log_normalized_particle_filter_weights);
108+ stored_weights[t] = exp (log_normalized_particle_filter_weights);
106109 }
107110}
108111
109- void Particle::assemble_backward_trajectory (unsigned int T, const std::unique_ptr<Resampler>& resampler) {
110- // We need to assemble a new reference trajectory traversing backwards from T to 0.
111- // The independence property means the transition density factors out of backward weights.
112- // Thus B_t is simply drawn from W_t independently.
113-
112+ void Particle::assemble_backward_trajectory (
113+ unsigned int T, const std::unique_ptr<Resampler> &resampler) {
114+ // We need to assemble a new reference trajectory traversing backwards from T
115+ // to 0. The independence property means the transition density factors out of
116+ // backward weights. Thus B_t is simply drawn from W_t independently.
117+
114118 ParticleFilter new_reference;
115119 new_reference.log_weight .resize (T + 1 );
116-
120+
117121 // Note: cluster_probabilities has size [cluster x (number of users up to T)]
118122 // We need to build these up. Actually, they are built horizontally (joined).
119123 // So we insert columns at the beginning.
120-
124+
121125 for (int t = T; t >= 0 ; --t) {
122126 arma::vec current_weights = stored_weights[t];
123-
127+
124128 // Sample a single index b_t based on current_weights
125129 arma::ivec counts = resampler->resample (1 , current_weights);
126- unsigned int b_t = arma::as_scalar (arma::find (counts > 0 , 1 )); // The chosen index
127-
128- unsigned int num_users_at_t = particle_filters[b_t ].latent_rankings .col (t).n_cols ; // actually wait, .col(t) returns EXACTLY 1 column.
129-
130- if (new_reference.latent_rankings .is_empty ()) {
131- new_reference.latent_rankings = particle_filters[b_t ].latent_rankings .col (t);
132- if (parameters.tau .size () > 1 ) {
133- // The total number of users up to time t in the forward pass is the length of cluster_assignments
134- unsigned int end_idx = particle_filters[b_t ].cluster_assignments .n_elem - 1 ;
135- // Since .col(t) grabbed 1 column, but what if multiple users were processed?
136- // Ah! In `run_particle_filter`, `proposal.proposal` is joined!
137- // Wait, `pf.latent_rankings = join_horiz(pf.latent_rankings, proposal.proposal);`
138- // If `proposal.proposal` had 5 columns at time `t`, then `pf.latent_rankings` grew by 5 columns!
139- // So `latent_rankings` columns correspond to USERS, not timepoints!
140- // So `col(t)` is completely wrong! We need to extract the columns corresponding to time `t`.
141- // Let's look at `sample_latent_rankings`. For complete data, 1 user = 1 row = 1 timepoint!
142- // Wait... for mixture models, see test: `compute_sequentially(mixtures[1:50,])`.
143- // `mixtures` has 1 row per user. So `n_timepoints` = 50.
144- // At each timepoint, 1 user is processed.
145- // So `proposal.proposal.n_cols` = 1.
146- // Thus `latent_rankings` has exactly 1 column per timepoint. `num_users_at_t` is always 1!
147- // SO WHY DID IT SEGFAULT?
148- // Because `col(t)` returns exactly 1 column, `num_users_at_t` is 1.
149- // Let's check `start_idx`.
150- new_reference.cluster_assignments = particle_filters[b_t ].cluster_assignments .subvec (t, t);
151- new_reference.cluster_probabilities = particle_filters[b_t ].cluster_probabilities .cols (t, t);
152- new_reference.index = uvec (T + 1 , fill::zeros);
153- }
130+ unsigned int b_t =
131+ arma::as_scalar (arma::find (counts > 0 , 1 )); // The chosen index
132+
133+ unsigned int num_users_at_t =
134+ particle_filters[b_t ]
135+ .latent_rankings .col (t)
136+ .n_cols ; // actually wait, .col(t) returns EXACTLY 1 column.
137+
138+ if (new_reference.latent_rankings .is_empty ()) {
139+ new_reference.latent_rankings =
140+ particle_filters[b_t ].latent_rankings .col (t);
141+ if (parameters.tau .size () > 1 ) {
142+ // The total number of users up to time t in the forward pass is the
143+ // length of cluster_assignments
144+ unsigned int end_idx =
145+ particle_filters[b_t ].cluster_assignments .n_elem - 1 ;
146+ // Since .col(t) grabbed 1 column, but what if multiple users were
147+ // processed? Ah! In `run_particle_filter`, `proposal.proposal` is
148+ // joined! Wait, `pf.latent_rankings = join_horiz(pf.latent_rankings,
149+ // proposal.proposal);` If `proposal.proposal` had 5 columns at time
150+ // `t`, then `pf.latent_rankings` grew by 5 columns! So
151+ // `latent_rankings` columns correspond to USERS, not timepoints! So
152+ // `col(t)` is completely wrong! We need to extract the columns
153+ // corresponding to time `t`. Let's look at `sample_latent_rankings`.
154+ // For complete data, 1 user = 1 row = 1 timepoint! Wait... for mixture
155+ // models, see test: `compute_sequentially(mixtures[1:50,])`. `mixtures`
156+ // has 1 row per user. So `n_timepoints` = 50. At each timepoint, 1 user
157+ // is processed. So `proposal.proposal.n_cols` = 1. Thus
158+ // `latent_rankings` has exactly 1 column per timepoint.
159+ // `num_users_at_t` is always 1! SO WHY DID IT SEGFAULT? Because
160+ // `col(t)` returns exactly 1 column, `num_users_at_t` is 1. Let's check
161+ // `start_idx`.
162+ new_reference.cluster_assignments =
163+ particle_filters[b_t ].cluster_assignments .subvec (t, t);
164+ new_reference.cluster_probabilities =
165+ particle_filters[b_t ].cluster_probabilities .cols (t, t);
166+ new_reference.index = uvec (T + 1 , fill::zeros);
167+ }
154168 } else {
155- new_reference.latent_rankings .insert_cols (0 , particle_filters[b_t ].latent_rankings .col (t));
156- if (parameters.tau .size () > 1 ) {
157- new_reference.cluster_assignments .insert_rows (0 , particle_filters[b_t ].cluster_assignments .subvec (t, t));
158- new_reference.cluster_probabilities .insert_cols (0 , particle_filters[b_t ].cluster_probabilities .cols (t, t));
159- }
169+ new_reference.latent_rankings .insert_cols (
170+ 0 , particle_filters[b_t ].latent_rankings .col (t));
171+ if (parameters.tau .size () > 1 ) {
172+ new_reference.cluster_assignments .insert_rows (
173+ 0 , particle_filters[b_t ].cluster_assignments .subvec (t, t));
174+ new_reference.cluster_probabilities .insert_cols (
175+ 0 , particle_filters[b_t ].cluster_probabilities .cols (t, t));
176+ }
160177 }
161-
178+
162179 new_reference.log_weight (t) = particle_filters[b_t ].log_weight (t);
163180 }
164181
165182 this ->particle_filters [0 ] = new_reference;
166183 this ->conditioned_particle_filter = 0 ;
167184}
168185
169-
170186void Particle::sample_particle_filter () {
171187 Rcpp::NumericVector probs = Rcpp::exp (log_normalized_particle_filter_weights);
172- conditioned_particle_filter = Rcpp::sample (probs.size (), 1 , false , probs, false )[0 ];
188+ conditioned_particle_filter =
189+ Rcpp::sample (probs.size (), 1 , false , probs, false )[0 ];
173190}
174191
175- std::vector<Particle> create_particle_vector (const Options& options, const Prior& prior,
176- const std::unique_ptr<PartitionFunction>& pfun) {
192+ std::vector<Particle>
193+ create_particle_vector (const Options &options, const Prior &prior,
194+ const std::unique_ptr<PartitionFunction> &pfun) {
177195 std::vector<Particle> result;
178196 result.reserve (options.n_particles );
179197
180- for (size_t i{}; i < options.n_particles ; i++) {
198+ for (size_t i{}; i < options.n_particles ; i++) {
181199 result.push_back (Particle{options, StaticParameters (prior), pfun});
182200 }
183201
184202 return result;
185203}
186204
187- std::vector<ParticleFilter> create_particle_filters (const Options& options) {
205+ std::vector<ParticleFilter> create_particle_filters (const Options & options) {
188206 std::vector<ParticleFilter> result;
189207 result.reserve (options.n_particle_filters );
190- for (size_t i{}; i < options.n_particle_filters ; i++) {
208+ for (size_t i{}; i < options.n_particle_filters ; i++) {
191209 result.push_back (ParticleFilter{});
192210 }
193211
194212 return result;
195213}
196214
197- vec normalize_log_importance_weights (const std::vector<Particle>& particle_vector) {
215+ vec normalize_log_importance_weights (
216+ const std::vector<Particle> &particle_vector) {
198217 vec log_importance_weights (particle_vector.size ());
199- std::transform (
200- particle_vector.cbegin (), particle_vector.cend (),
201- log_importance_weights.begin (),
202- [](const Particle& p) { return p.log_importance_weight ; });
218+ std::transform (particle_vector.cbegin (), particle_vector.cend (),
219+ log_importance_weights.begin (),
220+ [](const Particle &p) { return p.log_importance_weight ; });
203221
204222 return softmax (log_importance_weights);
205223}
206224
207- double log_marginal_likelihood_increment (
208- const std::vector<Particle>& particle_vector,
209- const vec& normalized_log_importance_weights,
210- int t
211- ) {
225+ double
226+ log_marginal_likelihood_increment (const std::vector<Particle> &particle_vector,
227+ const vec &normalized_log_importance_weights,
228+ int t) {
212229 vec unconditional_log_incremental (particle_vector.size ());
213- for (size_t i{}; i < particle_vector.size (); i++) {
230+ for (size_t i{}; i < particle_vector.size (); i++) {
214231 unconditional_log_incremental (i) =
215- normalized_log_importance_weights (i) + particle_vector[i].log_incremental_likelihood (t);
232+ normalized_log_importance_weights (i) +
233+ particle_vector[i].log_incremental_likelihood (t);
216234 }
217235 return log_sum_exp (unconditional_log_incremental);
218236}
219237
220- vec compute_alpha_stddev (const std::vector<Particle>& particle_vector) {
221- mat alpha_values (particle_vector.size (), particle_vector[0 ].parameters .alpha .size ());
222- for (size_t i{}; i < particle_vector.size (); i++) {
238+ vec compute_alpha_stddev (const std::vector<Particle> &particle_vector) {
239+ mat alpha_values (particle_vector.size (),
240+ particle_vector[0 ].parameters .alpha .size ());
241+ for (size_t i{}; i < particle_vector.size (); i++) {
223242 alpha_values.row (i) = particle_vector[i].parameters .alpha .t ();
224243 }
225244 return stddev (alpha_values, 0 , 0 ).t ();
226245}
227246
228- double compute_log_Z (const std::vector<ParticleFilter>& pf, int max_time) {
247+ double compute_log_Z (const std::vector<ParticleFilter> & pf, int max_time) {
229248 double log_Z{};
230- for (size_t s{}; s < max_time + 1 ; s++) {
249+ for (size_t s{}; s < max_time + 1 ; s++) {
231250 vec log_weights (pf.size ());
232- std::transform (
233- pf.begin (), pf.end (), log_weights.begin (),
234- [s](const ParticleFilter& pf) { return pf.log_weight (s); });
251+ std::transform (pf.begin (), pf.end (), log_weights.begin (),
252+ [s](const ParticleFilter &pf) { return pf.log_weight (s); });
235253 log_Z += log_mean_exp (log_weights);
236254 }
237255 return log_Z;