Skip to content

Commit 7c16e03

Browse files
authored
Improve rollup ranking queries (#23868)
* Improve "Others" aggregation for rollup ranking queries * Always sort rollup results to ensure ordering * Remove no longer needed double rollup result iteration * Add test to ensure rollup ranking query result structure
1 parent e214ccd commit 7c16e03

7 files changed

Lines changed: 478 additions & 134 deletions

core/RankingQuery.php

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -293,33 +293,49 @@ public function generateRankingQuery($innerQuery, bool $withRollup = false)
293293

294294
// generate select clauses for label columns
295295
$labelColumnsString = '`' . implode('`, `', array_keys($this->labelColumns)) . '`';
296-
$labelColumnsOthersSwitch = array();
297-
$withRollupColumns = array();
296+
297+
$labelColumnsOthersSwitch = [];
298+
$withRollupColumns = [];
299+
$withRollupOthersGroupBy = [];
298300

299301
foreach (array_keys($this->labelColumns) as $column) {
300-
$rollupWhen = '';
301302
if ($withRollup) {
302-
$rollupLimitValue = empty($withRollupColumns) ?
303-
"'" . $this->othersLabelValue . "'"
304-
:
305-
'NULL';
306-
307-
$rollupWhen = "
308-
WHEN counterRollup = $limit THEN $rollupLimitValue
309-
WHEN counterRollup > 0 THEN `$column`
303+
if ([] === $withRollupColumns) {
304+
// support "Others" row for first label column
305+
$rollupWhen = "
306+
WHEN counterRollup = $limit THEN '" . $this->othersLabelValue . "'
307+
WHEN counterRollup > 0 THEN `$column`
308+
WHEN counter = $limit AND counterRollup = 0 THEN `$column`
309+
WHEN counter = $limit THEN '" . $this->othersLabelValue . "'
310+
";
311+
} else {
312+
// support "Others" row for secondary label columns
313+
$rollupWhen = "
314+
WHEN `$column` IS NULL THEN NULL
315+
WHEN counter = $limit AND counterRollup = 0 THEN '" . $this->othersLabelValue . "'
316+
";
317+
}
318+
319+
$switch = "
320+
CASE
321+
$rollupWhen
322+
ELSE `$column`
323+
END
310324
";
311325

312-
$withRollupColumns[] = $column;
326+
$labelColumnsOthersSwitch[] = "$switch AS `$column`";
327+
$withRollupColumns[] = $column;
328+
$withRollupOthersGroupBy[] = $switch;
329+
} else {
330+
$labelColumnsOthersSwitch[] = "
331+
CASE
332+
WHEN counter = $limit THEN '" . $this->othersLabelValue . "'
333+
ELSE `$column`
334+
END AS `$column`
335+
";
313336
}
314-
315-
$labelColumnsOthersSwitch[] = "
316-
CASE
317-
$rollupWhen
318-
WHEN counter = $limit THEN '" . $this->othersLabelValue . "'
319-
ELSE `$column`
320-
END AS `$column`
321-
";
322337
}
338+
323339
$labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch);
324340

325341
// generate select clauses for additional columns
@@ -346,29 +362,29 @@ public function generateRankingQuery($innerQuery, bool $withRollup = false)
346362

347363
$counterRollupExpression = '';
348364

349-
if ($withRollup && !empty($withRollupColumns)) {
365+
if ($withRollup) {
350366
$initCounter .= ' ( SELECT @counterRollup:=0 ) initCounterRollup,';
351367
$counterRollupWhen = '';
352368

353369
if (count($withRollupColumns) >= 2) {
354370
$counterRollupWhen = "
355371
WHEN `" . implode('` IS NULL AND `', $withRollupColumns) . "` IS NULL THEN -1
356-
";
372+
";
357373
}
358374

359375
foreach ($withRollupColumns as $withRollupColumn) {
360376
$counterRollupWhen .= "
361377
WHEN `$withRollupColumn` IS NULL AND @counterRollup = $limit THEN $limit
362378
WHEN `$withRollupColumn` IS NULL THEN @counterRollup := @counterRollup + 1
363-
";
379+
";
364380
}
365381

366382
$counterRollupExpression = "
367383
, CASE
368384
$counterRollupWhen
369385
ELSE 0
370386
END AS counterRollup
371-
";
387+
";
372388
}
373389

374390
if (false === strpos($innerQuery, ' LIMIT ') && !Schema::getInstance()->supportsSortingInSubquery()) {
@@ -389,7 +405,7 @@ public function generateRankingQuery($innerQuery, bool $withRollup = false)
389405
( $innerQuery ) actualQuery
390406
";
391407

392-
if ($withRollup && !empty($withRollupColumns) && !Schema::getInstance()->supportsRankingRollupWithoutExtraSorting()) {
408+
if ($withRollup && !Schema::getInstance()->supportsRankingRollupWithoutExtraSorting()) {
393409
// MariaDB requires an additional sorting layer to return
394410
// the counter/counterRollup values we expect
395411
$rollupColumnSorts = [];
@@ -410,13 +426,16 @@ public function generateRankingQuery($innerQuery, bool $withRollup = false)
410426
// group by the counter - this groups "Others" because the counter stops at $limit
411427
$groupBy = 'counter';
412428

413-
if ($withRollup && !empty($counterRollupExpression)) {
414-
$groupBy .= ', counterRollup';
429+
if ($withRollup) {
430+
// group rollups additionally by the rollup counter and the
431+
// full "Others" switch to ensure correct secondary level "Others" calculation
432+
$groupBy .= ', counterRollup, ' . implode(', ', $withRollupOthersGroupBy);
415433
}
416434

417435
if ($this->partitionColumn !== false) {
418436
$groupBy .= ', `' . $this->partitionColumn . '`';
419437
}
438+
420439
$groupOthers = "
421440
SELECT
422441
$labelColumnsOthersSwitch
@@ -425,13 +444,13 @@ public function generateRankingQuery($innerQuery, bool $withRollup = false)
425444
GROUP BY $groupBy
426445
";
427446

428-
if (!Schema::getInstance()->supportsSortingInSubquery()) {
447+
if ($withRollup) {
448+
// Sort the final result if a rollup was used
449+
// to ensure rollup values are returned first, and "Others" last
450+
$groupOthers .= " ORDER BY counter, counterRollup";
451+
} elseif (!Schema::getInstance()->supportsSortingInSubquery()) {
429452
// When subqueries aren't sorted, we need to sort the result manually again
430453
$groupOthers .= " ORDER BY counter";
431-
432-
if (!empty($counterRollupExpression)) {
433-
$groupOthers .= ', counterRollup';
434-
}
435454
}
436455

437456
return $groupOthers;

plugins/CustomDimensions/RecordBuilders/CustomDimension.php

Lines changed: 32 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -186,89 +186,65 @@ protected function aggregateFromActions(DataTable $report, LogAggregator $logAgg
186186
$metricIds[] = Metrics::INDEX_BOUNCE_COUNT;
187187
$metricIds[] = Metrics::INDEX_PAGE_EXIT_NB_VISITS;
188188

189-
$actionRows = [];
190-
191189
while ($row = $resultSet->fetch()) {
192190
if (!isset($row[Metrics::INDEX_NB_VISITS])) {
193191
return;
194192
}
195193

196194
$label = $row[$valueField];
197195

198-
if ($withRollup) {
199-
$url = $row['url'];
200-
201-
if (is_null($label)) {
202-
continue;
203-
}
204-
205-
if (!is_null($url)) {
206-
$actionRows[] = $row;
207-
continue;
208-
}
196+
if ($withRollup && $label === null) {
197+
// top-level rollup result
198+
continue;
209199
}
210200

211201
$columns = [];
202+
212203
foreach ($metricIds as $id) {
213204
$columns[$id] = (float) ($row[$id] ?? 0);
214205
}
206+
215207
$label = $this->cleanCustomDimensionValue($label);
216-
$tableRow = $report->sumRowWithLabel($label, $columns);
208+
$url = $row['url'];
217209

218210
if (!$withRollup) {
219-
$url = $row['url'];
220-
if (empty($url)) {
221-
continue;
222-
}
223-
224-
// make sure we always work with normalized URL no matter how the individual action stores it
225-
$normalized = Tracker\PageUrl::normalizeUrl($url);
226-
$url = $normalized['url'];
227-
228-
if (empty($url)) {
229-
continue;
230-
}
231-
232-
$tableRow->sumRowWithLabelToSubtable($url, $columns);
233-
}
234-
}
235-
236-
if ($withRollup) {
237-
foreach ($actionRows as $row) {
238-
if (!isset($row[Metrics::INDEX_NB_VISITS])) {
239-
return;
240-
}
241-
242-
$label = $row[$valueField];
243-
$url = $row['url'];
244-
245-
if (is_null($label) || is_null($url)) {
211+
$tableRow = $report->sumRowWithLabel($label, $columns);
212+
} else {
213+
if ($url === null) {
214+
// second-level rollup result
215+
$report->sumRowWithLabel($label, $columns);
246216
continue;
247217
}
248218

249-
$label = $this->cleanCustomDimensionValue($label);
250219
$tableRow = $report->getRowFromLabel($label);
251220

252-
if (empty($tableRow)) {
253-
continue;
254-
}
255-
256-
// make sure we always work with normalized URL no matter how the individual action stores it
257-
$normalized = Tracker\PageUrl::normalizeUrl($url);
258-
$url = $normalized['url'];
259-
260-
if (empty($url)) {
221+
if (false === $tableRow) {
222+
// non-rollup row but rollup row is missing
223+
// should not happen, but don't break
261224
continue;
262225
}
226+
}
263227

264-
$columns = [];
228+
// make sure we always work with normalized URL no matter how the individual action stores it
229+
$normalized = Tracker\PageUrl::normalizeUrl($url);
230+
$url = $normalized['url'];
265231

266-
foreach ($metricIds as $id) {
267-
$columns[$id] = (float) ($row[$id] ?? 0);
268-
}
232+
if (empty($url)) {
233+
continue;
234+
}
269235

270-
$tableRow->sumRowWithLabelToSubtable($url, $columns);
236+
if (
237+
$withRollup
238+
&& $url === RankingQuery::LABEL_SUMMARY_ROW
239+
&& !$tableRow->isSubtableLoaded()
240+
) {
241+
// skip creating the subtable if:
242+
// - we are using rollups
243+
// - the only row would be "Others"
244+
continue;
271245
}
246+
247+
$tableRow->sumRowWithLabelToSubtable($url, $columns);
272248
}
273249
}
274250

plugins/CustomDimensions/tests/System/expected/test_ranking_limit_with_rollup_by_archiving_query__CustomDimensions.getCustomDimension_day.xml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,48 @@
168168
<segment>dimension1==site3+group5;actionUrl=$example.com%2Fpage%2F5%2F5</segment>
169169
<url>example.com%2Fpage%2F5%2F5</url>
170170
</row>
171+
<row>
172+
<label>Others</label>
173+
<nb_visits>2</nb_visits>
174+
<nb_uniq_visitors>2</nb_uniq_visitors>
175+
<nb_hits>53</nb_hits>
176+
<sum_time_network>0</sum_time_network>
177+
<nb_hits_with_time_network>0</nb_hits_with_time_network>
178+
<min_time_network>0</min_time_network>
179+
<max_time_network>0</max_time_network>
180+
<sum_time_server>0</sum_time_server>
181+
<nb_hits_with_time_server>0</nb_hits_with_time_server>
182+
<min_time_server>0</min_time_server>
183+
<max_time_server>0</max_time_server>
184+
<sum_time_transfer>0</sum_time_transfer>
185+
<nb_hits_with_time_transfer>0</nb_hits_with_time_transfer>
186+
<min_time_transfer>0</min_time_transfer>
187+
<max_time_transfer>0</max_time_transfer>
188+
<sum_time_dom_processing>0</sum_time_dom_processing>
189+
<nb_hits_with_time_dom_processing>0</nb_hits_with_time_dom_processing>
190+
<min_time_dom_processing>0</min_time_dom_processing>
191+
<max_time_dom_processing>0</max_time_dom_processing>
192+
<sum_time_dom_completion>0</sum_time_dom_completion>
193+
<nb_hits_with_time_dom_completion>0</nb_hits_with_time_dom_completion>
194+
<min_time_dom_completion>0</min_time_dom_completion>
195+
<max_time_dom_completion>0</max_time_dom_completion>
196+
<sum_time_on_load>0</sum_time_on_load>
197+
<nb_hits_with_time_on_load>0</nb_hits_with_time_on_load>
198+
<min_time_on_load>0</min_time_on_load>
199+
<max_time_on_load>0</max_time_on_load>
200+
<sum_bandwidth>0</sum_bandwidth>
201+
<nb_hits_with_bandwidth>0</nb_hits_with_bandwidth>
202+
<min_bandwidth>0</min_bandwidth>
203+
<max_bandwidth>0</max_bandwidth>
204+
<sum_time_spent>0</sum_time_spent>
205+
<bounce_count>0</bounce_count>
206+
<exit_nb_visits>2</exit_nb_visits>
207+
<avg_time_on_dimension>0</avg_time_on_dimension>
208+
<bounce_rate>0%</bounce_rate>
209+
<exit_rate>100%</exit_rate>
210+
<segment>dimension1==site3+group5;actionUrl=$__mtm_ranking_query_others__</segment>
211+
<url>__mtm_ranking_query_others__</url>
212+
</row>
171213
</subtable>
172214
</row>
173215
<row>

plugins/CustomDimensions/tests/System/expected/test_ranking_limit_with_rollup_by_archiving_query_expanded__CustomDimensions.getCustomDimension_day.xml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,48 @@
168168
<segment>dimension1==site3+group5;actionUrl=$example.com%2Fpage%2F5%2F5</segment>
169169
<url>example.com%2Fpage%2F5%2F5</url>
170170
</row>
171+
<row>
172+
<label>Others</label>
173+
<nb_visits>2</nb_visits>
174+
<nb_uniq_visitors>2</nb_uniq_visitors>
175+
<nb_hits>53</nb_hits>
176+
<sum_time_network>0</sum_time_network>
177+
<nb_hits_with_time_network>0</nb_hits_with_time_network>
178+
<min_time_network>0</min_time_network>
179+
<max_time_network>0</max_time_network>
180+
<sum_time_server>0</sum_time_server>
181+
<nb_hits_with_time_server>0</nb_hits_with_time_server>
182+
<min_time_server>0</min_time_server>
183+
<max_time_server>0</max_time_server>
184+
<sum_time_transfer>0</sum_time_transfer>
185+
<nb_hits_with_time_transfer>0</nb_hits_with_time_transfer>
186+
<min_time_transfer>0</min_time_transfer>
187+
<max_time_transfer>0</max_time_transfer>
188+
<sum_time_dom_processing>0</sum_time_dom_processing>
189+
<nb_hits_with_time_dom_processing>0</nb_hits_with_time_dom_processing>
190+
<min_time_dom_processing>0</min_time_dom_processing>
191+
<max_time_dom_processing>0</max_time_dom_processing>
192+
<sum_time_dom_completion>0</sum_time_dom_completion>
193+
<nb_hits_with_time_dom_completion>0</nb_hits_with_time_dom_completion>
194+
<min_time_dom_completion>0</min_time_dom_completion>
195+
<max_time_dom_completion>0</max_time_dom_completion>
196+
<sum_time_on_load>0</sum_time_on_load>
197+
<nb_hits_with_time_on_load>0</nb_hits_with_time_on_load>
198+
<min_time_on_load>0</min_time_on_load>
199+
<max_time_on_load>0</max_time_on_load>
200+
<sum_bandwidth>0</sum_bandwidth>
201+
<nb_hits_with_bandwidth>0</nb_hits_with_bandwidth>
202+
<min_bandwidth>0</min_bandwidth>
203+
<max_bandwidth>0</max_bandwidth>
204+
<sum_time_spent>0</sum_time_spent>
205+
<bounce_count>0</bounce_count>
206+
<exit_nb_visits>2</exit_nb_visits>
207+
<avg_time_on_dimension>0</avg_time_on_dimension>
208+
<bounce_rate>0%</bounce_rate>
209+
<exit_rate>100%</exit_rate>
210+
<segment>dimension1==site3+group5;actionUrl=$__mtm_ranking_query_others__</segment>
211+
<url>__mtm_ranking_query_others__</url>
212+
</row>
171213
</subtable>
172214
</row>
173215
<row>

0 commit comments

Comments
 (0)