Skip to content

Commit 0dee185

Browse files
committed
Improve performance of mixed table aggregation (old hierarchy tables + new flat tables)
1 parent 748f22b commit 0dee185

3 files changed

Lines changed: 210 additions & 27 deletions

File tree

core/ArchiveProcessor/Record.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ public function getMultiplePeriodTransform(): ?callable
266266
* aggregates into the flat table when some periods do not
267267
* have the flat record yet.
268268
* Signature: function (DataTable $legacyHierarchy, DataTable $flatTable, ArchiveProcessor $archiveProcessor, Record $hierarchicalRecord): void
269+
* The callback is invoked once per source period, each time with that period's legacy hierarchy table.
269270
*/
270271
public function setBuiltFromFlatRecord(
271272
string $flatRecordName,

core/ArchiveProcessor/RecordBuilder.php

Lines changed: 86 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -355,18 +355,16 @@ protected function aggregateBuiltFromFlatRecordForNonDay(
355355
$hasLegacyFallbackData = false;
356356
$legacyReducerCallback = $hierarchicalRecord->getLegacyHierarchyToFlatReducerCallback();
357357
if (!empty($periodsWithoutFlatRecord) && is_callable($legacyReducerCallback)) {
358-
[$legacyHierarchicalTable, $hasLegacyFallbackData] = $this->aggregateDataTableFromBlobs(
358+
$hasLegacyFallbackData = $this->aggregateLegacyHierarchyPeriodsIntoFlatTable(
359359
$archiveProcessor,
360360
$hierarchicalRecord->getName(),
361+
$flatTable,
362+
$legacyReducerCallback,
363+
$hierarchicalRecord,
361364
$columnAggregationOps,
362365
$columnToRenameAfterAggregation,
363366
$periodsWithoutFlatRecord
364367
);
365-
366-
if ($hasLegacyFallbackData) {
367-
call_user_func($legacyReducerCallback, $legacyHierarchicalTable, $flatTable, $archiveProcessor, $hierarchicalRecord);
368-
}
369-
Common::destroy($legacyHierarchicalTable);
370368
}
371369

372370
if (!$hasFlatSourceData && !$hasLegacyFallbackData) {
@@ -405,6 +403,88 @@ function (Row $flatRow) use ($flatToHierarchyPathCallback, $archiveProcessor, $h
405403
return true;
406404
}
407405

406+
/**
 * Folds the legacy hierarchy blobs of `$recordName` into `$flatTable`, one source period at a time.
 *
 * Rows are read from querySingleBlobRows() and keyed by "date1,date2". Consecutive rows that share
 * a key are buffered and handed to reduceLegacyHierarchyPeriodRowsIntoFlatTable() as one group, so
 * only a single period's rows are buffered here at any moment.
 *
 * NOTE(review): only adjacent rows are grouped, so this presumably relies on querySingleBlobRows()
 * returning all rows of a period contiguously — confirm against its implementation.
 *
 * @param array|null $periodsToInclude Map keyed by "date1,date2"; when not null, rows whose period
 *                                     key is not set in the map are skipped.
 * @return bool true if at least one period group was reported as having rows.
 */
protected function aggregateLegacyHierarchyPeriodsIntoFlatTable(
    ArchiveProcessor $archiveProcessor,
    string $recordName,
    DataTable $flatTable,
    callable $legacyReducerCallback,
    Record $hierarchicalRecord,
    ?array $columnsAggregationOperation,
    ?array $columnsToRenameAfterAggregation,
    ?array $periodsToInclude
): bool {
    // Single place that forwards one period's buffered rows to the per-period reducer.
    $reducePeriodGroup = function (array $rowsOfOnePeriod) use (
        $recordName,
        $flatTable,
        $legacyReducerCallback,
        $archiveProcessor,
        $hierarchicalRecord,
        $columnsAggregationOperation,
        $columnsToRenameAfterAggregation
    ): bool {
        return $this->reduceLegacyHierarchyPeriodRowsIntoFlatTable(
            $rowsOfOnePeriod,
            $recordName,
            $flatTable,
            $legacyReducerCallback,
            $archiveProcessor,
            $hierarchicalRecord,
            $columnsAggregationOperation,
            $columnsToRenameAfterAggregation
        );
    };

    $reducedAnything = false;
    $activePeriodKey = null;
    $pendingRows = [];

    foreach ($this->querySingleBlobRows($archiveProcessor, $recordName) as $blobRow) {
        $periodKey = $blobRow['date1'] . ',' . $blobRow['date2'];

        $isExcluded = $periodsToInclude !== null && !isset($periodsToInclude[$periodKey]);
        if ($isExcluded) {
            continue;
        }

        // A different key means the previous period is complete: reduce it and start a new buffer.
        if ($activePeriodKey !== null && $activePeriodKey !== $periodKey) {
            if ($reducePeriodGroup($pendingRows)) {
                $reducedAnything = true;
            }
            $pendingRows = [];
        }

        $activePeriodKey = $periodKey;
        $pendingRows[] = $blobRow;
    }

    // The last period never sees a "next" key, so it is reduced after the loop.
    if ($pendingRows !== [] && $reducePeriodGroup($pendingRows)) {
        $reducedAnything = true;
    }

    return $reducedAnything;
}
459+
460+
/**
 * Aggregates the blob rows of one source period into a temporary legacy hierarchy table and,
 * if that table has rows, lets the reducer callback merge it into `$flatTable`.
 *
 * The callback is called as
 * `($legacyHierarchicalTable, $flatTable, $archiveProcessor, $hierarchicalRecord)`.
 * The temporary table is always destroyed before this method returns or propagates an
 * exception thrown by the callback.
 *
 * @param array $periodRows Blob rows passed on to BlobTableAggregator::aggregateBlobRows().
 * @return bool The "has rows" flag reported by BlobTableAggregator::aggregateBlobRows().
 */
protected function reduceLegacyHierarchyPeriodRowsIntoFlatTable(
    array $periodRows,
    string $recordName,
    DataTable $flatTable,
    callable $legacyReducerCallback,
    ArchiveProcessor $archiveProcessor,
    Record $hierarchicalRecord,
    ?array $columnsAggregationOperation,
    ?array $columnsToRenameAfterAggregation
): bool {
    [$legacyHierarchicalTable, $hasRows] = BlobTableAggregator::aggregateBlobRows(
        $periodRows,
        $recordName,
        $columnsAggregationOperation,
        function (DataTable $table) use ($archiveProcessor, $columnsToRenameAfterAggregation): void {
            $archiveProcessor->renameColumnsAfterAggregation($table, $columnsToRenameAfterAggregation);
        }
    );

    try {
        if ($hasRows) {
            call_user_func($legacyReducerCallback, $legacyHierarchicalTable, $flatTable, $archiveProcessor, $hierarchicalRecord);
        }
    } finally {
        // Release the per-period table even when the reducer callback throws; otherwise a
        // failing callback would leave the temporary table undestroyed.
        Common::destroy($legacyHierarchicalTable);
    }

    return $hasRows;
}
487+
408488
/**
409489
* Hook executed after the hierarchy table has been rebuilt from the flat table and before
410490
* the hierarchical blob record is serialized and inserted.

tests/PHPUnit/Unit/ArchiveProcessor/RecordBuilderTest.php

Lines changed: 123 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -660,27 +660,6 @@ protected function aggregate(ArchiveProcessor $archiveProcessor): array
660660
return [];
661661
}
662662

663-
protected function aggregateDataTableFromBlobs(
664-
ArchiveProcessor $archiveProcessor,
665-
string $recordName,
666-
?array $columnsAggregationOperation,
667-
?array $columnsToRenameAfterAggregation,
668-
?array $periodsToInclude = null
669-
): array {
670-
$table = new DataTable();
671-
if ($recordName === 'TestPlugin_flat') {
672-
$table->addRowFromSimpleArray(['label' => '/flat-path', 'nb_visits' => 5]);
673-
return [$table, true];
674-
}
675-
676-
if ($recordName === 'TestPlugin_hierarchy') {
677-
$table->addRowFromSimpleArray(['label' => '/legacy-path', 'nb_visits' => 7]);
678-
return [$table, true];
679-
}
680-
681-
return [$table, false];
682-
}
683-
684663
protected function aggregateRootDataTableFromBlobs(
685664
ArchiveProcessor $archiveProcessor,
686665
string $recordName,
@@ -696,6 +675,23 @@ protected function aggregateRootDataTableFromBlobs(
696675
return [$table, false, []];
697676
}
698677

678+
/**
 * Test double: yields a single legacy blob row (period 2020-03-05) for 'TestPlugin_hierarchy'
 * and nothing for any other record name.
 */
protected function querySingleBlobRows(ArchiveProcessor $archiveProcessor, string $recordName): iterable
{
    if ($recordName !== 'TestPlugin_hierarchy') {
        return [];
    }

    $legacyOnlyTable = new DataTable();
    $legacyOnlyTable->addRowFromSimpleArray(['label' => '/legacy-path', 'nb_visits' => 7]);

    // Index 0 of the serialized result is used as the blob value of the row.
    $serializedBlobs = $legacyOnlyTable->getSerialized();

    yield [
        'date1' => '2020-03-05',
        'date2' => '2020-03-05',
        'name' => 'TestPlugin_hierarchy',
        'value' => $serializedBlobs[0],
    ];
}
694+
699695
protected function getAllSubperiodKeys(ArchiveProcessor $archiveProcessor): array
700696
{
701697
return [
@@ -799,6 +795,112 @@ protected function beforeInsertBuiltFromFlatHierarchyRecord(
799795
$this->assertSame(['/flat-path-a', '/flat-path-b', '-1'], $this->getTopLevelLabelsOfInsertedBlobRecord('TestPlugin_hierarchy'));
800796
}
801797

798+
/**
 * One sub-period (2020-03-04) provides the flat record; two others (2020-03-05, 2020-03-06)
 * only provide legacy hierarchy blobs. Expects the legacy reducer to be called once per legacy
 * period and both inserted records to contain the flat label plus both legacy labels.
 */
public function testBuildForNonDayPeriodReducesLegacyFallbackPerPeriod(): void
{
    // Shared object so the anonymous builder can report each reducer invocation to the test.
    $callLog = (object) ['reducerCalls' => []];

    $recordBuilder = new class ($callLog) extends ArchiveProcessor\RecordBuilder {
        private $callLog;

        public function __construct(object $callLog)
        {
            parent::__construct();
            $this->callLog = $callLog;
        }

        public function getRecordMetadata(ArchiveProcessor $archiveProcessor): array
        {
            // Maps a flat row to a one-element hierarchy path, or null for unusable labels.
            $flatRowToPath = function (Row $flatRow): ?array {
                $label = $flatRow->getColumn('label');
                if (!is_string($label) || $label === '') {
                    return null;
                }

                return [$label];
            };

            // Records the labels seen in each call, then copies the legacy rows into the flat table.
            $legacyReducer = function (DataTable $legacyHierarchy, DataTable $flatTable) {
                $this->callLog->reducerCalls[] = $legacyHierarchy->getColumn('label');

                foreach ($legacyHierarchy->getRowsWithoutSummaryRow() as $legacyRow) {
                    $flatTable->addRow(clone $legacyRow);
                }
            };

            $hierarchyRecord = Record::make(Record::TYPE_BLOB, 'TestPlugin_hierarchy')
                ->setBuiltFromFlatRecord('TestPlugin_flat', $flatRowToPath, $legacyReducer);
            $flatRecord = Record::make(Record::TYPE_BLOB, 'TestPlugin_flat');

            return [
                $hierarchyRecord,
                $flatRecord,
            ];
        }

        protected function aggregate(ArchiveProcessor $archiveProcessor): array
        {
            return [];
        }

        protected function aggregateRootDataTableFromBlobs(
            ArchiveProcessor $archiveProcessor,
            string $recordName,
            ?array $columnsAggregationOperation,
            ?array $columnsToRenameAfterAggregation
        ): array {
            $table = new DataTable();

            if ($recordName !== 'TestPlugin_flat') {
                return [$table, false, []];
            }

            // Only 2020-03-04 is reported as having the flat record.
            $table->addRowFromSimpleArray(['label' => '/flat-path', 'nb_visits' => 5]);

            return [$table, true, ['2020-03-04,2020-03-04' => true]];
        }

        protected function querySingleBlobRows(ArchiveProcessor $archiveProcessor, string $recordName): iterable
        {
            if ($recordName !== 'TestPlugin_hierarchy') {
                return [];
            }

            $tableForMarch5 = new DataTable();
            $tableForMarch5->addRowFromSimpleArray(['label' => '/legacy-path-a', 'nb_visits' => 2]);

            $tableForMarch6 = new DataTable();
            $tableForMarch6->addRowFromSimpleArray(['label' => '/legacy-path-b', 'nb_visits' => 3]);

            // Builds the blob row for a single-day period from the given table.
            $toBlobRow = function (string $day, DataTable $table): array {
                return [
                    'date1' => $day,
                    'date2' => $day,
                    'name' => 'TestPlugin_hierarchy',
                    'value' => $table->getSerialized()[0],
                ];
            };

            yield $toBlobRow('2020-03-05', $tableForMarch5);

            yield $toBlobRow('2020-03-06', $tableForMarch6);
        }

        protected function getAllSubperiodKeys(ArchiveProcessor $archiveProcessor): array
        {
            return array_fill_keys(
                [
                    '2020-03-04,2020-03-04',
                    '2020-03-05,2020-03-05',
                    '2020-03-06,2020-03-06',
                ],
                true
            );
        }
    };

    $weekArchiveProcessor = $this->getMockArchiveProcessor('week', ['TestPlugin_hierarchy']);
    $recordBuilder->buildForNonDayPeriod($weekArchiveProcessor);

    // One reducer call per legacy period, each seeing only that period's label.
    $this->assertSame([['/legacy-path-a'], ['/legacy-path-b']], $callLog->reducerCalls);

    $expectedLabels = ['/flat-path', '/legacy-path-a', '/legacy-path-b'];

    $insertedFlatLabels = $this->getTopLevelLabelsOfInsertedBlobRecord('TestPlugin_flat');
    sort($insertedFlatLabels);
    $this->assertSame($expectedLabels, $insertedFlatLabels);

    $insertedHierarchyLabels = $this->getTopLevelLabelsOfInsertedBlobRecord('TestPlugin_hierarchy');
    sort($insertedHierarchyLabels);
    $this->assertSame($expectedLabels, $insertedHierarchyLabels);
}
903+
802904
public function testBuildForNonDayPeriodCorrectlyAggregatesMetricsForMetricsThatAreRowCountsOfRecords()
803905
{
804906
$recordBuilder = new class () extends ArchiveProcessor\RecordBuilder {

0 commit comments

Comments
 (0)