Skip to content

Commit 748f22b

Browse files
committed
Implemented a memory-reduction step for non-day flat-first hierarchy rebuild
1 parent e83e39f commit 748f22b

3 files changed

Lines changed: 146 additions & 2 deletions

File tree

core/ArchiveProcessor/RecordBuilder.php

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ protected function aggregateBuiltFromFlatRecordForNonDay(
382382
$archiveProcessor->insertBlobRecord($flatRecordName, $flatSerialized);
383383
$processedFlatRecords[$flatRecordName] = true;
384384

385-
$hierarchicalTable = $this->buildHierarchicalTableFromFlatTable(
385+
$hierarchicalTable = $this->buildHierarchicalTableFromFlatTableAndConsumeRows(
386386
$flatTable,
387387
$columnAggregationOps,
388388
function (Row $flatRow) use ($flatToHierarchyPathCallback, $archiveProcessor, $hierarchicalRecord) {
@@ -410,7 +410,8 @@ function (Row $flatRow) use ($flatToHierarchyPathCallback, $archiveProcessor, $h
410410
* the hierarchical blob record is serialized and inserted.
411411
*
412412
* Intended for plugin-specific finalization (for example, metadata or column cleanup) when
413-
* using setBuiltFromFlatRecord().
413+
* using setBuiltFromFlatRecord(). The flat table has already been serialized at this point
414+
* and may have been fully consumed while rebuilding the hierarchy.
414415
*/
415416
protected function beforeInsertBuiltFromFlatHierarchyRecord(
416417
ArchiveProcessor $archiveProcessor,
@@ -465,6 +466,47 @@ protected function buildHierarchicalTableFromFlatTable(
465466
return $hierarchicalTable;
466467
}
467468

469+
protected function buildHierarchicalTableFromFlatTableAndConsumeRows(
470+
DataTable $flatTable,
471+
?array $columnAggregationOps,
472+
callable $flatToHierarchyPathCallback,
473+
array $defaultHierarchyRowColumns = []
474+
): DataTable {
475+
$hierarchicalTable = new DataTable();
476+
if (!empty($columnAggregationOps)) {
477+
$hierarchicalTable->setMetadata(DataTable::COLUMN_AGGREGATION_OPS_METADATA_NAME, $columnAggregationOps);
478+
}
479+
480+
while (($flatRow = $flatTable->shiftRow()) instanceof Row) {
481+
$path = call_user_func($flatToHierarchyPathCallback, $flatRow);
482+
if (is_array($path) && !empty($path)) {
483+
[$destinationRow, $level] = $hierarchicalTable->walkPath($path, $defaultHierarchyRowColumns, 0);
484+
if ($destinationRow instanceof Row) {
485+
$this->sumRowIntoDestination($flatRow, $destinationRow, $columnAggregationOps);
486+
}
487+
}
488+
489+
Common::destroy($flatRow);
490+
}
491+
492+
$summaryRow = $flatTable->getSummaryRow();
493+
if ($summaryRow instanceof Row && !$this->isSummaryRowEmpty($summaryRow)) {
494+
$destinationSummaryRow = $hierarchicalTable->getRowFromId(DataTable::ID_SUMMARY_ROW);
495+
if ($destinationSummaryRow === false) {
496+
$destinationSummaryRow = clone $summaryRow;
497+
$destinationSummaryRow->setIsSummaryRow();
498+
$hierarchicalTable->addSummaryRow($destinationSummaryRow);
499+
} else {
500+
$this->sumRowIntoDestination($summaryRow, $destinationSummaryRow, $columnAggregationOps);
501+
}
502+
}
503+
504+
$flatTable->deleteRow(DataTable::ID_SUMMARY_ROW);
505+
Common::destroy($summaryRow);
506+
507+
return $hierarchicalTable;
508+
}
509+
468510
protected function sumRowIntoDestination(Row $source, Row $destination, ?array $columnAggregationOps): void
469511
{
470512
$sourceCopy = clone $source;

core/DataTable.php

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -965,6 +965,30 @@ public function getRowsWithoutSummaryRow()
965965
return $this->rows;
966966
}
967967

968+
/**
969+
* Removes and returns the first non-summary row in this table.
970+
*
971+
* @return Row|null
972+
*/
973+
public function shiftRow()
974+
{
975+
if (empty($this->rows)) {
976+
return null;
977+
}
978+
979+
reset($this->rows);
980+
$rowId = key($this->rows);
981+
982+
if (!isset($this->rows[$rowId])) {
983+
return null;
984+
}
985+
986+
$row = $this->rows[$rowId];
987+
unset($this->rows[$rowId]);
988+
989+
return $row;
990+
}
991+
968992
/**
969993
* @return int
970994
* @ignore

tests/PHPUnit/Unit/ArchiveProcessor/RecordBuilderTest.php

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,84 @@ protected function getAllSubperiodKeys(ArchiveProcessor $archiveProcessor): arra
721721
$this->assertSame(['/flat-path', '/legacy-path'], $hierarchyLabels);
722722
}
723723

724+
public function testBuildForNonDayPeriodConsumesFlatTableBeforePreInsertHook(): void
725+
{
726+
$hookState = (object) ['flatRowsAtHook' => null, 'hierarchyLabelsAtHook' => []];
727+
728+
$recordBuilder = new class ($hookState) extends ArchiveProcessor\RecordBuilder {
729+
private $hookState;
730+
731+
public function __construct(object $hookState)
732+
{
733+
parent::__construct();
734+
$this->hookState = $hookState;
735+
}
736+
737+
public function getRecordMetadata(ArchiveProcessor $archiveProcessor): array
738+
{
739+
return [
740+
Record::make(Record::TYPE_BLOB, 'TestPlugin_hierarchy')
741+
->setBuiltFromFlatRecord('TestPlugin_flat', function (Row $flatRow): ?array {
742+
$label = $flatRow->getColumn('label');
743+
if (!is_string($label) || $label === '') {
744+
return null;
745+
}
746+
747+
return [$label];
748+
}),
749+
Record::make(Record::TYPE_BLOB, 'TestPlugin_flat'),
750+
];
751+
}
752+
753+
protected function aggregate(ArchiveProcessor $archiveProcessor): array
754+
{
755+
return [];
756+
}
757+
758+
protected function aggregateRootDataTableFromBlobs(
759+
ArchiveProcessor $archiveProcessor,
760+
string $recordName,
761+
?array $columnsAggregationOperation,
762+
?array $columnsToRenameAfterAggregation
763+
): array {
764+
$table = new DataTable();
765+
if ($recordName === 'TestPlugin_flat') {
766+
$table->addRowFromSimpleArray(['label' => '/flat-path-a', 'nb_visits' => 5]);
767+
$table->addRowFromSimpleArray(['label' => '/flat-path-b', 'nb_visits' => 3]);
768+
$table->addSummaryRow(new Row([Row::COLUMNS => ['label' => '-1', 'nb_visits' => 2]]));
769+
770+
return [$table, true, ['2020-03-04,2020-03-04' => true]];
771+
}
772+
773+
return [$table, false, []];
774+
}
775+
776+
protected function getAllSubperiodKeys(ArchiveProcessor $archiveProcessor): array
777+
{
778+
return ['2020-03-04,2020-03-04' => true];
779+
}
780+
781+
protected function beforeInsertBuiltFromFlatHierarchyRecord(
782+
ArchiveProcessor $archiveProcessor,
783+
Record $hierarchicalRecord,
784+
DataTable $hierarchicalTable,
785+
DataTable $flatTable
786+
): void {
787+
$this->hookState->flatRowsAtHook = $flatTable->getRowsCount();
788+
$this->hookState->hierarchyLabelsAtHook = $hierarchicalTable->getColumn('label');
789+
}
790+
};
791+
792+
$mockArchiveProcessor = $this->getMockArchiveProcessor('week', ['TestPlugin_flat']);
793+
$recordBuilder->buildForNonDayPeriod($mockArchiveProcessor);
794+
795+
$this->assertSame(0, $hookState->flatRowsAtHook);
796+
$this->assertSame(['/flat-path-a', '/flat-path-b', '-1'], $hookState->hierarchyLabelsAtHook);
797+
798+
$this->assertSame(['/flat-path-a', '/flat-path-b', '-1'], $this->getTopLevelLabelsOfInsertedBlobRecord('TestPlugin_flat'));
799+
$this->assertSame(['/flat-path-a', '/flat-path-b', '-1'], $this->getTopLevelLabelsOfInsertedBlobRecord('TestPlugin_hierarchy'));
800+
}
801+
724802
public function testBuildForNonDayPeriodCorrectlyAggregatesMetricsForMetricsThatAreRowCountsOfRecords()
725803
{
726804
$recordBuilder = new class () extends ArchiveProcessor\RecordBuilder {

0 commit comments

Comments
 (0)