Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions config/global.ini.php
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,9 @@

; maximum number of rows for any of the Actions tables (pages, downloads, outlinks)
datatable_archiving_maximum_rows_actions = 500
; maximum number of rows used when archiving flat page/title actions before rebuilding hierarchy
; if set to 0, legacy hierarchical-only Actions archiving is used
datatable_archiving_maximum_rows_actions_flat = 0
; maximum number of rows for pages in categories (sub pages, when clicking on the + for a page category)
; note: should not exceed the display limit in Piwik\Actions\Controller::ACTIONS_REPORT_ROWS_DISPLAY
; because each subdirectory doesn't have paging at the bottom, so all data should be displayed if possible.
Expand Down
78 changes: 12 additions & 66 deletions core/ArchiveProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

use Exception;
use Piwik\Archive\DataTableFactory;
use Piwik\ArchiveProcessor\BlobTableAggregator;
use Piwik\ArchiveProcessor\Parameters;
use Piwik\ArchiveProcessor\Rules;
use Piwik\Container\StaticContainer;
Expand Down Expand Up @@ -390,25 +391,12 @@ protected function aggregateDataTableRecord($name, $columnsAggregationOperation

protected function getAggregatedDataTableMapFromBlobs(\Iterator $dataTableBlobs, $columnsAggregationOperation, $columnsToRenameAfterAggregation, $name)
{
// maps period & subtable ID in database to the Row instance in $result that subtable should be added to when encountered
// [$row['date1'].','.$row['date2']][$tableId] = $row in $result
/** @var Row[][] */
$tableIdToResultRowMapping = [];

$result = new DataTable();

if (!empty($columnsAggregationOperation)) {
$result->setMetadata(DataTable::COLUMN_AGGREGATION_OPS_METADATA_NAME, $columnsAggregationOperation);
}

foreach ($dataTableBlobs as $archiveDataRow) {
$period = $archiveDataRow['date1'] . ',' . $archiveDataRow['date2'];
$tableId = $archiveDataRow['name'] == $name ? null : $this->getSubtableIdFromBlobName($archiveDataRow['name']);

$blobTable = DataTable::fromSerializedArray($archiveDataRow['value']);

// see https://github.com/piwik/piwik/issues/4377
$blobTable->filter(function ($table) use ($columnsToRenameAfterAggregation) {
[$result, $hasRows] = BlobTableAggregator::aggregateBlobRows(
$dataTableBlobs,
$name,
$columnsAggregationOperation,
function (DataTable $table) use ($columnsToRenameAfterAggregation): void {
// see https://github.com/piwik/piwik/issues/4377
if ($this->areColumnsNotAlreadyRenamed($table)) {
/**
* This makes archiving and range dates a lot faster. Imagine we archive a week, then we will
Expand All @@ -420,12 +408,9 @@ protected function getAggregatedDataTableMapFromBlobs(\Iterator $dataTableBlobs,
*/
$this->renameColumnsAfterAggregation($table, $columnsToRenameAfterAggregation);
}
});

$tableToAddTo = null;
if ($tableId === null) {
$tableToAddTo = $result;
} elseif (empty($tableIdToResultRowMapping[$period][$tableId])) { // sanity check
},
null,
function (string $period, int $tableId): void {
StaticContainer::get(LoggerInterface::class)->info(
'Unexpected state when aggregating DataTable, unknown period/table ID combination encountered: {period} - {tableId}.'
. ' This either means the SQL to order blobs is behaving incorrectly or the blob data is corrupt in some way.',
Expand All @@ -434,52 +419,13 @@ protected function getAggregatedDataTableMapFromBlobs(\Iterator $dataTableBlobs,
'tableId' => $tableId,
]
);
continue;
} else {
$rowToAddTo = $tableIdToResultRowMapping[$period][$tableId];

if (!$rowToAddTo->getIdSubDataTable()) {
$newTable = new DataTable();
$newTable->setMetadata(DataTable::COLUMN_AGGREGATION_OPS_METADATA_NAME, $columnsAggregationOperation);
$rowToAddTo->setSubtable($newTable);
}

$tableToAddTo = $rowToAddTo->getSubtable();
}

$tableToAddTo->addDataTable($blobTable);

// add subtable IDs for $blobTableRow to $tableIdToResultRowMapping
foreach ($blobTable->getRows() as $blobTableRow) {
$label = $blobTableRow->getColumn('label');
$subtableId = $blobTableRow->getIdSubDataTable();
if (empty($subtableId)) {
continue;
}

$rowToAddTo = $tableToAddTo->getRowFromLabel($label);
$tableIdToResultRowMapping[$period][$subtableId] = $rowToAddTo;
}

Common::destroy($blobTable);
unset($blobTable);
}
);
unset($hasRows);

return $result;
}

/**
 * Reads the subtable ID encoded at the end of a blob record name.
 *
 * The candidate ID is the text after the last underscore (or the whole name when it contains none).
 *
 * @param string $recordName Blob record name.
 * @return string|null The suffix, unchanged, when is_numeric() accepts it; null otherwise.
 */
private function getSubtableIdFromBlobName($recordName)
{
    $segments = explode('_', $recordName);
    $lastSegment = array_pop($segments);

    return is_numeric($lastSegment) ? $lastSegment : null;
}

/**
* Note: public only for use in closure in PHP 5.3.
*
Expand Down
117 changes: 117 additions & 0 deletions core/ArchiveProcessor/BlobTableAggregator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<?php

/**
* Matomo - free/libre analytics platform
*
* @link https://matomo.org
* @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/

namespace Piwik\ArchiveProcessor;

use Piwik\Common;
use Piwik\DataTable;
use Piwik\DataTable\Row;

/**
* Internal helper for aggregating blob rows into a DataTable.
*
* @internal
*/
final class BlobTableAggregator
{
    /**
     * Folds a stream of serialized archive blobs into one aggregated DataTable.
     *
     * A blob whose name equals $recordName, or whose name does not end in a numeric suffix, is merged into the
     * root table. Any other blob is treated as a subtable: it is merged into the subtable of the row that
     * referenced its ID in a previously processed blob of the same period ("date1,date2"). When no such row is
     * known yet, $onMissingParentTable (if given) is notified and the blob is discarded, so a parent blob has
     * to be iterated before the blobs of its subtables.
     *
     * @param iterable<array{name: string, date1: string, date2: string, value: string}> $archiveDataRows
     * @param string $recordName Name of the root blob record.
     * @param array|null $columnsAggregationOperation Stored as aggregation-ops metadata on the root table and on
     *                                                every subtable created here; ignored when empty.
     * @param callable(DataTable): void $renameColumnsCallback Invoked through DataTable::filter() on each
     *                                                         unserialized blob table before it is merged.
     * @param (callable(array{name: string, date1: string, date2: string, value: string}): bool)|null $shouldIncludeRow
     *        Optional predicate; rows for which it returns a falsy value are skipped entirely.
     * @param (callable(string, int): void)|null $onMissingParentTable
     *        Optional hook receiving the period key and subtable ID of a blob whose parent row is unknown.
     * @return array{0: DataTable, 1: bool} The aggregated table, and whether at least one row passed
     *                                      $shouldIncludeRow (even if that row was later discarded).
     */
    public static function aggregateBlobRows(
        iterable $archiveDataRows,
        string $recordName,
        ?array $columnsAggregationOperation,
        callable $renameColumnsCallback,
        ?callable $shouldIncludeRow = null,
        ?callable $onMissingParentTable = null
    ): array {
        $aggregate = new DataTable();
        if (!empty($columnsAggregationOperation)) {
            $aggregate->setMetadata(DataTable::COLUMN_AGGREGATION_OPS_METADATA_NAME, $columnsAggregationOperation);
        }

        // DataTable::filter() is handed a closure, so the generic callable is wrapped once up front.
        $renameFilter = static function (DataTable $table) use ($renameColumnsCallback) {
            $renameColumnsCallback($table);
        };

        // [period "date1,date2"][subtable ID] => Row of the aggregate that the subtable has to be attached to
        $ownerRows = [];
        $sawRow = false;

        foreach ($archiveDataRows as $blob) {
            $isWanted = $shouldIncludeRow === null || $shouldIncludeRow($blob);
            if (!$isWanted) {
                continue;
            }

            $sawRow = true;
            $periodKey = $blob['date1'] . ',' . $blob['date2'];

            // null means "this blob belongs to the root table"
            $parentId = null;
            if ($blob['name'] !== $recordName) {
                $parentId = self::parseSubtableIdFromBlobName($blob['name']);
            }

            $blobTable = DataTable::fromSerializedArray($blob['value']);
            $blobTable->filter($renameFilter);

            if ($parentId === null) {
                $destination = $aggregate;
            } else {
                $owner = $ownerRows[$periodKey][$parentId] ?? null;
                if (empty($owner)) {
                    // subtable blob seen before (or without) the row that references it
                    if ($onMissingParentTable !== null) {
                        $onMissingParentTable($periodKey, $parentId);
                    }
                    Common::destroy($blobTable);
                    continue;
                }

                $destination = self::getOrCreateSubtable($owner, $columnsAggregationOperation);
            }

            $destination->addDataTable($blobTable);

            // remember which merged row owns each subtable ID referenced by this blob
            foreach ($blobTable->getRows() as $sourceRow) {
                $label = $sourceRow->getColumn('label');
                $childId = $sourceRow->getIdSubDataTable();
                if (empty($childId)) {
                    continue;
                }

                $mergedRow = $destination->getRowFromLabel($label);
                if ($mergedRow instanceof Row) {
                    $ownerRows[$periodKey][$childId] = $mergedRow;
                }
            }

            Common::destroy($blobTable);
        }

        return [$aggregate, $sawRow];
    }

    /**
     * Extracts the subtable ID from a blob record name.
     *
     * The candidate ID is the text after the last underscore (or the whole name when it contains none).
     *
     * @return int|null The suffix cast to int when is_numeric() accepts it; null otherwise.
     */
    public static function parseSubtableIdFromBlobName(string $recordName): ?int
    {
        $segments = explode('_', $recordName);
        $suffix = array_pop($segments);

        return is_numeric($suffix) ? (int) $suffix : null;
    }

    /**
     * Makes sure $parentRow has a subtable and hands it back.
     *
     * When the row reports no subtable ID, a new DataTable is created (carrying the aggregation-ops
     * metadata if any was given) and set as the row's subtable.
     *
     * @param Row $parentRow
     * @param array|null $columnsAggregationOperation
     * @return mixed Whatever $parentRow->getSubtable() returns afterwards.
     */
    private static function getOrCreateSubtable(Row $parentRow, ?array $columnsAggregationOperation)
    {
        if (!$parentRow->getIdSubDataTable()) {
            $subtable = new DataTable();
            if (!empty($columnsAggregationOperation)) {
                $subtable->setMetadata(DataTable::COLUMN_AGGREGATION_OPS_METADATA_NAME, $columnsAggregationOperation);
            }
            $parentRow->setSubtable($subtable);
        }

        return $parentRow->getSubtable();
    }
}
72 changes: 72 additions & 0 deletions core/ArchiveProcessor/Record.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,21 @@ class Record
*/
private $multiplePeriodTransform = null;

/**
 * Name of the flat blob record this record is built from; null until one is assigned.
 * Assigned by setBuiltFromFlatRecord().
 *
 * @var string|null
 */
private $builtFromFlatRecord = null;

/**
 * Callback used when rebuilding the hierarchy from the flat record; null until one is assigned.
 * Assigned by setBuiltFromFlatRecord(), which also documents the expected signature.
 *
 * @var callable|null
 */
private $flatToHierarchyPathCallback = null;

/**
 * Optional callback for merging legacy hierarchical aggregates into the flat table; null when not provided.
 * Assigned by setBuiltFromFlatRecord(), which also documents the expected signature.
 *
 * @var callable|null
 */
private $legacyHierarchyToFlatReducerCallback = null;

public static function make($type, $name)
{
$record = new Record();
Expand Down Expand Up @@ -236,4 +251,61 @@ public function getMultiplePeriodTransform(): ?callable
{
return $this->multiplePeriodTransform;
}

/**
 * Marks this blob record as being derived from a flat blob record during non-day aggregation.
 *
 * Use this when day archives store a flat representation and non-day archives should rebuild
 * hierarchy from it. The flat record must be present in getRecordMetadata().
 *
 * @param string $flatRecordName Name of the flat blob record to aggregate first. May only consist of
 *                               ASCII letters, digits, underscores and hyphens.
 * @param callable $flatToHierarchyPathCallback Callback used when rebuilding hierarchy.
 *                                              Signature: function (Row $flatRow, ArchiveProcessor $archiveProcessor, Record $hierarchicalRecord): ?array
 *                                              Return value is the path of labels to map the flat row into the hierarchy.
 * @param callable|null $legacyHierarchyToFlatReducerCallback Optional callback that can merge legacy hierarchical
 *                                                            aggregates into the flat table when some periods do not
 *                                                            have the flat record yet.
 *                                                            Signature: function (DataTable $legacyHierarchy, DataTable $flatTable, ArchiveProcessor $archiveProcessor, Record $hierarchicalRecord): void
 *                                                            The callback is invoked once per legacy source period hierarchy table.
 * @return Record This record, so calls can be chained.
 * @throws \InvalidArgumentException If this record is not a blob record, or if $flatRecordName is empty or
 *                                   contains a character outside the allowed set.
 */
public function setBuiltFromFlatRecord(
    string $flatRecordName,
    callable $flatToHierarchyPathCallback,
    ?callable $legacyHierarchyToFlatReducerCallback = null
): Record {
    if ($this->type !== self::TYPE_BLOB) {
        throw new \InvalidArgumentException('setBuiltFromFlatRecord() can only be used with blob records.');
    }

    // The "D" (dollar-end-only) modifier is required: without it "$" also matches just before a
    // trailing newline, so a name such as "Some_record\n" would wrongly pass validation.
    if (!preg_match('/^[a-zA-Z0-9_-]+$/D', $flatRecordName)) {
        throw new \InvalidArgumentException('Invalid flat record name: ' . $flatRecordName);
    }

    $this->builtFromFlatRecord = $flatRecordName;
    $this->flatToHierarchyPathCallback = $flatToHierarchyPathCallback;
    $this->legacyHierarchyToFlatReducerCallback = $legacyHierarchyToFlatReducerCallback;

    return $this;
}

/**
 * Returns the name of the flat blob record this record is built from, or null when none has been set.
 *
 * @see setBuiltFromFlatRecord()
 */
public function getBuiltFromFlatRecord(): ?string
{
return $this->builtFromFlatRecord;
}

/**
 * Returns the callback used when rebuilding the hierarchy from the flat record, or null when none has been set.
 *
 * @see setBuiltFromFlatRecord()
 */
public function getFlatToHierarchyPathCallback(): ?callable
{
return $this->flatToHierarchyPathCallback;
}

/**
 * Returns the optional callback that merges legacy hierarchical aggregates into the flat table,
 * or null when none has been set.
 *
 * @see setBuiltFromFlatRecord()
 */
public function getLegacyHierarchyToFlatReducerCallback(): ?callable
{
return $this->legacyHierarchyToFlatReducerCallback;
}
}
Loading
Loading