Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 83 additions & 49 deletions plugins/BotTracking/RecordBuilders/AIAssistantReports.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
use Piwik\Plugins\BotTracking\Archiver;
use Piwik\Plugins\BotTracking\Dao\BotRequestsDao;
use Piwik\Plugins\BotTracking\Metrics;
use Piwik\RankingQuery;
use Piwik\Tracker\Action;
use Piwik\Tracker\PageUrl;

Expand All @@ -40,13 +41,19 @@ class AIAssistantReports extends RecordBuilder
'Devin' => '',
];

/**
 * Row limit handed to RankingQuery when querying bot requests.
 * The ranking query is only applied when this value is greater than 0.
 *
 * @var int
 */
private $rankingQueryLimit;

/**
 * Sets up sorting and truncation for the bot reports from the general config.
 */
public function __construct()
{
    parent::__construct();

    $tableRowLimit = (int)GeneralConfig::getConfigValue('datatable_archiving_maximum_rows_bots');
    $subtableRowLimit = (int)GeneralConfig::getConfigValue('datatable_archiving_maximum_rows_subtable_bots');

    $this->columnToSortByBeforeTruncation = Metrics::COLUMN_REQUESTS;
    $this->maxRowsInTable = $tableRowLimit;
    $this->maxRowsInSubtable = $subtableRowLimit;

    // must come last: the ranking limit is derived from the two row limits set above
    $this->rankingQueryLimit = $this->getRankingQueryLimit();
}

public function getRecordMetadata(ArchiveProcessor $archiveProcessor): array
Expand Down Expand Up @@ -137,27 +144,7 @@ private function queryAcquiredVisitsByAIAssistant(LogAggregator $logAggregator):
*/
private function populateTableForActionType(array $tables, int $actionType, LogAggregator $logAggregator, array $visits): void
{
$where = $logAggregator->getWhereStatement('bot', 'server_time');
$bindBase = $logAggregator->getGeneralQueryBindParams();

$sql = sprintf(
"SELECT * FROM (SELECT bot.bot_name, log_action.name AS url, COUNT(*) AS requests
FROM %s AS bot
INNER JOIN %s AS log_action ON log_action.idaction = bot.idaction_url
WHERE log_action.name IS NOT NULL
AND log_action.name <> ''
AND log_action.type = %d
AND %s
GROUP BY bot.bot_name, url WITH ROLLUP) AS rollupQuery
ORDER BY bot_name, requests DESC, url",
BotRequestsDao::getPrefixedTableName(),
Common::prefixTable('log_action'),
$actionType,
$where
);

$resultSet = Db::query($sql, $bindBase);
$actionRows = [];
$resultSet = $this->queryBotRequests($logAggregator, $actionType);

while ($row = $resultSet->fetch()) {
/**
Expand All @@ -166,45 +153,47 @@ private function populateTableForActionType(array $tables, int $actionType, LogA
$label = $row['bot_name'];
$url = $row['url'];

if (is_null($label)) {
if ($label === null) {
// top-level rollup result
continue;
}

if (!is_null($url)) {
$actionRows[] = $row;
if ($url === null) {
// second-level rollup result
$metrics = [
Metrics::COLUMN_REQUESTS => $row['requests'],
Metrics::COLUMN_DOCUMENT_REQUESTS => $actionType === Action::TYPE_DOWNLOAD ? $row['requests'] : 0,
Metrics::COLUMN_PAGE_REQUESTS => $actionType === Action::TYPE_PAGE_URL ? $row['requests'] : 0,
Metrics::COLUMN_ACQUIRED_VISITS => $visits[$label] ?? 0,
];

$tables[Archiver::AI_ASSISTANTS_PAGES_RECORD]->sumRowWithLabel($label, $metrics, [Metrics::COLUMN_ACQUIRED_VISITS => 'max']);
$tables[Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD]->sumRowWithLabel($label, $metrics, [Metrics::COLUMN_ACQUIRED_VISITS => 'max']);
continue;
}

$metrics = [
Metrics::COLUMN_REQUESTS => $row['requests'],
Metrics::COLUMN_DOCUMENT_REQUESTS => $actionType === Action::TYPE_DOWNLOAD ? $row['requests'] : 0,
Metrics::COLUMN_PAGE_REQUESTS => $actionType === Action::TYPE_PAGE_URL ? $row['requests'] : 0,
Metrics::COLUMN_ACQUIRED_VISITS => $visits[$label] ?? 0,
];

// we add all records to both tables, so we in the end have the total count of pages & documents in the main table
$tables[Archiver::AI_ASSISTANTS_PAGES_RECORD]->sumRowWithLabel($label, $metrics, [Metrics::COLUMN_ACQUIRED_VISITS => 'max']);
$tables[Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD]->sumRowWithLabel($label, $metrics, [Metrics::COLUMN_ACQUIRED_VISITS => 'max']);
}

$table = $tables[Archiver::AI_ASSISTANTS_PAGES_RECORD];
$table = $tables[Archiver::AI_ASSISTANTS_PAGES_RECORD];

if ($actionType === Action::TYPE_DOWNLOAD) {
$table = $tables[Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD];
}

// use while / array_shift combination instead of foreach to save memory
while (is_array($actionRows) && count($actionRows)) {
/**
* @var array{requests: int, bot_name: string, url: string} $row
*/
$row = array_shift($actionRows);
$label = $row['bot_name'];
$url = $row['url'];
if ($actionType === Action::TYPE_DOWNLOAD) {
$table = $tables[Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD];
}

$tableRow = $table->getRowFromLabel($label);

if (empty($tableRow)) {
if (false === $tableRow) {
// non-rollup row but rollup row is missing
// should not happen, but don't break
continue;
}

if (
$url === RankingQuery::LABEL_SUMMARY_ROW
&& !$tableRow->isSubtableLoaded()
) {
// skip creating the subtable if:
// - we are using rollups
// - the only row would be "Others"
continue;
}

Expand All @@ -216,4 +205,49 @@ private function populateTableForActionType(array $tables, int $actionType, LogA
]);
}
}

/**
 * Counts bot requests per bot name and URL for a single action type.
 *
 * The inner query groups by bot name and URL WITH ROLLUP, and the outer query
 * orders the rows by request count (descending), then bot name, then URL.
 * When the ranking query limit is greater than 0, the statement is wrapped in a
 * RankingQuery that uses bot_name and url as label columns and sums requests.
 *
 * @param LogAggregator $logAggregator provides the WHERE condition and the bind parameters
 * @param int $actionType value matched against log_action.type
 * @return mixed whatever Db::query() returns; the caller reads rows from it via fetch()
 */
private function queryBotRequests(LogAggregator $logAggregator, int $actionType)
{
    $timeCondition = $logAggregator->getWhereStatement('bot', 'server_time');

    // the continuation lines of the literal are left unindented so the SQL text stays unchanged
    $query = sprintf(
        "SELECT * FROM (SELECT bot.bot_name, log_action.name AS url, COUNT(*) AS requests
FROM %s AS bot
INNER JOIN %s AS log_action ON log_action.idaction = bot.idaction_url
WHERE log_action.name IS NOT NULL
AND log_action.name <> ''
AND log_action.type = %d
AND %s
GROUP BY bot.bot_name, url WITH ROLLUP) AS rollupQuery
ORDER BY requests DESC, bot_name, url",
        BotRequestsDao::getPrefixedTableName(),
        Common::prefixTable('log_action'),
        $actionType,
        $timeCondition
    );

    $limit = $this->rankingQueryLimit;

    if ($limit > 0) {
        $ranking = new RankingQuery($limit);
        $ranking->addLabelColumn(['bot_name', 'url']);
        $ranking->addColumn('requests', 'sum');

        // NOTE(review): the second argument presumably enables rollup handling - confirm against RankingQuery
        $query = $ranking->generateRankingQuery($query, true);
    }

    $bindParams = $logAggregator->getGeneralQueryBindParams();

    return Db::query($query, $bindParams);
}

/**
 * Determines the row limit used for the ranking query.
 *
 * The base limit is the larger of the configured `archiving_ranking_query_row_limit`
 * and ten times the main table row limit. Unless that base limit is 0, the result is
 * the largest of the base limit, the main table row limit and the subtable row limit.
 *
 * @return int the ranking query row limit, or 0 when the base limit is 0
 */
private function getRankingQueryLimit(): int
{
    $tableRows = (int)$this->maxRowsInTable;
    $subtableRows = (int)$this->maxRowsInSubtable;
    $configured = (int)GeneralConfig::getConfigValue('archiving_ranking_query_row_limit');

    // NOTE(review): a configured limit of 0 is replaced by 10 * $tableRows whenever the
    // table row limit is positive - confirm that overriding a 0 setting is intended
    $baseLimit = max($configured, 10 * $tableRows);

    return $baseLimit === 0
        ? 0
        : max($baseLimit, $tableRows, $subtableRows);
}
}
2 changes: 2 additions & 0 deletions plugins/BotTracking/tests/Fixtures/BotTraffic.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,10 @@ private function trackBotRequests(): void
['ChatGPT-User/1.0', $pages[1], 500, 25896, false],
['ChatGPT-User/1.0', $downloads[1], 200, 33658, true],
['Perplexity-User/1.0', $pages[2], 200, 36985, false],
['Perplexity-User/1.0', $pages[2], 200, 36985, false],
['MistralAI-User/2.0', $pages[3], 200, 85236, false],
['Claude-User/3.0', $downloads[3], 200, 12456, true],
['Claude-User/3.0', $downloads[4], 200, 35562, true],
],
2 => [
['Perplexity-User/1.0', $downloads[3], 200, 84269, true],
Expand Down
84 changes: 84 additions & 0 deletions plugins/BotTracking/tests/System/RankingQueryApiTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<?php

/**
* Matomo - free/libre analytics platform
*
* @link https://matomo.org
* @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/

declare(strict_types=1);

namespace Piwik\Plugins\BotTracking\tests\System;

use Piwik\Cache;
use Piwik\Config;
use Piwik\Plugins\BotTracking\tests\Fixtures\BotTraffic;
use Piwik\Tests\Framework\TestCase\SystemTestCase;

/**
 * System tests for BotTracking.getAIAssistantRequests when archiving runs with a
 * ranking query row limit of 3 and a subtable row limit of 2.
 *
 * @group BotTracking
 */
class RankingQueryApiTest extends SystemTestCase
{
    /**
     * @var BotTraffic
     */
    public static $fixture;

    public function testRankingQueryUsesOthersRowPages(): void
    {
        // no limit here, so we see that the ranking query creates the others row
        $this->runRankingLimitApiTest(0, 'pages', 'ranking_limit_pages');
    }

    public function testRankingQueryUsesOthersRowDocuments(): void
    {
        $this->runRankingLimitApiTest(4, 'documents', 'ranking_limit_documents');
    }

    public static function getOutputPrefix()
    {
        return '';
    }

    public static function getPathToTestDirectory()
    {
        return __DIR__;
    }

    /**
     * Applies the row limits, clears caches and archive tables, then runs the API test
     * for the day and week periods of 2025-02-03.
     *
     * @param int $maxRowsBots value written to `datatable_archiving_maximum_rows_bots`
     * @param string $secondaryDimension value sent as the `secondaryDimension` request parameter
     * @param string $testSuffix suffix passed to runApiTests() as `testSuffix`
     */
    private function runRankingLimitApiTest(int $maxRowsBots, string $secondaryDimension, string $testSuffix): void
    {
        $general = &Config::getInstance()->General;
        $general['archiving_ranking_query_row_limit'] = 3;
        $general['datatable_archiving_maximum_rows_bots'] = $maxRowsBots;
        $general['datatable_archiving_maximum_rows_subtable_bots'] = 2;

        Cache::flushAll();
        self::deleteArchiveTables();

        $apiMethods = ['BotTracking.getAIAssistantRequests'];
        $testParameters = [
            'idSite' => 1,
            'date' => '2025-02-03',
            'periods' => ['day', 'week'],
            'otherRequestParameters' => [
                'expanded' => 1,
                'secondaryDimension' => $secondaryDimension,
            ],
            'testSuffix' => $testSuffix,
        ];

        $this->runApiTests($apiMethods, $testParameters);
    }
}

RankingQueryApiTest::$fixture = new BotTraffic();
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,22 @@
</row>
<row>
<label>Perplexity-User</label>
<requests>1</requests>
<requests>2</requests>
<document_requests>0</document_requests>
<page_requests>1</page_requests>
<page_requests>2</page_requests>
<visits_acquired>1</visits_acquired>
</row>
<row>
<label>Claude-User</label>
<requests>2</requests>
<document_requests>2</document_requests>
<requests>3</requests>
<document_requests>3</document_requests>
<page_requests>0</page_requests>
<visits_acquired>0</visits_acquired>
<subtable>
<row>
<label>example.com/resources/case-study.pdf</label>
<requests>1</requests>
</row>
<row>
<label>example.com/resources/datasheet.pdf</label>
<requests>1</requests>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
<result>
<row>
<label>Claude-User</label>
<requests>4</requests>
<document_requests>2</document_requests>
<requests>5</requests>
<document_requests>3</document_requests>
<page_requests>2</page_requests>
<visits_acquired>2</visits_acquired>
<subtable>
<row>
<label>example.com/resources/case-study.pdf</label>
<requests>1</requests>
</row>
<row>
<label>example.com/resources/datasheet.pdf</label>
<requests>1</requests>
Expand All @@ -19,9 +23,9 @@
</row>
<row>
<label>Perplexity-User</label>
<requests>6</requests>
<requests>7</requests>
<document_requests>5</document_requests>
<page_requests>1</page_requests>
<page_requests>2</page_requests>
<visits_acquired>2</visits_acquired>
<subtable>
<row>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
</row>
<row>
<label>Perplexity-User - example.com/article-3</label>
<requests>1</requests>
<requests>2</requests>
<BotTracking_AIAssistantName>Perplexity-User</BotTracking_AIAssistantName>
<BotTracking_PageUrl>example.com/article-3</BotTracking_PageUrl>
</row>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
</row>
<row>
<label>Perplexity-User - example.com/article-3</label>
<requests>1</requests>
<requests>2</requests>
<BotTracking_AIAssistantName>Perplexity-User</BotTracking_AIAssistantName>
<BotTracking_PageUrl>example.com/article-3</BotTracking_PageUrl>
</row>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,21 @@
</row>
<row>
<label>Perplexity-User</label>
<requests>1</requests>
<requests>2</requests>
<document_requests>0</document_requests>
<page_requests>1</page_requests>
<page_requests>2</page_requests>
<visits_acquired>1</visits_acquired>
<subtable>
<row>
<label>example.com/article-3</label>
<requests>1</requests>
<requests>2</requests>
</row>
</subtable>
</row>
<row>
<label>Claude-User</label>
<requests>2</requests>
<document_requests>2</document_requests>
<requests>3</requests>
<document_requests>3</document_requests>
<page_requests>0</page_requests>
<visits_acquired>0</visits_acquired>
</row>
Expand Down
Loading