Skip to content

Commit 30d01c6

Browse files
committed
Use a ranking query for bot requests
1 parent e590f05 commit 30d01c6

6 files changed

Lines changed: 405 additions & 29 deletions

plugins/BotTracking/RecordBuilders/AIAssistantReports.php

Lines changed: 75 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
use Piwik\Plugins\BotTracking\Archiver;
2323
use Piwik\Plugins\BotTracking\Dao\BotRequestsDao;
2424
use Piwik\Plugins\BotTracking\Metrics;
25+
use Piwik\RankingQuery;
2526
use Piwik\Tracker\Action;
2627
use Piwik\Tracker\PageUrl;
2728

@@ -40,13 +41,19 @@ class AIAssistantReports extends RecordBuilder
4041
'Devin' => '',
4142
];
4243

44+
/**
45+
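* @var int Row limit handed to RankingQuery when querying bot requests; when it is not greater than 0 the query runs without a ranking query.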
46+
*/
47+
private $rankingQueryLimit;
48+
4349
/**
 * Sets up sorting and row limits for the bot report tables.
 *
 * The table and subtable row limits are read from the
 * 'datatable_archiving_maximum_rows_bots' and
 * 'datatable_archiving_maximum_rows_subtable_bots' config values.
 */
public function __construct()
{
    parent::__construct();

    $tableRowLimit = GeneralConfig::getConfigValue('datatable_archiving_maximum_rows_bots');
    $subtableRowLimit = GeneralConfig::getConfigValue('datatable_archiving_maximum_rows_subtable_bots');

    $this->columnToSortByBeforeTruncation = Metrics::COLUMN_REQUESTS;
    $this->maxRowsInTable = (int)$tableRowLimit;
    $this->maxRowsInSubtable = (int)$subtableRowLimit;

    // Must run after both row limits are assigned: getRankingQueryLimit() reads them.
    $this->rankingQueryLimit = $this->getRankingQueryLimit();
}
5158

5259
public function getRecordMetadata(ArchiveProcessor $archiveProcessor): array
@@ -137,27 +144,9 @@ private function queryAcquiredVisitsByAIAssistant(LogAggregator $logAggregator):
137144
*/
138145
private function populateTableForActionType(array $tables, int $actionType, LogAggregator $logAggregator, array $visits): void
139146
{
140-
$where = $logAggregator->getWhereStatement('bot', 'server_time');
141-
$bindBase = $logAggregator->getGeneralQueryBindParams();
142-
143-
$sql = sprintf(
144-
"SELECT * FROM (SELECT bot.bot_name, log_action.name AS url, COUNT(*) AS requests
145-
FROM %s AS bot
146-
INNER JOIN %s AS log_action ON log_action.idaction = bot.idaction_url
147-
WHERE log_action.name IS NOT NULL
148-
AND log_action.name <> ''
149-
AND log_action.type = %d
150-
AND %s
151-
GROUP BY bot.bot_name, url WITH ROLLUP) AS rollupQuery
152-
ORDER BY bot_name, requests DESC, url",
153-
BotRequestsDao::getPrefixedTableName(),
154-
Common::prefixTable('log_action'),
155-
$actionType,
156-
$where
157-
);
158-
159-
$resultSet = Db::query($sql, $bindBase);
147+
$resultSet = $this->queryBotRequests($logAggregator, $actionType);
160148
$actionRows = [];
149+
$botTotals = [];
161150

162151
while ($row = $resultSet->fetch()) {
163152
/**
@@ -170,19 +159,28 @@ private function populateTableForActionType(array $tables, int $actionType, LogA
170159
continue;
171160
}
172161

173-
if (!is_null($url)) {
174-
$actionRows[] = $row;
162+
if (is_null($url)) {
175163
continue;
176164
}
177165

178-
$metrics = [
179-
Metrics::COLUMN_REQUESTS => $row['requests'],
180-
Metrics::COLUMN_DOCUMENT_REQUESTS => $actionType === Action::TYPE_DOWNLOAD ? $row['requests'] : 0,
181-
Metrics::COLUMN_PAGE_REQUESTS => $actionType === Action::TYPE_PAGE_URL ? $row['requests'] : 0,
182-
Metrics::COLUMN_ACQUIRED_VISITS => $visits[$label] ?? 0,
183-
];
166+
$actionRows[] = $row;
167+
168+
if (!isset($botTotals[$label])) {
169+
$botTotals[$label] = [
170+
Metrics::COLUMN_REQUESTS => 0,
171+
Metrics::COLUMN_DOCUMENT_REQUESTS => 0,
172+
Metrics::COLUMN_PAGE_REQUESTS => 0,
173+
Metrics::COLUMN_ACQUIRED_VISITS => 0,
174+
];
175+
}
176+
177+
$botTotals[$label][Metrics::COLUMN_REQUESTS] += $row['requests'];
178+
$botTotals[$label][Metrics::COLUMN_DOCUMENT_REQUESTS] += $actionType === Action::TYPE_DOWNLOAD ? $row['requests'] : 0;
179+
$botTotals[$label][Metrics::COLUMN_PAGE_REQUESTS] += $actionType === Action::TYPE_PAGE_URL ? $row['requests'] : 0;
180+
$botTotals[$label][Metrics::COLUMN_ACQUIRED_VISITS] = max($botTotals[$label][Metrics::COLUMN_ACQUIRED_VISITS], $visits[$label] ?? 0);
181+
}
184182

185-
// we add all records to both tables, so we in the end have the total count of pages & documents in the main table
183+
foreach ($botTotals as $label => $metrics) {
186184
$tables[Archiver::AI_ASSISTANTS_PAGES_RECORD]->sumRowWithLabel($label, $metrics, [Metrics::COLUMN_ACQUIRED_VISITS => 'max']);
187185
$tables[Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD]->sumRowWithLabel($label, $metrics, [Metrics::COLUMN_ACQUIRED_VISITS => 'max']);
188186
}
@@ -202,6 +200,10 @@ private function populateTableForActionType(array $tables, int $actionType, LogA
202200
$label = $row['bot_name'];
203201
$url = $row['url'];
204202

203+
if ($label === RankingQuery::LABEL_SUMMARY_ROW) {
204+
continue;
205+
}
206+
205207
$tableRow = $table->getRowFromLabel($label);
206208

207209
if (empty($tableRow)) {
@@ -216,4 +218,48 @@ private function populateTableForActionType(array $tables, int $actionType, LogA
216218
]);
217219
}
218220
}
221+
222+
/**
 * Counts bot requests per (bot_name, url) pair for a single action type.
 *
 * The base query joins the bot requests table with log_action, keeps only
 * actions of the given type that have a non-empty name, and restricts the
 * rows with the aggregator's where statement. When the ranking query limit
 * is greater than 0, the base query is wrapped in a RankingQuery that uses
 * bot_name and url as label columns and sums the requests column.
 *
 * @param LogAggregator $logAggregator supplies the where statement and the bind parameters
 * @param int $actionType value compared against log_action.type
 * @return mixed the result of Db::query(); the caller reads rows from it via fetch()
 */
private function queryBotRequests(LogAggregator $logAggregator, int $actionType)
{
    // $actionType is declared as int, so appending it to the SQL text cannot inject arbitrary SQL.
    $conditions = $logAggregator->getWhereStatement('bot', 'server_time');
    $conditions .= ' AND log_action.name IS NOT NULL
AND log_action.name <> \'\'
AND log_action.type = ' . $actionType;

    $botTable = BotRequestsDao::getPrefixedTableName();
    $actionTable = Common::prefixTable('log_action');

    $query = sprintf(
        "SELECT bot.bot_name, log_action.name AS url, COUNT(*) AS requests
FROM %s AS bot
INNER JOIN %s AS log_action ON log_action.idaction = bot.idaction_url
WHERE %s
GROUP BY bot.bot_name, url
ORDER BY bot.bot_name, requests DESC, url",
        $botTable,
        $actionTable,
        $conditions
    );

    if ($this->rankingQueryLimit <= 0) {
        // No limit configured: run the plain grouped query.
        return Db::query($query, $logAggregator->getGeneralQueryBindParams());
    }

    // NOTE(review): rows beyond the limit are presumably folded into a row labelled
    // RankingQuery::LABEL_SUMMARY_ROW, which the caller checks for - confirm against RankingQuery.
    $ranking = new RankingQuery($this->rankingQueryLimit);
    $ranking->addLabelColumn(['bot_name', 'url']);
    $ranking->addColumn('requests', 'sum');

    return Db::query(
        $ranking->generateRankingQuery($query),
        $logAggregator->getGeneralQueryBindParams()
    );
}
250+
251+
/**
 * Derives the row limit used for the bot request ranking query.
 *
 * Starts from the larger of the 'archiving_ranking_query_row_limit' config
 * value and ten times maxRowsInTable. If that value is exactly 0, 0 is
 * returned; otherwise the result is the largest of that value,
 * maxRowsInTable and maxRowsInSubtable.
 *
 * NOTE(review): a configured limit of 0 does not by itself yield 0 when
 * maxRowsInTable is greater than 0 - confirm this is intended.
 *
 * @return int the ranking query row limit
 */
private function getRankingQueryLimit(): int
{
    $tableRowCap = (int)$this->maxRowsInTable;
    $subtableRowCap = (int)$this->maxRowsInSubtable;

    $limit = max(
        (int)GeneralConfig::getConfigValue('archiving_ranking_query_row_limit'),
        10 * $tableRowCap
    );

    return $limit === 0
        ? 0
        : max($limit, $tableRowCap, $subtableRowCap);
}
219265
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<?php
2+
3+
/**
4+
* Matomo - free/libre analytics platform
5+
*
6+
* @link https://matomo.org
7+
* @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
8+
*/
9+
10+
declare(strict_types=1);
11+
12+
namespace Piwik\Plugins\BotTracking\tests\System;
13+
14+
use Piwik\Cache;
15+
use Piwik\Config;
16+
use Piwik\Plugins\BotTracking\tests\Fixtures\BotTraffic;
17+
use Piwik\Tests\Framework\TestCase\SystemTestCase;
18+
19+
/**
 * System tests for BotTracking.getAIAssistantRequests with a small
 * 'archiving_ranking_query_row_limit', run for the pages and the documents
 * secondary dimension.
 *
 * @group BotTracking
 */
class RankingQueryApiTest extends SystemTestCase
{
    /**
     * @var BotTraffic
     */
    public static $fixture;

    public function testRankingQueryUsesOthersRowPages(): void
    {
        // no limit on top-level rows here, so we see that the ranking query creates the others row
        $this->runRankingLimitApiTest(0, 'pages', 'ranking_limit_pages');
    }

    public function testRankingQueryUsesOthersRowDocuments(): void
    {
        $this->runRankingLimitApiTest(4, 'documents', 'ranking_limit_documents');
    }

    public static function getOutputPrefix()
    {
        return '';
    }

    public static function getPathToTestDirectory()
    {
        return __DIR__;
    }

    /**
     * Applies the row-limit configuration shared by both tests, clears caches
     * and existing archive tables, and runs the API test for day and week.
     *
     * @param int $maxRowsBots value for 'datatable_archiving_maximum_rows_bots'
     * @param string $secondaryDimension value of the 'secondaryDimension' request parameter
     * @param string $testSuffix suffix passed to runApiTests() as 'testSuffix'
     */
    private function runRankingLimitApiTest(int $maxRowsBots, string $secondaryDimension, string $testSuffix): void
    {
        $generalConfig = &Config::getInstance()->General;
        $generalConfig['archiving_ranking_query_row_limit'] = 3;
        $generalConfig['datatable_archiving_maximum_rows_bots'] = $maxRowsBots;
        $generalConfig['datatable_archiving_maximum_rows_subtable_bots'] = 2;

        // Flush caches and drop archives before the API request, after the config change.
        Cache::flushAll();
        self::deleteArchiveTables();

        $this->runApiTests(['BotTracking.getAIAssistantRequests'], [
            'idSite' => 1,
            'date' => '2025-02-03',
            'periods' => ['day', 'week'],
            'otherRequestParameters' => [
                'expanded' => 1,
                'secondaryDimension' => $secondaryDimension,
            ],
            'testSuffix' => $testSuffix,
        ]);
    }
}
83+
84+
RankingQueryApiTest::$fixture = new BotTraffic();
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<result>
3+
<row>
4+
<label>ChatGPT-User</label>
5+
<requests>2</requests>
6+
<document_requests>1</document_requests>
7+
<page_requests>1</page_requests>
8+
<visits_acquired>1</visits_acquired>
9+
<subtable>
10+
<row>
11+
<label>example.com/resources/guide.pdf</label>
12+
<requests>1</requests>
13+
</row>
14+
</subtable>
15+
</row>
16+
<row>
17+
<label>Perplexity-User</label>
18+
<requests>1</requests>
19+
<document_requests>0</document_requests>
20+
<page_requests>1</page_requests>
21+
<visits_acquired>1</visits_acquired>
22+
</row>
23+
<row>
24+
<label>Claude-User</label>
25+
<requests>2</requests>
26+
<document_requests>2</document_requests>
27+
<page_requests>0</page_requests>
28+
<visits_acquired>0</visits_acquired>
29+
<subtable>
30+
<row>
31+
<label>example.com/resources/datasheet.pdf</label>
32+
<requests>1</requests>
33+
</row>
34+
<row>
35+
<label>example.com/resources/whitepaper.pdf</label>
36+
<requests>1</requests>
37+
</row>
38+
</subtable>
39+
</row>
40+
<row>
41+
<label>MistralAI-User</label>
42+
<requests>2</requests>
43+
<document_requests>0</document_requests>
44+
<page_requests>2</page_requests>
45+
<visits_acquired>0</visits_acquired>
46+
</row>
47+
</result>
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<result>
3+
<row>
4+
<label>Perplexity-User</label>
5+
<requests>5</requests>
6+
<document_requests>4</document_requests>
7+
<page_requests>1</page_requests>
8+
<visits_acquired>2</visits_acquired>
9+
<subtable>
10+
<row>
11+
<label>example.com/resources/case-study.pdf</label>
12+
<requests>1</requests>
13+
</row>
14+
<row>
15+
<label>Others</label>
16+
<requests>3</requests>
17+
</row>
18+
</subtable>
19+
</row>
20+
<row>
21+
<label>ChatGPT-User</label>
22+
<requests>5</requests>
23+
<document_requests>1</document_requests>
24+
<page_requests>4</page_requests>
25+
<visits_acquired>1</visits_acquired>
26+
<subtable>
27+
<row>
28+
<label>example.com/resources/guide.pdf</label>
29+
<requests>1</requests>
30+
</row>
31+
</subtable>
32+
</row>
33+
<row>
34+
<label>MistralAI-User</label>
35+
<requests>4</requests>
36+
<document_requests>0</document_requests>
37+
<page_requests>4</page_requests>
38+
<visits_acquired>0</visits_acquired>
39+
</row>
40+
<row>
41+
<label>Others</label>
42+
<requests>12</requests>
43+
<document_requests>5</document_requests>
44+
<page_requests>7</page_requests>
45+
<visits_acquired>3</visits_acquired>
46+
</row>
47+
</result>
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<result>
3+
<row>
4+
<label>ChatGPT-User</label>
5+
<requests>2</requests>
6+
<document_requests>1</document_requests>
7+
<page_requests>1</page_requests>
8+
<visits_acquired>1</visits_acquired>
9+
<subtable>
10+
<row>
11+
<label>example.com/article-2</label>
12+
<requests>1</requests>
13+
</row>
14+
</subtable>
15+
</row>
16+
<row>
17+
<label>Claude-User</label>
18+
<requests>2</requests>
19+
<document_requests>2</document_requests>
20+
<page_requests>0</page_requests>
21+
<visits_acquired>0</visits_acquired>
22+
</row>
23+
<row>
24+
<label>MistralAI-User</label>
25+
<requests>2</requests>
26+
<document_requests>0</document_requests>
27+
<page_requests>2</page_requests>
28+
<visits_acquired>0</visits_acquired>
29+
<subtable>
30+
<row>
31+
<label>example.com/article-3</label>
32+
<requests>1</requests>
33+
</row>
34+
<row>
35+
<label>example.com/article-4</label>
36+
<requests>1</requests>
37+
</row>
38+
</subtable>
39+
</row>
40+
<row>
41+
<label>Others</label>
42+
<requests>1</requests>
43+
<document_requests>0</document_requests>
44+
<page_requests>1</page_requests>
45+
<visits_acquired>0</visits_acquired>
46+
</row>
47+
</result>

0 commit comments

Comments
 (0)