2222use Piwik \Plugins \BotTracking \Archiver ;
2323use Piwik \Plugins \BotTracking \Dao \BotRequestsDao ;
2424use Piwik \Plugins \BotTracking \Metrics ;
25+ use Piwik \RankingQuery ;
2526use Piwik \Tracker \Action ;
2627use Piwik \Tracker \PageUrl ;
2728
@@ -40,13 +41,19 @@ class AIAssistantReports extends RecordBuilder
4041 'Devin ' => '' ,
4142 ];
4243
44+ /**
45+ * @var int
46+ */
47+ private $ rankingQueryLimit ;
48+
/**
 * Sets up sorting and truncation settings for the bot report tables and
 * caches the row limit that is later applied to ranking queries.
 */
public function __construct()
{
    parent::__construct();

    $this->columnToSortByBeforeTruncation = Metrics::COLUMN_REQUESTS;
    $this->maxRowsInTable = (int)GeneralConfig::getConfigValue('datatable_archiving_maximum_rows_bots');
    $this->maxRowsInSubtable = (int)GeneralConfig::getConfigValue('datatable_archiving_maximum_rows_subtable_bots');

    // Must run after both row limits above are assigned:
    // getRankingQueryLimit() reads maxRowsInTable and maxRowsInSubtable.
    $this->rankingQueryLimit = $this->getRankingQueryLimit();
}
5158
5259 public function getRecordMetadata (ArchiveProcessor $ archiveProcessor ): array
@@ -137,27 +144,7 @@ private function queryAcquiredVisitsByAIAssistant(LogAggregator $logAggregator):
137144 */
138145 private function populateTableForActionType (array $ tables , int $ actionType , LogAggregator $ logAggregator , array $ visits ): void
139146 {
140- $ where = $ logAggregator ->getWhereStatement ('bot ' , 'server_time ' );
141- $ bindBase = $ logAggregator ->getGeneralQueryBindParams ();
142-
143- $ sql = sprintf (
144- "SELECT * FROM (SELECT bot.bot_name, log_action.name AS url, COUNT(*) AS requests
145- FROM %s AS bot
146- INNER JOIN %s AS log_action ON log_action.idaction = bot.idaction_url
147- WHERE log_action.name IS NOT NULL
148- AND log_action.name <> ''
149- AND log_action.type = %d
150- AND %s
151- GROUP BY bot.bot_name, url WITH ROLLUP) AS rollupQuery
152- ORDER BY bot_name, requests DESC, url " ,
153- BotRequestsDao::getPrefixedTableName (),
154- Common::prefixTable ('log_action ' ),
155- $ actionType ,
156- $ where
157- );
158-
159- $ resultSet = Db::query ($ sql , $ bindBase );
160- $ actionRows = [];
147+ $ resultSet = $ this ->queryBotRequests ($ logAggregator , $ actionType );
161148
162149 while ($ row = $ resultSet ->fetch ()) {
163150 /**
@@ -166,45 +153,47 @@ private function populateTableForActionType(array $tables, int $actionType, LogA
166153 $ label = $ row ['bot_name ' ];
167154 $ url = $ row ['url ' ];
168155
169- if (is_null ($ label )) {
156+ if ($ label === null ) {
157+ // top-level rollup result
170158 continue ;
171159 }
172160
173- if (!is_null ($ url )) {
174- $ actionRows [] = $ row ;
161+ if ($ url === null ) {
162+ // second-level rollup result
163+ $ metrics = [
164+ Metrics::COLUMN_REQUESTS => $ row ['requests ' ],
165+ Metrics::COLUMN_DOCUMENT_REQUESTS => $ actionType === Action::TYPE_DOWNLOAD ? $ row ['requests ' ] : 0 ,
166+ Metrics::COLUMN_PAGE_REQUESTS => $ actionType === Action::TYPE_PAGE_URL ? $ row ['requests ' ] : 0 ,
167+ Metrics::COLUMN_ACQUIRED_VISITS => $ visits [$ label ] ?? 0 ,
168+ ];
169+
170+ $ tables [Archiver::AI_ASSISTANTS_PAGES_RECORD ]->sumRowWithLabel ($ label , $ metrics , [Metrics::COLUMN_ACQUIRED_VISITS => 'max ' ]);
171+ $ tables [Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD ]->sumRowWithLabel ($ label , $ metrics , [Metrics::COLUMN_ACQUIRED_VISITS => 'max ' ]);
175172 continue ;
176173 }
177174
178- $ metrics = [
179- Metrics::COLUMN_REQUESTS => $ row ['requests ' ],
180- Metrics::COLUMN_DOCUMENT_REQUESTS => $ actionType === Action::TYPE_DOWNLOAD ? $ row ['requests ' ] : 0 ,
181- Metrics::COLUMN_PAGE_REQUESTS => $ actionType === Action::TYPE_PAGE_URL ? $ row ['requests ' ] : 0 ,
182- Metrics::COLUMN_ACQUIRED_VISITS => $ visits [$ label ] ?? 0 ,
183- ];
184-
185- // we add all records to both tables, so we in the end have the total count of pages & documents in the main table
186- $ tables [Archiver::AI_ASSISTANTS_PAGES_RECORD ]->sumRowWithLabel ($ label , $ metrics , [Metrics::COLUMN_ACQUIRED_VISITS => 'max ' ]);
187- $ tables [Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD ]->sumRowWithLabel ($ label , $ metrics , [Metrics::COLUMN_ACQUIRED_VISITS => 'max ' ]);
188- }
189175
190- $ table = $ tables [Archiver::AI_ASSISTANTS_PAGES_RECORD ];
176+ $ table = $ tables [Archiver::AI_ASSISTANTS_PAGES_RECORD ];
191177
192- if ($ actionType === Action::TYPE_DOWNLOAD ) {
193- $ table = $ tables [Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD ];
194- }
195-
196- // use while / array_shift combination instead of foreach to save memory
197- while (is_array ($ actionRows ) && count ($ actionRows )) {
198- /**
199- * @var array{requests: int, bot_name: string, url: string} $row
200- */
201- $ row = array_shift ($ actionRows );
202- $ label = $ row ['bot_name ' ];
203- $ url = $ row ['url ' ];
178+ if ($ actionType === Action::TYPE_DOWNLOAD ) {
179+ $ table = $ tables [Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD ];
180+ }
204181
205182 $ tableRow = $ table ->getRowFromLabel ($ label );
206183
207- if (empty ($ tableRow )) {
184+ if (false === $ tableRow ) {
185+ // non-rollup row but rollup row is missing
186+ // should not happen, but don't break
187+ continue ;
188+ }
189+
190+ if (
191+ $ url === RankingQuery::LABEL_SUMMARY_ROW
192+ && !$ tableRow ->isSubtableLoaded ()
193+ ) {
194+ // skip creating the subtable if:
195+ // - we are using rollups
196+ // - the only row would be "Others"
208197 continue ;
209198 }
210199
@@ -216,4 +205,49 @@ private function populateTableForActionType(array $tables, int $actionType, LogA
216205 ]);
217206 }
218207 }
208+
/**
 * Runs the aggregation query counting bot requests per bot name and URL
 * for a single action type.
 *
 * The inner query groups by bot name and URL using WITH ROLLUP, so the
 * result contains per-URL rows as well as rollup rows where `url` (and,
 * for the grand total, `bot_name`) is NULL. When a positive ranking query
 * limit is configured, the SQL is wrapped in a RankingQuery before it is
 * executed.
 *
 * @param LogAggregator $logAggregator supplies the WHERE statement and bind parameters
 * @param int           $actionType    value compared against log_action.type
 *
 * @return mixed statement returned by Db::query(); the caller iterates it via fetch()
 */
private function queryBotRequests(LogAggregator $logAggregator, int $actionType)
{
    $where = $logAggregator->getWhereStatement('bot', 'server_time');

    // Ordered by request count first.
    // NOTE(review): presumably so that the highest-volume rows (including the
    // rollup rows) are the ones kept when the ranking limit applies - confirm.
    $sql = sprintf(
        "SELECT * FROM (SELECT bot.bot_name, log_action.name AS url, COUNT(*) AS requests
            FROM %s AS bot
            INNER JOIN %s AS log_action ON log_action.idaction = bot.idaction_url
            WHERE log_action.name IS NOT NULL
            AND log_action.name <> ''
            AND log_action.type = %d
            AND %s
            GROUP BY bot.bot_name, url WITH ROLLUP) AS rollupQuery
            ORDER BY requests DESC, bot_name, url",
        BotRequestsDao::getPrefixedTableName(),
        Common::prefixTable('log_action'),
        $actionType,
        $where
    );

    // A limit of 0 (or less) leaves the query unranked.
    if ($this->rankingQueryLimit > 0) {
        $rankingQuery = new RankingQuery($this->rankingQueryLimit);
        $rankingQuery->addLabelColumn(['bot_name', 'url']);
        $rankingQuery->addColumn('requests', 'sum');

        // NOTE(review): the second argument (true) presumably tells the
        // ranking query that the inner SQL contains rollup rows - confirm
        // against RankingQuery::generateRankingQuery().
        $sql = $rankingQuery->generateRankingQuery($sql, true);
    }

    return Db::query($sql, $logAggregator->getGeneralQueryBindParams());
}
238+
/**
 * Computes the row limit to use for ranking queries.
 *
 * The configured `archiving_ranking_query_row_limit` is raised to at least
 * ten times maxRowsInTable. If the resulting value is exactly 0 the method
 * returns 0; otherwise it returns the largest of that value, maxRowsInTable
 * and maxRowsInSubtable.
 *
 * NOTE(review): because of the "ten times maxRowsInTable" floor, a configured
 * limit of 0 only results in 0 when maxRowsInTable is also 0 - confirm this
 * is the intended behaviour.
 *
 * @return int the limit; values <= 0 mean the caller does not apply a ranking query
 */
private function getRankingQueryLimit(): int
{
    $maxRowsInTable = (int)$this->maxRowsInTable;
    $maxRowsInSubtable = (int)$this->maxRowsInSubtable;

    $configLimit = (int)GeneralConfig::getConfigValue('archiving_ranking_query_row_limit');
    $configLimit = max($configLimit, 10 * $maxRowsInTable);

    if ($configLimit === 0) {
        return 0;
    }

    return max($configLimit, $maxRowsInTable, $maxRowsInSubtable);
}
219253}
0 commit comments