Skip to content

Commit f2867a2

Browse files
committed
Properly handle data removal for sites without visits
1 parent 7081b06 commit f2867a2

5 files changed

Lines changed: 57 additions & 39 deletions

File tree

plugins/BotTracking/BotTracking.php

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
use Piwik\Date;
1515
use Piwik\Plugin;
1616
use Piwik\Plugins\BotTracking\Dao\BotRequestsDao;
17+
use Piwik\Plugins\SitesManager\API;
1718
use Piwik\Tracker\Request;
1819

1920
/**
@@ -66,10 +67,26 @@ public function deleteLogsOlderThan(Date $dateUpperLimit): void
6667
(new BotRequestsDao())->deleteOldRecords($dateUpperLimit);
6768
}
6869

69-
public function deleteDataSubjectsForDeletedSites(array &$result, array $idSitesNoLongerExisting): void
70+
/**
71+
* @param array<string, int> $result
72+
*/
73+
public function deleteDataSubjectsForDeletedSites(array &$result): void
7074
{
71-
$dao = new BotRequestsDao();
72-
$result[$dao::getTableName()] = $dao->deleteRecordsForIdSites($idSitesNoLongerExisting);
75+
$allExistingIdSites = API::getInstance()->getAllSitesId();
76+
$allExistingIdSites = array_map('intval', $allExistingIdSites);
77+
$maxIdSite = max($allExistingIdSites);
78+
79+
if (empty($maxIdSite)) {
80+
return;
81+
}
82+
83+
$dao = new BotRequestsDao();
84+
$idSitesInTable = $dao->getDistinctIdSitesInTable($maxIdSite);
85+
$idSitesNoLongerExisting = array_diff($idSitesInTable, $allExistingIdSites);
86+
87+
if (count($idSitesNoLongerExisting) > 0) {
88+
$result[$dao::getTableName()] = $dao->deleteRecordsForIdSites($idSitesNoLongerExisting);
89+
}
7390
}
7491

7592
/**

plugins/BotTracking/Dao/BotRequestsDao.php

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public static function getPrefixedTableName(): string
3333
*/
3434
public function createTable(): void
3535
{
36-
$tableName = self::getTableName();
36+
$tableName = self::getTableName();
3737
$definition = '
3838
`idrequest` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
3939
`idsite` INT UNSIGNED NOT NULL,
@@ -80,12 +80,12 @@ public function insert(array $data): int
8080
];
8181

8282
$values = [];
83-
$bind = [];
83+
$bind = [];
8484

8585
foreach ($fields as $field) {
8686
if (isset($data[$field])) {
8787
$values[] = '?';
88-
$bind[] = $data[$field];
88+
$bind[] = $data[$field];
8989
} else {
9090
$values[] = 'NULL';
9191
}
@@ -143,4 +143,18 @@ public function deleteRecordsForIdSites(array $siteIds): int
143143

144144
return (int)Db::get()->rowCount($result);
145145
}
146+
147+
/**
148+
* @return int[]
149+
*/
150+
public function getDistinctIdSitesInTable(int $maxIdSite): array
151+
{
152+
$tableName = self::getPrefixedTableName();
153+
$idSitesLogTable = Db::fetchAll('SELECT DISTINCT idsite FROM ' . $tableName);
154+
$idSitesLogTable = array_column($idSitesLogTable, 'idsite');
155+
$idSitesLogTable = array_map('intval', $idSitesLogTable);
156+
return array_filter($idSitesLogTable, function ($idSite) use ($maxIdSite) {
157+
return !empty($idSite) && $idSite <= $maxIdSite;
158+
});
159+
}
146160
}

plugins/BotTracking/tests/System/PurgeLogDataTest.php

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,6 @@ public function testLogDataPurgingRemovesBotRequests(): void
8282

8383
public function testDeleteDataSubjectsForDeletedSitesRemovesBotRequests(): void
8484
{
85-
// track a normal visit that gets removed, otherwise bot requests won't be removed
86-
$t = Fixture::getTracker(1, '2025-02-02 12:00:00');
87-
$t->setUrl('https://matomo.org/faq/123');
88-
Fixture::checkResponse($t->doTrackPageView(''));
89-
9085
// track request for another site
9186
Fixture::createWebsite('2014-02-04');
9287

@@ -109,11 +104,7 @@ public function testDeleteDataSubjectsForDeletedSitesRemovesBotRequests(): void
109104
$dataSubjects = new DataSubjects($logTablesProvider);
110105
$result = $dataSubjects->deleteDataSubjectsForDeletedSites([2]); // idsite 2 still exists
111106
$this->assertEquals([
112-
'log_visit' => 1,
113-
'log_link_visit_action' => 1,
114-
'log_conversion_item' => 0,
115-
'log_conversion' => 0,
116-
'log_bot_request' => 3,
107+
'log_bot_request' => 3,
117108
], $result);
118109

119110
// check that requests were correctly removed

plugins/PrivacyManager/Model/DataSubjects.php

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,24 +65,20 @@ public function deleteDataSubjectsForDeletedSites($allExistingIdSites)
6565

6666
$idSitesNoLongerExisting = array_diff($idSitesUsed, $allExistingIdSites);
6767

68-
if (empty($idSitesNoLongerExisting)) {
69-
// nothing to be deleted... if there is no entry for that table in log_visit or log_link_visit_action
70-
// then there shouldn't be anything to be deleted in other tables either
71-
return [];
72-
}
73-
74-
$logTables = $this->getLogTablesToDeleteFrom();
75-
// It's quicker to call the delete queries one site at a time instead of using the IN operator and potentially
76-
// creating a huge result set
77-
foreach ($idSitesNoLongerExisting as $idSiteNoLongerExisting) {
78-
$r = $this->deleteLogDataFrom($logTables, function ($tableToSelectFrom) use ($idSiteNoLongerExisting) {
79-
return [$tableToSelectFrom . '.idsite = ' . $idSiteNoLongerExisting, []];
80-
});
81-
foreach ($r as $k => $v) {
82-
if (!array_key_exists($k, $results)) {
83-
$results[$k] = 0;
68+
if (!empty($idSitesNoLongerExisting)) {
69+
$logTables = $this->getLogTablesToDeleteFrom();
70+
// It's quicker to call the delete queries one site at a time instead of using the IN operator and potentially
71+
// creating a huge result set
72+
foreach ($idSitesNoLongerExisting as $idSiteNoLongerExisting) {
73+
$r = $this->deleteLogDataFrom($logTables, function ($tableToSelectFrom) use ($idSiteNoLongerExisting) {
74+
return [$tableToSelectFrom . '.idsite = ' . $idSiteNoLongerExisting, []];
75+
});
76+
foreach ($r as $k => $v) {
77+
if (!array_key_exists($k, $results)) {
78+
$results[$k] = 0;
79+
}
80+
$results[$k] += $v;
8481
}
85-
$results[$k] += $v;
8682
}
8783
}
8884

@@ -94,16 +90,18 @@ public function deleteDataSubjectsForDeletedSites($allExistingIdSites)
9490
*
9591
* **Example**
9692
*
97-
* public function deleteDataSubjectsForDeletedSites(&$result, $idSitesNoLongerExisting)
93+
* public function deleteDataSubjectsForDeletedSites(&$result)
9894
* {
99-
* $numDeletes = $this->deleteDataForSites($idSitesNoLongerExisting)
95+
* $existingSiteIds = SitesManager\API::getInstance()->getAllSitesId();
96+
* $idSitesInTable = $this->>getAllSiteIdsInLogTable();
97+
* $idSitesNoLongerExisting = array_diff($existingSiteIds, $idSitesInTable);
98+
* $numDeletes = $this->deleteDataForSites($idSitesNoLongerExisting);
10099
* $result['myplugin'] = $numDeletes;
101100
* }
102101
*
103102
* @param array &$results An array storing the result of how much data was deleted for.
104-
* @param array &$idSitesNoLongerExisting An array with multiple site ids that were removed
105103
*/
106-
Piwik::postEvent('PrivacyManager.deleteDataSubjectsForDeletedSites', [&$results, $idSitesNoLongerExisting]);
104+
Piwik::postEvent('PrivacyManager.deleteDataSubjectsForDeletedSites', [&$results, $allExistingIdSites]);
107105

108106
krsort($results); // make sure test results are always in same order
109107
return $results;

plugins/PrivacyManager/tests/Integration/Model/DataSubjectsTest.php

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,6 @@ public function testDeleteDataSubjectsForDeletedSitesRemovesLogDataThatDoesNotBe
265265
'log_foo' => 6,
266266
'log_conversion_item' => 8,
267267
'log_conversion' => 32,
268-
'log_bot_request' => 0,
269268
], $result);
270269

271270
// assert new counts
@@ -343,7 +342,6 @@ public function testDeleteDataSubjectsForDeletedSitesIgnoresSitesNewerThanThoseI
343342
'log_foo' => 3,
344343
'log_conversion_item' => 4,
345344
'log_conversion' => 16,
346-
'log_bot_request' => 0,
347345
], $result);
348346

349347
// assert new counts

0 commit comments

Comments
 (0)