Skip to content

Commit 0df5442

Browse files
authored
Do not index preview URLs for searching (see contao#2550)
Description
-----------

| Q                | A
| -----------------| ---
| Fixed issues     | Fixes contao#2507
| Docs PR or issue | -

@Toflar: If the `X-Robots-Tag` header or the meta robots tag contains `noindex`, the listener will not pass the document to the indexer at all. This means that potentially existing entries will not be deleted. Is this OK?

Commits
-------

17d0e8a Do not index preview URLs for searching
1 parent 946940d commit 0df5442

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

core-bundle/src/EventListener/SearchIndexListener.php

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,26 @@ public function __invoke(TerminateEvent $event): void
7171
return;
7272
}
7373

74-
$document = Document::createFromRequestResponse($request, $event->getResponse());
74+
$response = $event->getResponse();
75+
76+
// Do not index if the X-Robots-Tag header contains "noindex"
77+
if (false !== strpos($response->headers->get('X-Robots-Tag', ''), 'noindex')) {
78+
return;
79+
}
80+
81+
$document = Document::createFromRequestResponse($request, $response);
82+
83+
try {
84+
$robots = $document->getContentCrawler()->filterXPath('//head/meta[@name="robots"]')->first()->attr('content');
85+
86+
// Do not index if the meta robots tag contains "noindex"
87+
if (false !== strpos($robots, 'noindex')) {
88+
return;
89+
}
90+
} catch (\Exception $e) {
91+
// No meta robots tag found
92+
}
93+
7594
$lds = $document->extractJsonLdScripts();
7695

7796
// If there are no json ld scripts at all, this should not be handled by our indexer

core-bundle/tests/EventListener/SearchIndexListenerTest.php

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,5 +121,24 @@ public function getRequestResponse(): \Generator
121121
false,
122122
false,
123123
];
124+
125+
$response = new Response('<html><body><script type="application/ld+json">{"@context":"https:\/\/contao.org\/","@type":"Page","pageId":2,"noSearch":false,"protected":false,"groups":[],"fePreview":false}</script></body></html>', 403);
126+
$response->headers->set('X-Robots-Tag', 'noindex');
127+
128+
yield 'Should not be handled because the X-Robots-Tag header contains "noindex" ' => [
129+
Request::create('/foobar'),
130+
$response,
131+
SearchIndexListener::FEATURE_DELETE | SearchIndexListener::FEATURE_INDEX,
132+
false,
133+
false,
134+
];
135+
136+
yield 'Should not be handled because the meta robots tag contains "noindex" ' => [
137+
Request::create('/foobar'),
138+
new Response('<html><head><meta name="robots" content="noindex,nofollow"/></head><body><script type="application/ld+json">{"@context":"https:\/\/contao.org\/","@type":"Page","pageId":2,"noSearch":false,"protected":false,"groups":[],"fePreview":false}</script></body></html>', 403),
139+
SearchIndexListener::FEATURE_DELETE | SearchIndexListener::FEATURE_INDEX,
140+
false,
141+
false,
142+
];
124143
}
125144
}

manager-bundle/src/Resources/skeleton/web/preview.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
$kernel = ContaoKernel::fromRequest(\dirname(__DIR__), $request);
3131
$response = $kernel->handle($request);
3232

33+
// Prevent preview URLs from being indexed
34+
$response->headers->set('X-Robots-Tag', 'noindex');
35+
3336
// Force no-cache on all responses in the preview front controller
3437
$response->headers->set('Cache-Control', 'no-store');
3538

0 commit comments

Comments
 (0)