Skip to content

Commit c667ef6

Browse files
authored
fix(seo): reduce sitemap bloat by filtering versioned docs and low-value pages (#2016)
1 parent 2fe09ab commit c667ef6

File tree

2 files changed

+163
-1
lines changed

2 files changed

+163
-1
lines changed

scripts/update-sitemap-loc.js

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,53 @@ const sitemapXMLs = [
1515
],
1616
];
1717

18+
/**
 * URL patterns to exclude from the sitemap.
 *
 * Why:
 *  - Versioned doc URLs (e.g. /docs/apisix/3.14/) duplicate the latest
 *    unversioned paths (e.g. /docs/apisix/) and bloat the sitemap.
 *    Only the unversioned (latest) URLs should be indexed.
 *  - /docs/.../next/ pages are for unreleased development docs.
 *  - /search pages are blocked by robots.txt — keeping them in
 *    the sitemap sends contradictory signals to crawlers.
 *  - /blog/tags/ and /blog/page/ are low-value aggregation/pagination
 *    pages, also blocked by robots.txt.
 *
 * Every pattern is unanchored at the start, so it matches both the default
 * locale and prefixed locales (e.g. /zh/docs/...). None of them carries the
 * `g` or `y` flag, so `.test()` keeps no state between calls.
 *
 * @type {RegExp[]}
 */
const excludePatterns = [
  // Versioned docs: /docs/<project>/<version> where <version> is dotted
  // digits with two or more components (3.14, 1.8.0, ...). The segment may be
  // followed by "/" or end the URL, so the version root is caught with or
  // without a trailing slash.
  /\/docs\/[\w-]+\/\d+(?:\.\d+)+(?:\/|$)/,
  // Development "next" docs. Ending the segment with "/" or end-of-URL keeps
  // unrelated slugs that merely start with "next" in the sitemap.
  /\/docs\/[\w-]+\/next(?:\/|$)/,
  // Search pages (blocked by robots.txt)
  /\/search\/?$/,
  // Blog tag and pagination pages (blocked by robots.txt)
  /\/blog\/tags\//,
  /\/blog\/page\//,
];
42+
43+
/**
 * Decide whether a URL has to be left out of the sitemap.
 *
 * @param {string} url - URL to check against `excludePatterns`.
 * @returns {boolean} true as soon as one exclusion pattern matches,
 *   false when none does.
 */
function shouldExclude(url) {
  for (const rule of excludePatterns) {
    if (rule.test(url)) {
      return true;
    }
  }
  return false;
}
49+
50+
/**
 * Filter out excluded URLs from a sitemap object and return removal count.
 *
 * The sitemap is modified in place: `sitemap.urlset.url` is always replaced
 * by an array holding only the entries that survive the filter.
 *
 * @param {object} sitemap - Parsed sitemap in compact form; URL entries are
 *   read from `sitemap.urlset.url` and each URL from `entry.loc._text`.
 * @returns {number} How many entries were removed.
 */
function filterSitemapUrls(sitemap) {
  const raw = sitemap.urlset.url;

  // A urlset without any <url> child has no `url` key at all, a single child
  // is a bare object, and several children are an array. Normalise all three
  // shapes to an array so an empty sitemap no longer crashes the filter.
  let urls;
  if (Array.isArray(raw)) {
    urls = raw;
  } else if (raw === undefined || raw === null) {
    urls = [];
  } else {
    urls = [raw];
  }

  const before = urls.length;
  sitemap.urlset.url = urls.filter((entry) => {
    const loc = entry && entry.loc && entry.loc._text;
    // Entries without a usable <loc> are kept untouched; dropping them is
    // not this filter's job.
    if (!loc) {
      return true;
    }
    // Trim first: whitespace around the <loc> text would otherwise defeat
    // patterns anchored at the end of the URL.
    return !shouldExclude(String(loc).trim());
  });
  return before - sitemap.urlset.url.length;
}
64+
1865
const tasks = new Listr([
1966
{
2067
title: `Check sitemap.xml files exist`,
@@ -27,7 +74,7 @@ const tasks = new Listr([
2774
),
2875
},
2976
{
30-
title: `Merge sitemap.xml files`,
77+
title: `Merge and filter sitemap.xml files`,
3178
task: () => new Listr(
3279
sitemapXMLs.map((group) => ({
3380
title: `Merge ${group[0]}`,
@@ -42,6 +89,8 @@ const tasks = new Listr([
4289
...sitemaps[i].urlset.url,
4390
];
4491
}
92+
const removed = filterSitemapUrls(res);
93+
console.log(` Filtered out ${removed} URLs from ${group[0]}`);
4594
return res;
4695
})
4796
.then((sitemap) => writeFile(group[0], js2xml(sitemap, { compact: true }, 'utf-8'))),

website/static/robots.txt

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,119 @@ Disallow: /zh/blog/page/
88
Disallow: /search
99
Disallow: /zh/search
1010

11+
# Blog aggregation and pagination pages (low-value for indexing)
12+
Disallow: /blog/tags/
13+
Disallow: /zh/blog/tags/
14+
Disallow: /blog/page/
15+
Disallow: /zh/blog/page/
16+
17+
# Search pages
18+
Disallow: /search
19+
Disallow: /zh/search
20+
21+
# Versioned docs — only the unversioned (latest) paths should be indexed.
22+
# e.g. /docs/apisix/ is the latest; /docs/apisix/3.14/ is a duplicate.
23+
Disallow: /docs/apisix/3.10/
24+
Disallow: /docs/apisix/3.11/
25+
Disallow: /docs/apisix/3.12/
26+
Disallow: /docs/apisix/3.13/
27+
Disallow: /docs/apisix/3.14/
28+
Disallow: /docs/apisix/3.15/
29+
Disallow: /docs/apisix/next/
30+
Disallow: /docs/ingress-controller/3.10/
31+
Disallow: /docs/ingress-controller/3.11/
32+
Disallow: /docs/ingress-controller/3.12/
33+
Disallow: /docs/ingress-controller/3.13/
34+
Disallow: /docs/ingress-controller/3.14/
35+
Disallow: /docs/ingress-controller/3.15/
36+
Disallow: /docs/ingress-controller/next/
37+
Disallow: /docs/helm-chart/3.10/
38+
Disallow: /docs/helm-chart/3.11/
39+
Disallow: /docs/helm-chart/3.12/
40+
Disallow: /docs/helm-chart/3.13/
41+
Disallow: /docs/helm-chart/3.14/
42+
Disallow: /docs/helm-chart/3.15/
43+
Disallow: /docs/helm-chart/next/
44+
Disallow: /docs/docker/3.10/
45+
Disallow: /docs/docker/3.11/
46+
Disallow: /docs/docker/3.12/
47+
Disallow: /docs/docker/3.13/
48+
Disallow: /docs/docker/3.14/
49+
Disallow: /docs/docker/3.15/
50+
Disallow: /docs/docker/next/
51+
Disallow: /docs/java-plugin-runner/3.10/
52+
Disallow: /docs/java-plugin-runner/3.11/
53+
Disallow: /docs/java-plugin-runner/3.12/
54+
Disallow: /docs/java-plugin-runner/3.13/
55+
Disallow: /docs/java-plugin-runner/3.14/
56+
Disallow: /docs/java-plugin-runner/3.15/
57+
Disallow: /docs/java-plugin-runner/next/
58+
Disallow: /docs/go-plugin-runner/3.10/
59+
Disallow: /docs/go-plugin-runner/3.11/
60+
Disallow: /docs/go-plugin-runner/3.12/
61+
Disallow: /docs/go-plugin-runner/3.13/
62+
Disallow: /docs/go-plugin-runner/3.14/
63+
Disallow: /docs/go-plugin-runner/3.15/
64+
Disallow: /docs/go-plugin-runner/next/
65+
Disallow: /docs/python-plugin-runner/3.10/
66+
Disallow: /docs/python-plugin-runner/3.11/
67+
Disallow: /docs/python-plugin-runner/3.12/
68+
Disallow: /docs/python-plugin-runner/3.13/
69+
Disallow: /docs/python-plugin-runner/3.14/
70+
Disallow: /docs/python-plugin-runner/3.15/
71+
Disallow: /docs/python-plugin-runner/next/
72+
73+
# Chinese equivalents
74+
Disallow: /zh/docs/apisix/3.10/
75+
Disallow: /zh/docs/apisix/3.11/
76+
Disallow: /zh/docs/apisix/3.12/
77+
Disallow: /zh/docs/apisix/3.13/
78+
Disallow: /zh/docs/apisix/3.14/
79+
Disallow: /zh/docs/apisix/3.15/
80+
Disallow: /zh/docs/apisix/next/
81+
Disallow: /zh/docs/ingress-controller/3.10/
82+
Disallow: /zh/docs/ingress-controller/3.11/
83+
Disallow: /zh/docs/ingress-controller/3.12/
84+
Disallow: /zh/docs/ingress-controller/3.13/
85+
Disallow: /zh/docs/ingress-controller/3.14/
86+
Disallow: /zh/docs/ingress-controller/3.15/
87+
Disallow: /zh/docs/ingress-controller/next/
88+
Disallow: /zh/docs/helm-chart/3.10/
89+
Disallow: /zh/docs/helm-chart/3.11/
90+
Disallow: /zh/docs/helm-chart/3.12/
91+
Disallow: /zh/docs/helm-chart/3.13/
92+
Disallow: /zh/docs/helm-chart/3.14/
93+
Disallow: /zh/docs/helm-chart/3.15/
94+
Disallow: /zh/docs/helm-chart/next/
95+
Disallow: /zh/docs/docker/3.10/
96+
Disallow: /zh/docs/docker/3.11/
97+
Disallow: /zh/docs/docker/3.12/
98+
Disallow: /zh/docs/docker/3.13/
99+
Disallow: /zh/docs/docker/3.14/
100+
Disallow: /zh/docs/docker/3.15/
101+
Disallow: /zh/docs/docker/next/
102+
Disallow: /zh/docs/java-plugin-runner/3.10/
103+
Disallow: /zh/docs/java-plugin-runner/3.11/
104+
Disallow: /zh/docs/java-plugin-runner/3.12/
105+
Disallow: /zh/docs/java-plugin-runner/3.13/
106+
Disallow: /zh/docs/java-plugin-runner/3.14/
107+
Disallow: /zh/docs/java-plugin-runner/3.15/
108+
Disallow: /zh/docs/java-plugin-runner/next/
109+
Disallow: /zh/docs/go-plugin-runner/3.10/
110+
Disallow: /zh/docs/go-plugin-runner/3.11/
111+
Disallow: /zh/docs/go-plugin-runner/3.12/
112+
Disallow: /zh/docs/go-plugin-runner/3.13/
113+
Disallow: /zh/docs/go-plugin-runner/3.14/
114+
Disallow: /zh/docs/go-plugin-runner/3.15/
115+
Disallow: /zh/docs/go-plugin-runner/next/
116+
Disallow: /zh/docs/python-plugin-runner/3.10/
117+
Disallow: /zh/docs/python-plugin-runner/3.11/
118+
Disallow: /zh/docs/python-plugin-runner/3.12/
119+
Disallow: /zh/docs/python-plugin-runner/3.13/
120+
Disallow: /zh/docs/python-plugin-runner/3.14/
121+
Disallow: /zh/docs/python-plugin-runner/3.15/
122+
Disallow: /zh/docs/python-plugin-runner/next/
123+
11124
Sitemap: https://apisix.apache.org/sitemap.xml
12125

13126
Sitemap: https://apisix.apache.org/zh/sitemap.xml

0 commit comments

Comments
 (0)