Skip to content

Commit 1031c06

Browse files
authored
feat: improved sitemap (#3579)
* feat: extended sitemap functionality * docs: del samples * docs: readme * feat: new sitemap * feat: createLinkInHead removed * docs: updated changeset text * refactor: 'zod' function() instead of self made refine() * Revert "refactor: 'zod' function() instead of self made refine()" This reverts commit 036bac7. undo function()
1 parent 44ba4e1 commit 1031c06

15 files changed

Lines changed: 607 additions & 76 deletions

File tree

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
---
2+
'@astrojs/sitemap': minor
3+
---
4+
5+
# Key features
6+
7+
- Split up your large sitemap into multiple sitemaps by custom limit.
8+
- Ability to add sitemap-specific attributes such as `changefreq`, `lastmod` and `priority`.
9+
- Final output customization via JS function.
10+
- Localization support.
11+
- Reliability: all config options are validated.
12+
13+
## Important changes
14+
15+
The integration always generates at least two files instead of one:
16+
17+
- `sitemap-index.xml` - index file;
18+
- `sitemap-{i}.xml` - actual sitemap.

examples/integrations-playground/astro.config.mjs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ import solid from '@astrojs/solid-js';
99

1010
// https://astro.build/config
1111
export default defineConfig({
12+
site: 'https://example.com',
1213
integrations: [lit(), react(), tailwind(), turbolinks(), partytown(), sitemap(), solid()],
1314
});

packages/integrations/sitemap/README.md

Lines changed: 182 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,35 @@ export default {
6464
}
6565
```
6666

67-
Now, [build your site for production](https://docs.astro.build/en/reference/cli-reference/#astro-build) via the `astro build` command. You should find your sitemap under `dist/sitemap.xml`!
67+
Now, [build your site for production](https://docs.astro.build/en/reference/cli-reference/#astro-build) via the `astro build` command. You should find your _sitemap_ under `dist/sitemap-index.xml` and `dist/sitemap-0.xml`!
68+
69+
Generated sitemap content for a two-page website:
70+
71+
**sitemap-index.xml**
72+
73+
```xml
74+
<?xml version="1.0" encoding="UTF-8"?>
75+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
76+
<sitemap>
77+
<loc>https://stargazers.club/sitemap-0.xml</loc>
78+
</sitemap>
79+
</sitemapindex>
80+
```
81+
82+
**sitemap-0.xml**
83+
<?xml version="1.0" encoding="UTF-8"?>
84+
85+
```xml
86+
<?xml version="1.0" encoding="UTF-8"?>
87+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:news="http://www.google.com/schemas/sitemap-news/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
88+
<url>
89+
<loc>https://stargazers.club/</loc>
90+
</url>
91+
<url>
92+
<loc>https://stargazers.club/second-page/</loc>
93+
</url>
94+
</urlset>
95+
```
6896

6997
You can also check our [Astro Integration Documentation][astro-integration] for more on integrations.
7098

@@ -111,5 +139,158 @@ export default {
111139
}
112140
```
113141

142+
### entryLimit
143+
144+
Non-negative `Number` of entries per sitemap file. Default value is 45000. A sitemap index and multiple sitemaps are created if you have more entries. See explanation on [Google](https://developers.google.com/search/docs/advanced/sitemaps/large-sitemaps).
145+
146+
__astro.config.mjs__
147+
148+
```js
149+
import sitemap from '@astrojs/sitemap';
150+
151+
export default {
152+
site: 'https://stargazers.club',
153+
integrations: [
154+
sitemap({
155+
entryLimit: 10000,
156+
}),
157+
],
158+
}
159+
```
160+
161+
### changefreq, lastmod, priority
162+
163+
`changefreq` - How frequently the page is likely to change. Available values: `always` \| `hourly` \| `daily` \| `weekly` \| `monthly` \| `yearly` \| `never`.
164+
165+
`priority` - The priority of this URL relative to other URLs on your site. Valid values range from 0.0 to 1.0.
166+
167+
`lastmod` - The date of page last modification.
168+
169+
`changefreq` and `priority` are ignored by Google.
170+
171+
See detailed explanation of sitemap specific options on [sitemap.org](https://www.sitemaps.org/protocol.html).
172+
173+
174+
:exclamation: This integration uses the `astro:build:done` hook. The hook exposes only the generated page paths, so with the present version of Astro the integration has no ability to analyze a page's source, frontmatter, etc. As a result, `changefreq`, `lastmod` and `priority` can only be applied to all pages at once or not at all.
175+
176+
__astro.config.mjs__
177+
178+
```js
179+
import sitemap from '@astrojs/sitemap';
180+
181+
export default {
182+
site: 'https://stargazers.club',
183+
integrations: [
184+
sitemap({
185+
changefreq: 'weekly',
186+
priority: 0.7,
187+
lastmod: new Date('2022-02-24'),
188+
}),
189+
],
190+
}
191+
```
192+
193+
### serialize
194+
195+
A sync or async function called for each sitemap entry just before it is written to disk.
196+
197+
It receives a `SitemapItem` object as its parameter, which consists of `url` (required, absolute page URL) and the optional `changefreq`, `lastmod`, `priority` and `links` properties.
198+
199+
The optional `links` property contains a list of `LinkItem` objects describing the alternate versions of the page, including the page itself.
200+
The `LinkItem` type has two required fields: `url` (the fully-qualified URL for the version of this page for the specified language) and `lang` (a supported language code targeted by this version of the page; it is written to the `hreflang` attribute of the generated `<xhtml:link>`).
201+
202+
The `serialize` function should return a `SitemapItem`, whether it was modified or not.
203+
204+
The example below shows how to set the sitemap-specific properties for individual entries.
205+
206+
__astro.config.mjs__
207+
208+
```js
209+
import sitemap from '@astrojs/sitemap';
210+
211+
export default {
212+
site: 'https://stargazers.club',
213+
integrations: [
214+
sitemap({
215+
serialize(item) {
216+
if (/your-special-page/.test(item.url)) {
217+
item.changefreq = 'daily';
218+
item.lastmod = new Date();
219+
item.priority = 0.9;
220+
}
221+
return item;
222+
},
223+
}),
224+
],
225+
}
226+
```
227+
228+
### i18n
229+
230+
To localize a sitemap, pass the `i18n` option in the integration config. The integration will check the generated page paths for the presence of locale keys.
231+
232+
`i18n` object has two required properties:
233+
234+
- `defaultLocale`: `String`. Its value must be one of the `locales` keys.
235+
- `locales`: `Record<String, String>`, a set of key/value pairs. The key is used to look for a locale part in a page path. The value is a language attribute; only letters of the English alphabet and hyphens are allowed. See more about the language attribute on [MDN](https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/lang).
236+
237+
238+
Read more about localization on Google in [Advanced SEO](https://developers.google.com/search/docs/advanced/crawling/localized-versions#all-method-guidelines).
239+
240+
__astro.config.mjs__
241+
242+
```js
243+
import sitemap from '@astrojs/sitemap';
244+
245+
export default {
246+
site: 'https://stargazers.club',
247+
integrations: [
248+
sitemap({
249+
i18n: {
250+
defaultLocale: 'en', // All urls that don't contain `es` or `fr` after `https://stargazers.club/` will be treated as default locale, i.e. `en`
251+
locales: {
252+
en: 'en-US', // The `defaultLocale` value must be present in the `locales` keys
253+
es: 'es-ES',
254+
fr: 'fr-CA',
255+
},
256+
},
257+
}),
258+
],
259+
};
260+
...
261+
262+
```
263+
264+
The sitemap content will be:
265+
266+
```xml
267+
...
268+
<url>
269+
<loc>https://stargazers.club/</loc>
270+
<xhtml:link rel="alternate" hreflang="en-US" href="https://stargazers.club/"/>
271+
<xhtml:link rel="alternate" hreflang="es-ES" href="https://stargazers.club/es/"/>
272+
<xhtml:link rel="alternate" hreflang="fr-CA" href="https://stargazers.club/fr/"/>
273+
</url>
274+
<url>
275+
<loc>https://stargazers.club/es/</loc>
276+
<xhtml:link rel="alternate" hreflang="en-US" href="https://stargazers.club/"/>
277+
<xhtml:link rel="alternate" hreflang="es-ES" href="https://stargazers.club/es/"/>
278+
<xhtml:link rel="alternate" hreflang="fr-CA" href="https://stargazers.club/fr/"/>
279+
</url>
280+
<url>
281+
<loc>https://stargazers.club/fr/</loc>
282+
<xhtml:link rel="alternate" hreflang="en-US" href="https://stargazers.club/"/>
283+
<xhtml:link rel="alternate" hreflang="es-ES" href="https://stargazers.club/es/"/>
284+
<xhtml:link rel="alternate" hreflang="fr-CA" href="https://stargazers.club/fr/"/>
285+
</url>
286+
<url>
287+
<loc>https://stargazers.club/es/second-page/</loc>
288+
<xhtml:link rel="alternate" hreflang="es-ES" href="https://stargazers.club/es/second-page/"/>
289+
<xhtml:link rel="alternate" hreflang="fr-CA" href="https://stargazers.club/fr/second-page/"/>
290+
<xhtml:link rel="alternate" hreflang="en-US" href="https://stargazers.club/second-page/"/>
291+
</url>
292+
...
293+
```
294+
114295
[astro-integration]: https://docs.astro.build/en/guides/integrations-guide/
115296
[astro-ui-frameworks]: https://docs.astro.build/en/core-concepts/framework-components/#using-framework-components

packages/integrations/sitemap/package.json

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,27 @@
1313
},
1414
"keywords": [
1515
"astro-component",
16-
"seo"
16+
"seo",
17+
"sitemap"
1718
],
1819
"bugs": "https://github.com/withastro/astro/issues",
1920
"homepage": "https://astro.build",
2021
"exports": {
2122
".": "./dist/index.js",
2223
"./package.json": "./package.json"
2324
},
25+
"files": [
26+
"dist"
27+
],
2428
"scripts": {
2529
"build": "astro-scripts build \"src/**/*.ts\" && tsc",
2630
"build:ci": "astro-scripts build \"src/**/*.ts\"",
2731
"dev": "astro-scripts dev \"src/**/*.ts\""
2832
},
29-
"dependencies": {},
33+
"dependencies": {
34+
"sitemap": "^7.1.1",
35+
"zod": "^3.17.3"
36+
},
3037
"devDependencies": {
3138
"astro": "workspace:*",
3239
"astro-scripts": "workspace:*"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import type { SitemapOptions } from './index';
2+
3+
/**
 * Default values for the sitemap integration options.
 *
 * `entryLimit` is the default number of entries per sitemap file.
 *
 * NOTE(review): `SitemapOptions & any` collapses to `any`, so this object is
 * effectively unchecked — confirm with callers before tightening the type.
 */
export const SITEMAP_CONFIG_DEFAULTS: SitemapOptions & any = { entryLimit: 45000 };
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/**
 * Accepted values for the sitemap `changefreq` attribute, listed from the
 * most to the least frequent. Declared `as const` so a literal union type
 * can be derived from the tuple.
 */
export const changefreqValues = [
  'always',
  'hourly',
  'daily',
  'weekly',
  'monthly',
  'yearly',
  'never',
] as const;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import { SitemapItemLoose } from 'sitemap';
2+
3+
import type { SitemapOptions } from './index';
4+
import { parseUrl } from './utils/parse-url';
5+
6+
/** Matches URLs whose last path segment is exactly three digits, e.g. `/404` or `/500/`. */
const STATUS_CODE_PAGE_REGEXP = /\/[0-9]{3}\/?$/;

/**
 * Build the list of sitemap entries for a set of page URLs.
 *
 * URLs matching `STATUS_CODE_PAGE_REGEXP` are dropped and the remaining ones are
 * sorted so the output is identical between builds. The `changefreq`, `lastmod`
 * and `priority` options are applied to every entry. When `opts.i18n` provides
 * both `defaultLocale` and `locales`, URLs that `parseUrl` resolves to the same
 * path are cross-linked through the `links` property; an entry with no other
 * localized version gets no `links`.
 *
 * @param pages Page URLs to include; the array itself is not modified.
 * @param finalSiteUrl Base URL of the site, forwarded to `parseUrl`.
 * @param opts Sitemap options; a nullish value is treated as `{}`.
 * @returns One `SitemapItemLoose` per remaining URL, in sorted order.
 */
export function generateSitemap(pages: string[], finalSiteUrl: string, opts: SitemapOptions) {
  const { changefreq, priority: prioritySrc, lastmod: lastmodSrc, i18n } = opts || {};
  // TODO: find way to respect <link rel="canonical"> URLs here
  const urls = pages.filter((url) => !STATUS_CODE_PAGE_REGEXP.test(url));
  urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time

  const lastmod = lastmodSrc?.toISOString();
  const priority = typeof prioritySrc === 'number' ? prioritySrc : undefined;

  const { locales, defaultLocale } = i18n || {};

  // Parse every URL exactly once and group the alternates by their
  // locale-independent path, instead of re-parsing the whole list per URL.
  // `paths` is parallel to `urls`; it stays empty when i18n is not configured.
  const paths: (string | undefined)[] = [];
  const alternatesByPath = new Map<string, { url: string; lang: string }[]>();
  if (defaultLocale && locales) {
    const localeCodes = Object.keys(locales);
    for (const url of urls) {
      const parsed = parseUrl(url, defaultLocale, localeCodes, finalSiteUrl);
      if (!parsed?.path) {
        // No usable path for this URL: it takes no part in cross-linking.
        paths.push(undefined);
        continue;
      }
      paths.push(parsed.path);
      const alternates = alternatesByPath.get(parsed.path) ?? [];
      // `urls` is already sorted, so each group keeps the sorted URL order.
      alternates.push({ url, lang: locales[parsed.locale] });
      alternatesByPath.set(parsed.path, alternates);
    }
  }

  return urls.map((url, index) => {
    const path = paths[index];
    const alternates = path ? alternatesByPath.get(path) : undefined;
    // Fresh copies per entry so that mutating one item's links cannot leak into another.
    const links =
      alternates && alternates.length > 1 ? alternates.map((link) => ({ ...link })) : undefined;

    // NOTE(review): the assertion is kept from the original, presumably because the
    // `changefreq` option type differs from the library's `EnumChangefreq` — confirm.
    return { url, links, lastmod, priority, changefreq } as SitemapItemLoose;
  });
}

0 commit comments

Comments
 (0)