Skip to content

Commit fbd98c4

Browse files
authored
feat(site): 预渲染 503 静态课程页修复 SEO 空壳 (#28)
* feat(site): 预渲染 503 静态课程页修复 SEO 空壳 + 渲染器抽至 md-render.js - 新增 writeLessonPages:构建时用与浏览器同一份渲染器把正文烤进 site/lessons/<phase>/<lesson>/index.html,每课独立 title/desc/canonical/JSON-LD - parseMd 等纯函数簇从 lesson.html 抽至 md-render.js(浏览器与 Node 共用, 预渲染产物 = 运行时渲染产物) - lesson.html boot 识别 __PRERENDERED__:跳过 fetch,setTimeout(0) 后直接 enhanceLesson + fetchQuiz(同步调用会踩 var 赋值未执行的坑) - 站内全部 lesson 链接(sidebar/上下课/时间线/cmdpalette/app/prereqs)和 sitemap/llms.txt 切到 /lessons/ 静态 URL;旧 ?path= 入口保留,canonical 指静态页 - 产物 gitignore(同 sitemap/llms 策略,Vercel buildCommand 生成);vercel.json 补 /lessons/ 缓存头 * fix(site): 落实双审意见——模板锚点 fail-fast + 导航只指已生成页 + figure 属性转义 - writeLessonPages 三个模板锚点(LOADING_BLOCK / md-render script / </head>)前置校验,任一失配即中止构建;__PRERENDERED__ 注入后逐页断言 (注入丢失的后果是 boot showError 把烤好的正文覆盖成错误页,必须拦死) - 烤进静态页的 prev/next 导航只指向 zh.md 真实存在的课,避免 /lessons/ 404 - figure 块 data-figure 属性改用 escapeAttr(含双引号会截断属性, lesson.html 旧实现同款潜在 bug,共享渲染器一并修正) - updateLessonSeo 空 path 防御(避免生成 /lessons// 坏 canonical)
1 parent bab918e commit fbd98c4

9 files changed

Lines changed: 724 additions & 446 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ mlruns/
6161
# 以免漂移或丢 URL。每次从 PHASES 重新生成。
6262
site/sitemap.xml
6363
site/llms.txt
64+
# 预渲染静态课程页(writeLessonPages 生成,503 个 index.html)——同款策略不提交
65+
site/lessons/
6466

6567
# 上游 scripts/build_catalog.py 的产物(上游也 gitignore 它)。本仓不用那套
6668
# (它认 docs/en.md),课数校验改用 site/build.js --check——忽略以防误提交。

site/app.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@
249249

250250
var actionHtml = '';
251251
if ((l.status === 'complete' || userComplete) && lessonPath) {
252-
actionHtml = '<a href="lesson.html?path=' + lessonPath + '" class="modal-lesson-read">' + (userComplete ? '回顾' : '阅读') + '</a>';
252+
actionHtml = '<a href="/lessons/' + lessonPath.replace(/^phases\//, '') + '/" class="modal-lesson-read">' + (userComplete ? '回顾' : '阅读') + '</a>';
253253
}
254254
var toggleHtml = '';
255255
if (hasProgress && lessonPath) {

site/build.js

Lines changed: 182 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,21 @@ const OUTPUT_PATH = path.join(__dirname, 'data.js');
2020
const GITHUB_BASE = 'https://github.com/fancyboi999/ai-engineering-from-scratch-zh/tree/main/';
2121
const SITE_ORIGIN = 'https://aieng-zh.cn';
2222

23+
// 与浏览器端同一份渲染器(从 lesson.html 抽出);预渲染产物 = 运行时渲染产物
24+
const mdRender = require('./md-render.js');
25+
2326
// GITHUB_BASE lesson url -> site path "phases/<phase>/<lesson>"
2427
/**
 * Extract the site-relative lesson path from a lesson URL.
 *
 * @param {string} url - Lesson URL whose tail is "phases/<phase>/<lesson>",
 *   optionally followed by a single trailing slash.
 * @returns {string|null} "phases/<phase>/<lesson>" (no trailing slash), or
 *   null when url is falsy or does not end in that shape.
 */
function lessonPath(url) {
  if (url) {
    const found = url.match(/(phases\/[^/]+\/[^/]+)\/?$/);
    if (found) {
      return found[1];
    }
  }
  return null;
}
2932

33+
// 预渲染静态课程页的站内路径(与 lesson.html 的 lessonHref 同构,两边要一起改)
34+
/**
 * Build the site-absolute URL of a lesson's prerendered page.
 *
 * @param {string} relPath - Lesson path such as "phases/<phase>/<lesson>";
 *   one leading "phases/" segment is dropped.
 * @returns {string} "/lessons/<phase>/<lesson>/" (always with trailing slash).
 */
function lessonStaticHref(relPath) {
  const withoutPrefix = relPath.replace(/^phases\//, '');
  return `/lessons/${withoutPrefix}/`;
}
37+
3038
// ─── Parse ROADMAP.md for lesson statuses ────────────────────────────
3139
function parseRoadmap(content) {
3240
const statuses = {}; // { "Phase 0": { phaseStatus, lessons: { "Dev Environment": "complete" } } }
@@ -463,6 +471,7 @@ const ARTIFACTS = ${JSON.stringify(artifacts, null, 2)};
463471
syncCounts(totalLessons, artifacts.length);
464472
writeSitemap(phases, glossaryTerms.length);
465473
writeLlms(phases, glossaryTerms.length, artifacts.length);
474+
writeLessonPages(phases); // 必须在 syncCounts 之后:模板里的课数文案要先对齐
466475
}
467476

468477
// ─── sitemap.xml:从站点渲染的同一份 PHASES 生成 ─────────────────────
@@ -478,7 +487,7 @@ function writeSitemap(phases, glossaryCount) {
478487
for (const phase of phases) {
479488
for (const l of phase.lessons) {
480489
const p = lessonPath(l.url);
481-
if (p) urls.push({ loc: '/lesson.html?path=' + p, priority: '0.6', freq: 'monthly' });
490+
if (p) urls.push({ loc: lessonStaticHref(p), priority: '0.6', freq: 'monthly' });
482491
}
483492
}
484493
const body = urls.map(u =>
@@ -509,7 +518,7 @@ function writeLlms(phases, glossaryCount, artifactCount) {
509518
const p = lessonPath(l.url);
510519
if (!p) continue;
511520
const note = l.summary ? ` — ${l.summary}` : '';
512-
out += `- [${l.name}](${SITE_ORIGIN}/lesson.html?path=${p})${note}\n`;
521+
out += `- [${l.name}](${SITE_ORIGIN}${lessonStaticHref(p)})${note}\n`;
513522
}
514523
out += `\n`;
515524
}
@@ -521,6 +530,177 @@ function writeLlms(phases, glossaryCount, artifactCount) {
521530
console.log(` wrote llms.txt`);
522531
}
523532

533+
// ─── 预渲染静态课程页:site/lessons/<phase>/<lesson>/index.html ──────
534+
// 旧 lesson.html?path= 入口的正文靠浏览器运行时 fetch 渲染,爬虫拿到的是
535+
// 503 个字节级相同的空壳(百度不执行跨域 fetch,Google 渲染预算对新站极少)。
536+
// 这里用与浏览器同一份渲染器(md-render.js)在构建时把正文烤进 HTML:
537+
// 每课独立 URL + 独立 title/description/canonical/JSON-LD。产物 gitignore,
538+
// 与 sitemap/llms 同款策略——Vercel buildCommand 部署时生成。
539+
// 旧 ?path= URL 保持可用(兼容外部旧链接),其 canonical 指向这些静态页。
540+
/**
 * Render the prev/next navigation bar that is baked into a prerendered
 * lesson page.
 *
 * Neighbours are the closest entries before/after `idx` whose `isReadable`
 * flag is set; entries without it are stepped over. When there is no previous
 * neighbour an empty <div> placeholder is emitted in its place; when there is
 * no next neighbour nothing is emitted for it.
 *
 * @param {Array<{name: string, rel: string, isReadable: boolean}>} flat -
 *   Flattened lesson list in display order.
 * @param {number} idx - Index in `flat` of the lesson being rendered.
 * @returns {string} HTML for the <div class="lesson-nav-bottom"> element.
 */
function bottomNavHtml(flat, idx) {
  // Walk away from idx in one direction until a readable lesson turns up.
  const nearestReadable = (start, step) => {
    for (let i = start; i >= 0 && i < flat.length; i += step) {
      if (flat[i].isReadable) return flat[i];
    }
    return null;
  };

  // One nav anchor. The href goes through escapeAttr (not only the title
  // through escapeHtml) so a quote or ampersand in a lesson path cannot
  // truncate or corrupt the attribute.
  const navButton = (side, entry, labelHtml) =>
    '<a class="lesson-nav-btn ' + side + '" href="' +
    mdRender.escapeAttr(lessonStaticHref(entry.rel)) + '">' +
    '<span class="nav-label">' + labelHtml + '</span>' +
    '<span class="nav-title">' + mdRender.escapeHtml(entry.name) + '</span>' +
    '</a>';

  const prev = nearestReadable(idx - 1, -1);
  const next = nearestReadable(idx + 1, 1);

  let nav = '<div class="lesson-nav-bottom">';
  nav += prev ? navButton('prev', prev, '&larr; 上一节') : '<div></div>';
  if (next) {
    nav += navButton('next', next, '下一节 &rarr;');
  }
  nav += '</div>';
  return nav;
}
567+
568+
/**
 * Serialize the JSON-LD graph for one lesson page: a LearningResource that is
 * part of the course, plus a three-step BreadcrumbList (home, catalog, lesson).
 *
 * @param {string} title - Lesson title.
 * @param {string} desc - Lesson description.
 * @param {string} url - Absolute URL of the lesson page.
 * @returns {string} JSON text with every "<" written as \u003c, so a title or
 *   description containing "</script>" cannot close the enclosing script tag.
 */
function lessonJsonLd(title, desc, url) {
  const course = {
    '@type': 'Course',
    name: 'AI Engineering from Scratch 简体中文版',
    url: SITE_ORIGIN,
  };
  const learningResource = {
    '@type': 'LearningResource',
    name: title,
    description: desc,
    url,
    inLanguage: 'zh-CN',
    isAccessibleForFree: true,
    isPartOf: course,
  };

  // Breadcrumb steps in order; position is derived from the array index.
  const steps = [
    ['首页', SITE_ORIGIN],
    ['课程表', SITE_ORIGIN + '/catalog.html'],
    [title, url],
  ];
  const breadcrumbs = {
    '@type': 'BreadcrumbList',
    itemListElement: steps.map(([name, item], i) => ({
      '@type': 'ListItem',
      position: i + 1,
      name,
      item,
    })),
  };

  const serialized = JSON.stringify({
    '@context': 'https://schema.org',
    '@graph': [learningResource, breadcrumbs],
  });
  return serialized.replace(/</g, '\\u003c');
}
586+
587+
/**
 * Prerender one static page per lesson into
 * site/lessons/<phase>/<lesson>/index.html, using lesson.html as the template
 * and md-render.js (the renderer the browser also loads) for the body.
 *
 * Each page gets its own <title>, description/og/twitter meta, canonical URL,
 * JSON-LD block, the rendered article, and an inline
 * `window.__PRERENDERED__ = {path}` marker placed before the md-render.js
 * script tag.
 *
 * The build is aborted with exit code 1 when a template anchor is missing or
 * the marker injection cannot be verified. Lessons that have a path but no
 * docs/zh.md are skipped and listed in a warning.
 *
 * @param {Array<{lessons: Array<{name: string, url: string}>}>} phases -
 *   Phases in display order; each lesson's `url` is resolved via lessonPath().
 * @returns {void}
 */
function writeLessonPages(phases) {
  const template = fs.readFileSync(path.join(__dirname, 'lesson.html'), 'utf8');
  const lessonsRoot = path.join(__dirname, 'lessons');
  // Wipe previous output so renamed lessons do not leave orphan pages behind.
  fs.rmSync(lessonsRoot, { recursive: true, force: true });

  // Template-relative href/src -> root-absolute: generated pages live two
  // directories deep, so relative references would break. Only values that
  // start with a letter/digit/underscore are rewritten; scheme URLs, "//",
  // "/", "#" and anything else are left untouched.
  const absolutized = template.replace(
    /\b(href|src)="(?!(?:[a-z][a-z0-9+.-]*:|\/\/|\/|#))([A-Za-z0-9_][^"]*)"/g,
    '$1="/$2"'
  );

  // NOTE(review): the whitespace inside these anchor literals has to match
  // lesson.html byte-for-byte — confirm against the template.
  const LOADING_BLOCK =
    ' <div class="lesson-content" id="lessonContent">\n' +
    ' <div class="lesson-loading" id="lessonLoading">\n' +
    ' <div class="spinner"></div>\n' +
    ' <div class="lesson-loading-text">课程加载中...</div>\n' +
    ' </div>\n' +
    ' </div>';
  // Injection point for the __PRERENDERED__ marker.
  const SCRIPT_ANCHOR = ' <script src="/md-render.js';

  // Validate every template anchor up front: String.prototype.replace returns
  // its input unchanged when nothing matches, so a drifted template would
  // otherwise produce broken pages without any error.
  const anchorErrors = [];
  if (absolutized.indexOf(LOADING_BLOCK) === -1) anchorErrors.push('加载占位块(lessonContent/spinner)');
  if (absolutized.indexOf(SCRIPT_ANCHOR) === -1) anchorErrors.push('md-render.js script 标签(__PRERENDERED__ 注入点)');
  if (absolutized.split('</head>').length - 1 !== 1) anchorErrors.push('</head> 不是恰好 1 处(JSON-LD 注入点)');
  if (anchorErrors.length) {
    console.error('❌ writeLessonPages:lesson.html 模板锚点失配(模板结构变了?),预渲染中止:');
    anchorErrors.forEach(e => console.error(' - ' + e));
    process.exit(1);
  }

  // Flat lesson list in phase/lesson order, used for prev/next navigation.
  // isReadable is true only when docs/zh.md exists, i.e. only for lessons
  // that actually get a static page, so nav never links to a missing page.
  const flat = [];
  for (const phase of phases) {
    for (const l of phase.lessons) {
      const rel = lessonPath(l.url);
      const hasDoc = !!rel && fs.existsSync(path.join(REPO_ROOT, rel, 'docs', 'zh.md'));
      flat.push({ name: l.name, rel, isReadable: hasDoc });
    }
  }

  let written = 0;
  const skipped = [];
  for (let i = 0; i < flat.length; i++) {
    const f = flat[i];
    if (!f.rel) continue;
    // Skip on the same flag the navigation uses, so "linked" and "generated"
    // cannot disagree. A lesson without zh.md is an expected state.
    if (!f.isReadable) {
      skipped.push(f.rel);
      continue;
    }
    // Deliberately not wrapped in try/catch: zh.md was just seen to exist, so
    // a failure here (permissions, not a regular file, ...) is a real error
    // and should stop the build instead of being reported as "no zh.md".
    const md = fs.readFileSync(path.join(REPO_ROOT, f.rel, 'docs', 'zh.md'), 'utf8');

    const title = mdRender.extractTitle(md);
    const desc = mdRender.lessonDescription(md);
    const url = SITE_ORIGIN + lessonStaticHref(f.rel);
    const docTitle = title + ' - AI Engineering from Scratch';
    const ogTitle = title + ' · AI Engineering from Scratch 简体中文版';

    // Article body: rendered markdown + AI panel placeholder + prev/next nav.
    let body = mdRender.parseMd(md);
    body += '<div class="ai-panels" id="aiPanels"></div>';
    body += bottomNavHtml(flat, i);
    const article =
      ' <div class="lesson-content" id="lessonContent"><article class="lesson-article">' +
      body + '</article></div>';

    // Every replacement below is a function so that "$" sequences in lesson
    // content are inserted literally instead of being read as patterns.
    let page = absolutized;
    page = page.replace('<html lang="en"', '<html lang="zh-CN"');
    page = page.replace(/<title>[^<]*<\/title>/, () => '<title>' + mdRender.escapeHtml(docTitle) + '</title>');
    // Overwrite the content="" of the first matching <meta attr="name" ...>.
    const setMeta = (attr, name, value) => {
      const re = new RegExp('(<meta ' + attr + '="' + name + '" content=")[^"]*(")');
      page = page.replace(re, (m, p1, p2) => p1 + mdRender.escapeAttr(value) + p2);
    };
    setMeta('name', 'description', desc);
    setMeta('property', 'og:title', ogTitle);
    setMeta('property', 'og:description', desc);
    setMeta('name', 'twitter:title', ogTitle);
    setMeta('name', 'twitter:description', desc);

    // The URL lands inside double-quoted attributes, so escape it like the
    // other attribute values written above.
    const urlAttr = mdRender.escapeAttr(url);
    page = page.replace(
      /<link rel="canonical" href="[^"]*">/,
      () => '<link rel="canonical" href="' + urlAttr + '">\n <meta property="og:url" content="' + urlAttr + '">'
    );
    page = page.replace('</head>', () =>
      ' <script type="application/ld+json" id="lessonJsonLd">' + lessonJsonLd(title, desc, url) + '</script>\n</head>');
    page = page.replace(LOADING_BLOCK, () => article);

    // Prerender marker, inserted ahead of the md-render.js script tag.
    // "<" is written as \u003c so the path can never close the inline script.
    const prerenderedTag =
      ' <script>window.__PRERENDERED__ = {path: ' +
      JSON.stringify(f.rel).replace(/</g, '\\u003c') +
      '};</script>\n';
    page = page.replace(SCRIPT_ANCHOR, () => prerenderedTag + SCRIPT_ANCHOR);
    // Check for the full injected prefix rather than the bare identifier,
    // which also occurs in the template's own inline script.
    if (page.indexOf('<script>window.__PRERENDERED__ = {path:') === -1) {
      console.error('❌ writeLessonPages:' + f.rel + ' 的 __PRERENDERED__ 注入失败,中止');
      process.exit(1);
    }

    const outDir = path.join(lessonsRoot, f.rel.replace(/^phases\//, ''));
    fs.mkdirSync(outDir, { recursive: true });
    fs.writeFileSync(path.join(outDir, 'index.html'), page, 'utf8');
    written++;
  }

  console.log(` wrote ${written} prerendered lesson pages under site/lessons/`);
  if (skipped.length) {
    console.warn(`⚠️ ${skipped.length} 课登记了但没有 docs/zh.md,未生成静态页:`);
    skipped.forEach(s => console.warn(' - ' + s));
  }
}
703+
524704
// ─── 自动同步站点文案里的课程数 / 产出数(单一真相 = 本次构建)─────
525705
// 每次同步新课只需在 README 表格补行 + 跑 build,站点这些散落的数字
526706
// 会自动对齐,不必手动逐个改(435 漂了好几个月、489 项产出过时都因此)。

site/cmdpalette.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -379,13 +379,13 @@
379379
if (r.kind === 'lesson') {
380380
// Prefer the in-site reader; fall back to GitHub URL
381381
dest = r.lessonPath
382-
? 'lesson.html?path=' + encodeURIComponent(r.lessonPath)
382+
? '/lessons/' + r.lessonPath.replace(/^phases\//, '') + '/'
383383
: r.url;
384384
chip = '阶段 ' + String(r.phaseId).padStart(2, '0');
385385
} else if (r.kind === 'artifact') {
386386
// Jump to the lesson that produced this artifact
387387
dest = r.lessonPath
388-
? 'lesson.html?path=' + encodeURIComponent(r.lessonPath)
388+
? '/lessons/' + r.lessonPath.replace(/^phases\//, '') + '/'
389389
: ('https://github.com/fancyboi999/ai-engineering-from-scratch-zh/tree/main/' + r.file);
390390
var ak = (r.artKind || 'artifact');
391391
chip = artifactKindLabel(ak);

site/data.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// Auto-generated by build.js — do not edit manually.
2-
// Last built: 2026-06-12T02:01:31.260Z
2+
// Last built: 2026-06-12T06:45:00.023Z
33

44
const PHASES = [
55
{

0 commit comments

Comments
 (0)