Skip to content

Commit 8e890cf

Browse files
authored
fix: unwrap consecutive $$...$$ blocks merged into a single paragraph by md4c (#151)
1 parent b6168a8 commit 8e890cf

1 file changed

Lines changed: 34 additions & 7 deletions

File tree

cpp/parser/MD4CParser.cpp

Lines changed: 34 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -395,16 +395,43 @@ std::shared_ptr<MarkdownASTNode> MD4CParser::parse(const std::string &markdown,
395395

396396
impl_->flushText();
397397

398-
// md4c emits $$...$$ as an inline span inside a Paragraph — promote to block-level
399-
// so the segment splitter treats it as a standalone math segment.
398+
// md4c wraps certain block-level constructs as inline spans inside a Paragraph.
399+
// When they appear on consecutive lines without a blank separator, md4c merges
400+
// them into a single Paragraph with LineBreak / whitespace Text nodes between them.
401+
// Detect and unwrap these, promoting each block node to a top-level sibling.
402+
// Example: consecutive $$...$$ (LatexMathDisplay) on adjacent lines.
400403
if (impl_->root) {
404+
auto shouldPromote = [](const MarkdownASTNode &n) { return n.type == NodeType::LatexMathDisplay; };
405+
auto isSeparator = [](const MarkdownASTNode &n) {
406+
return n.type == NodeType::LineBreak ||
407+
(n.type == NodeType::Text && n.content.find_first_not_of(" \t\n\r") == std::string::npos);
408+
};
409+
401410
auto &children = impl_->root->children;
402-
for (size_t i = 0; i < children.size(); ++i) {
403-
auto &child = children[i];
404-
if (child->type == NodeType::Paragraph && child->children.size() == 1 &&
405-
child->children[0]->type == NodeType::LatexMathDisplay) {
406-
children[i] = child->children[0];
411+
for (size_t i = 0; i < children.size();) {
412+
auto &para = children[i];
413+
if (para->type != NodeType::Paragraph || para->children.empty()) {
414+
++i;
415+
continue;
416+
}
417+
418+
std::vector<std::shared_ptr<MarkdownASTNode>> promoted;
419+
for (auto &pc : para->children) {
420+
if (shouldPromote(*pc))
421+
promoted.push_back(pc);
422+
else if (!isSeparator(*pc)) {
423+
promoted.clear();
424+
break;
425+
}
426+
}
427+
428+
if (promoted.empty()) {
429+
++i;
430+
continue;
407431
}
432+
auto pos = children.erase(children.begin() + static_cast<ptrdiff_t>(i));
433+
children.insert(pos, promoted.begin(), promoted.end());
434+
i += promoted.size();
408435
}
409436
}
410437

0 commit comments

Comments
 (0)