Skip to content

Commit cc103aa

Browse files
committed
feat(markdown): add Telegram MarkdownV2 support and fallbacks
Improve cross-platform markdown compatibility by adding Telegram-specific escaping and graceful fallbacks for unsupported tags while refining CommonMark text handling; includes docs/tests updates and bumps package version to 5.3.0.
1 parent 229f995 commit cc103aa

23 files changed

Lines changed: 367 additions & 11 deletions

File tree

README.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ If you have used and benefitted from this library. Please feel free to sponsor m
1717
**Markdown flavors**
1818
- GitHub Flavoured Markdown conversion for br, pre, tasklists, and table. Use `var config = new ReverseMarkdown.Config(githubFlavoured:true);`. By default, tables are always converted to GitHub Flavored Markdown regardless of this flag.
1919
- Slack Flavoured Markdown conversion. Use `var config = new ReverseMarkdown.Config { SlackFlavored = true };`
20+
- Telegram MarkdownV2 conversion. Use `var config = new ReverseMarkdown.Config { TelegramMarkdownV2 = true };`
2021
- CommonMark-focused output with opt-in flags to preserve compatibility. Use `var config = new ReverseMarkdown.Config { CommonMark = true };` This mode may emit inline HTML for tricky emphasis/link cases unless you disable `CommonMarkUseHtmlInlineTags`.
2122

2223
**Tables**
@@ -101,18 +102,42 @@ If you need to preserve markdown-like text as literal content (for example `# He
101102
```cs
102103
var config = new ReverseMarkdown.Config
103104
{
104-
EscapeMarkdownLineStarts = true
105+
EscapeMarkdownLineStarts = true,
105106
// or CommonMark = true
106107
};
107108

108109
var converter = new ReverseMarkdown.Converter(config);
109110
```
110111

112+
### Telegram MarkdownV2 mode
113+
114+
When `TelegramMarkdownV2` is enabled, ReverseMarkdown applies Telegram-compatible formatting and escaping rules:
115+
116+
```cs
117+
var converter = new ReverseMarkdown.Converter(new ReverseMarkdown.Config
118+
{
119+
TelegramMarkdownV2 = true
120+
});
121+
122+
var html = "This is <strong>bold</strong>, <em>italic</em>, <del>strikethrough</del> and <a href=\"https://example.com/path_(one)?q=1)2\">a_b[c]</a>";
123+
var result = converter.Convert(html);
124+
// This is *bold*, _italic_, ~strikethrough~ and [a\_b\[c\]](https://example.com/path_(one\)?q=1\)2)
125+
```
126+
127+
Notes:
128+
129+
- Telegram-reserved characters are escaped in text and link labels; inside link URLs, `)` is escaped (see the example above).
130+
- Ordered and unordered list markers are escaped (`1\.` and `\-`).
131+
- `<img>` falls back to a link label (for example `[Image: alt](url)`, or `[Image](url)` when the image has no alt text).
132+
- `<table>` falls back to a preformatted code block representation.
133+
- `<sup>` falls back to caret notation (for example `x^2`).
134+
111135
## Configuration options
112136

113137
* `DefaultCodeBlockLanguage` - Option to set the default code block language for Github style markdown if class based language markers are not available
114138
* `GithubFlavored` - Github style markdown for br, pre and table. Default is false
115139
* `SlackFlavored` - Slack style markdown formatting. When enabled, uses `*` for bold, `_` for italic, `~` for strikethrough, and `•` for list bullets. Default is false
140+
* `TelegramMarkdownV2` - Telegram MarkdownV2 formatting and escaping rules. When enabled, output escapes Telegram-reserved characters and uses Telegram-compatible emphasis and link syntax. For unsupported Telegram constructs, ReverseMarkdown falls back to readable text (`<img>` to link label, `<table>` to preformatted block, `<sup>` to caret notation). Default is false
116141
* `CommonMark` - Enable CommonMark-focused output rules. Default is false
117142
* `CommonMarkUseHtmlInlineTags` - When CommonMark is enabled, emit HTML for inline tags (`em`, `strong`, `a`, `img`) to avoid delimiter edge cases. Default is true
118143
* `CommonMarkIntrawordEmphasisSpacing` - When CommonMark is enabled, insert spaces to avoid intraword emphasis. Default is false

dotnet

Whitespace-only changes.

src/ReverseMarkdown.Test/ConverterTests.cs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,82 @@ public void SlackFlavored_Unsupported_Hr()
647647
Assert.Throws<SlackUnsupportedTagException>(() => converter.Convert(html));
648648
}
649649

650+
[Fact]
651+
public void TelegramMarkdownV2_BasicFormatting()
652+
{
653+
var html = "This is <strong>bold</strong>, <em>italic</em>, <del>strikethrough</del> and <a href=\"https://example.com\">a link</a>";
654+
var converter = new Converter(new Config { TelegramMarkdownV2 = true });
655+
656+
var result = converter.Convert(html);
657+
658+
Assert.Equal("This is *bold*, _italic_, ~strikethrough~ and [a link](https://example.com)", result);
659+
}
660+
661+
[Fact]
662+
public void TelegramMarkdownV2_EscapeSpecialCharactersInText()
663+
{
664+
var html = "<p>Special _ * [ ] ( ) ~ ` > # + - = | { } . ! \\</p>";
665+
var converter = new Converter(new Config { TelegramMarkdownV2 = true });
666+
667+
var result = converter.Convert(html);
668+
669+
Assert.Equal("Special \\_ \\* \\[ \\] \\( \\) \\~ \\` \\> \\# \\+ \\- \\= \\| \\{ \\} \\. \\! \\\\", result);
670+
}
671+
672+
[Fact]
673+
public void TelegramMarkdownV2_EscapeLinkTextAndHref()
674+
{
675+
var html = "<a href=\"https://example.com/path_(one)?q=1)2\">a_b[c]</a>";
676+
var converter = new Converter(new Config { TelegramMarkdownV2 = true });
677+
678+
var result = converter.Convert(html);
679+
680+
Assert.Equal("[a\\_b\\[c\\]](https://example.com/path_(one\\)?q=1\\)2)", result);
681+
}
682+
683+
[Fact]
684+
public void TelegramMarkdownV2_EscapesListMarkers()
685+
{
686+
var html = "<ul><li>Item 1</li></ul><ol><li>Item 2</li></ol>";
687+
var converter = new Converter(new Config { TelegramMarkdownV2 = true });
688+
689+
var result = converter.Convert(html);
690+
691+
Assert.Contains("\\- Item 1", result);
692+
Assert.Contains("1\\. Item 2", result);
693+
}
694+
695+
[Fact]
696+
public void TelegramMarkdownV2_Img_FallsBackToLink()
697+
{
698+
var converter = new Converter(new Config { TelegramMarkdownV2 = true });
699+
700+
var result = converter.Convert("<img src=\"https://example.com/test.png\" />");
701+
702+
Assert.Equal("[Image](https://example.com/test.png)", result);
703+
}
704+
705+
[Fact]
706+
public void TelegramMarkdownV2_Sup_FallsBackToCaretNotation()
707+
{
708+
var converter = new Converter(new Config { TelegramMarkdownV2 = true });
709+
710+
var result = converter.Convert("x<sup>2</sup>");
711+
712+
Assert.Equal("x^2", result);
713+
}
714+
715+
[Fact]
716+
public void TelegramMarkdownV2_Table_FallsBackToCodeBlock()
717+
{
718+
var converter = new Converter(new Config { TelegramMarkdownV2 = true });
719+
720+
var result = converter.Convert("<table><tr><td>value</td></tr></table>");
721+
722+
Assert.Contains("```", result);
723+
Assert.Contains("value", result);
724+
}
725+
650726
[Fact]
651727
public void SlackFlavored_Unsupported_Img()
652728
{
@@ -808,6 +884,7 @@ private static Config BuildConfig(CaseData testCase)
808884

809885
ApplyBool(overrides.GithubFlavored, value => config.GithubFlavored = value);
810886
ApplyBool(overrides.SlackFlavored, value => config.SlackFlavored = value);
887+
ApplyBool(overrides.TelegramMarkdownV2, value => config.TelegramMarkdownV2 = value);
811888
ApplyBool(overrides.CommonMark, value => config.CommonMark = value);
812889
ApplyBool(overrides.CommonMarkIntrawordEmphasisSpacing,
813890
value => config.CommonMarkIntrawordEmphasisSpacing = value);
@@ -924,6 +1001,7 @@ public class CaseConfig
9241001
{
9251002
public bool? GithubFlavored { get; set; }
9261003
public bool? SlackFlavored { get; set; }
1004+
public bool? TelegramMarkdownV2 { get; set; }
9271005
public bool? CommonMark { get; set; }
9281006
public bool? CommonMarkIntrawordEmphasisSpacing { get; set; }
9291007
public bool? CommonMarkUseHtmlInlineTags { get; set; }

src/ReverseMarkdown/Config.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ public class Config
1111

1212
public bool SlackFlavored { get; set; } = false;
1313

14+
/// <summary>
15+
/// Enable Telegram MarkdownV2 conversion (Telegram-compatible formatting and escaping). Default is false.
16+
/// </summary>
17+
public bool TelegramMarkdownV2 { get; set; } = false;
18+
1419
/// <summary>
1520
/// Enable CommonMark compatible emphasis handling (avoid intraword emphasis by inserting spaces).
1621
/// </summary>

src/ReverseMarkdown/Converters/A.cs

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,20 @@ public A(Converter converter) : base(converter)
2121
public override void Convert(TextWriter writer, HtmlNode node)
2222
{
2323
var isCommonMark = Converter.Config.CommonMark;
24+
var isTelegram = Converter.Config.TelegramMarkdownV2;
2425
var name = TreatChildrenAsString(node);
25-
if (!isCommonMark) {
26+
if (!isCommonMark && !isTelegram) {
2627
name = name.Trim();
2728
}
28-
else {
29+
else if (isCommonMark) {
2930
name = name.ReplaceLineEndings("&#10;");
3031
}
3132

33+
if (isTelegram) {
34+
ConvertTelegramMarkdownV2(writer, node, name);
35+
return;
36+
}
37+
3238
if (isCommonMark &&
3339
node.FirstChild?.NodeType == HtmlNodeType.Text &&
3440
(node.InnerText.Contains("\\", StringComparison.Ordinal) ||
@@ -200,6 +206,45 @@ public override void Convert(TextWriter writer, HtmlNode node)
200206
}
201207
}
202208

209+
private void ConvertTelegramMarkdownV2(TextWriter writer, HtmlNode node, string name)
210+
{
211+
var href = node.GetAttributeValue("href", string.Empty).Trim();
212+
var hasHrefAttribute = node.Attributes["href"] != null;
213+
var escapedName = StringUtils.EscapeTelegramMarkdownV2(name);
214+
215+
if (!hasHrefAttribute) {
216+
writer.Write(escapedName);
217+
return;
218+
}
219+
220+
var scheme = StringUtils.GetScheme(href);
221+
222+
var isRemoveLinkWhenSameName = (
223+
Converter.Config.SmartHrefHandling &&
224+
scheme != string.Empty &&
225+
Uri.IsWellFormedUriString(href, UriKind.RelativeOrAbsolute) && (
226+
href.Equals(name, StringComparison.OrdinalIgnoreCase) ||
227+
href.Equals($"tel:{name}", StringComparison.OrdinalIgnoreCase) ||
228+
href.Equals($"mailto:{name}", StringComparison.OrdinalIgnoreCase)
229+
)
230+
);
231+
232+
if (href.StartsWith("#", StringComparison.Ordinal)
233+
|| !Converter.Config.IsSchemeWhitelisted(scheme)
234+
|| isRemoveLinkWhenSameName
235+
|| string.IsNullOrEmpty(href)) {
236+
writer.Write(escapedName);
237+
return;
238+
}
239+
240+
var escapedHref = StringUtils.EscapeTelegramMarkdownV2LinkUrl(href);
241+
writer.Write('[');
242+
writer.Write(escapedName);
243+
writer.Write("](");
244+
writer.Write(escapedHref);
245+
writer.Write(')');
246+
}
247+
203248
private static void WriteRawHtmlAnchor(TextWriter writer, HtmlNode node, string text)
204249
{
205250
writer.Write("<a");

src/ReverseMarkdown/Converters/Br.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ public override void Convert(TextWriter writer, HtmlNode node)
2020
if (Converter.Config.CommonMark) {
2121
writer.WriteLine("\\");
2222
}
23+
else if (Converter.Config.TelegramMarkdownV2) {
24+
writer.WriteLine();
25+
}
2326
else if (Converter.Config.GithubFlavored) {
2427
writer.WriteLine();
2528
}

src/ReverseMarkdown/Converters/Code.cs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
using System;
1+
using System;
22
using System.IO;
33
using HtmlAgilityPack;
4+
using ReverseMarkdown.Helpers;
45

56

67
namespace ReverseMarkdown.Converters {
@@ -12,6 +13,13 @@ public Code(Converter converter) : base(converter)
1213

1314
public override void Convert(TextWriter writer, HtmlNode node)
1415
{
16+
if (Converter.Config.TelegramMarkdownV2) {
17+
writer.Write('`');
18+
writer.Write(StringUtils.EscapeTelegramMarkdownV2Code(DecodeHtml(node.InnerText)));
19+
writer.Write('`');
20+
return;
21+
}
22+
1523
if (Converter.Config.CommonMark) {
1624
var content = node.InnerHtml;
1725
var fence = CreateCommonMarkCodeFence(content);

src/ReverseMarkdown/Converters/Em.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ public override void Convert(TextWriter writer, HtmlNode node)
7171
: string.Empty;
7272

7373
var emphasis = Converter.Config.SlackFlavored
74+
|| Converter.Config.TelegramMarkdownV2
7475
? "_"
7576
: isCommonMark && Context.AncestorsAny("i")
7677
? "_"

src/ReverseMarkdown/Converters/H.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,18 @@ public override void Convert(TextWriter writer, HtmlNode node)
2525
var level = node.Name[1] - '0'; // 'h1' -> 1, 'h2' -> 2, etc.
2626

2727
var content = TreatChildrenAsString(node);
28+
if (Converter.Config.TelegramMarkdownV2) {
29+
writer.WriteLine();
30+
for (var i = 0; i < level; i++) {
31+
writer.Write("\\#");
32+
}
33+
34+
writer.Write(' ');
35+
writer.Write(content);
36+
writer.WriteLine();
37+
return;
38+
}
39+
2840
if (Converter.Config.CommonMark) {
2941
content = content.ReplaceLineEndings("&#10;");
3042
content = EscapeTrailingHashes(content);

src/ReverseMarkdown/Converters/Hr.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ public override void Convert(TextWriter writer, HtmlNode node)
1515
throw new SlackUnsupportedTagException(node.Name);
1616
}
1717

18+
if (Converter.Config.TelegramMarkdownV2) {
19+
writer.WriteLine();
20+
writer.Write("\\-\\-\\-");
21+
writer.WriteLine();
22+
return;
23+
}
24+
1825
writer.WriteLine();
1926
writer.Write("* * *");
2027
writer.WriteLine();

0 commit comments

Comments
 (0)