Skip to content

Commit 1ca4d87

Browse files
committed
Support UTF-8 in the markdown renderer
1 parent da8fcb9 commit 1ca4d87

1 file changed

Lines changed: 22 additions & 21 deletions

File tree

src/repl/markdown.go

Lines changed: 22 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -96,8 +96,8 @@ type MarkdownRenderer struct {
9696
linkURLBuffer string
9797
inLinkText bool
9898
inLinkURL bool
99-
boldMarker byte // '*' or '_' for bold
100-
italicMarker byte // '*' or '_' for italic
99+
boldMarker rune // '*' or '_' for bold
100+
italicMarker rune // '*' or '_' for italic
101101
nestingLevel int // For handling nested formatting
102102

103103
// State flags
@@ -137,9 +137,10 @@ func (r *MarkdownRenderer) Process(chunk string) string {
137137

138138
var result strings.Builder
139139

140-
// Process character by character
141-
for i := 0; i < len(chunk); i++ {
142-
c := chunk[i]
140+
// Process character by character, supporting multibyte UTF-8 runes
141+
runes := []rune(chunk)
142+
for i := 0; i < len(runes); i++ {
143+
c := runes[i]
143144

144145
// Handle escape character
145146
if c == '\\' && !r.isEscaping {
@@ -159,13 +160,13 @@ func (r *MarkdownRenderer) Process(chunk string) string {
159160
if c == '#' {
160161
headerCount := 1
161162
j := i + 1
162-
for j < len(chunk) && chunk[j] == '#' && headerCount < 6 {
163+
for j < len(runes) && runes[j] == '#' && headerCount < 6 {
163164
headerCount++
164165
j++
165166
}
166167

167168
// Validate that it's a proper header (needs space after #)
168-
if j < len(chunk) && chunk[j] == ' ' {
169+
if j < len(runes) && runes[j] == ' ' {
169170
r.collectingHeader = true
170171
r.headerLevel = headerCount
171172
r.currentElement = MarkdownElement{Type: HeaderElement, Level: headerCount, Content: ""}
@@ -176,7 +177,7 @@ func (r *MarkdownRenderer) Process(chunk string) string {
176177
}
177178

178179
// Handle list items (- item or * item)
179-
if (c == '-' || c == '*') && i+1 < len(chunk) && chunk[i+1] == ' ' {
180+
if (c == '-' || c == '*') && i+1 < len(runes) && runes[i+1] == ' ' {
180181
r.collectingListItem = true
181182
r.currentElement = MarkdownElement{Type: ListItemElement, Content: ""}
182183
i += 1 // Skip the marker and space
@@ -187,10 +188,10 @@ func (r *MarkdownRenderer) Process(chunk string) string {
187188
// Handle ordered list items (1. item)
188189
if isDigit(c) {
189190
j := i
190-
for j < len(chunk) && isDigit(chunk[j]) {
191+
for j < len(runes) && isDigit(runes[j]) {
191192
j++
192193
}
193-
if j < len(chunk) && chunk[j] == '.' && j+1 < len(chunk) && chunk[j+1] == ' ' {
194+
if j < len(runes) && runes[j] == '.' && j+1 < len(runes) && runes[j+1] == ' ' {
194195
r.collectingListItem = true
195196
r.currentElement = MarkdownElement{Type: ListItemElement, Content: ""}
196197
i = j + 1 // Skip past the number, dot, and space
@@ -200,7 +201,7 @@ func (r *MarkdownRenderer) Process(chunk string) string {
200201
}
201202

202203
// Handle blockquotes (> quote)
203-
if c == '>' && i+1 < len(chunk) && chunk[i+1] == ' ' {
204+
if c == '>' && i+1 < len(runes) && runes[i+1] == ' ' {
204205
r.collectingBlockquote = true
205206
r.currentElement = MarkdownElement{Type: BlockquoteElement, Content: ""}
206207
i += 1 // Skip the > and space
@@ -209,20 +210,20 @@ func (r *MarkdownRenderer) Process(chunk string) string {
209210
}
210211

211212
// Handle code fences (```)
212-
if c == '`' && i+2 < len(chunk) && chunk[i+1] == '`' && chunk[i+2] == '`' {
213+
if c == '`' && i+2 < len(runes) && runes[i+1] == '`' && runes[i+2] == '`' {
213214
r.inCodeFence = !r.inCodeFence
214215
if r.inCodeFence {
215216
// Look for optional language identifier
216217
langStart := i + 3
217-
for langStart < len(chunk) && chunk[langStart] == ' ' {
218+
for langStart < len(runes) && runes[langStart] == ' ' {
218219
langStart++
219220
}
220221
langEnd := langStart
221-
for langEnd < len(chunk) && chunk[langEnd] != '\n' {
222+
for langEnd < len(runes) && runes[langEnd] != '\n' {
222223
langEnd++
223224
}
224225
if langEnd > langStart {
225-
r.codeLanguage = chunk[langStart:langEnd]
226+
r.codeLanguage = string(runes[langStart:langEnd])
226227
}
227228
r.currentElement = MarkdownElement{Type: CodeBlockElement, Content: ""}
228229
} else {
@@ -232,7 +233,7 @@ func (r *MarkdownRenderer) Process(chunk string) string {
232233
r.codeLanguage = ""
233234
}
234235
i += 2 // Skip past the ```
235-
if i+1 < len(chunk) && chunk[i+1] == '\n' {
236+
if i+1 < len(runes) && runes[i+1] == '\n' {
236237
i++ // Skip the newline after code fence
237238
}
238239
r.isInLineStart = true
@@ -245,12 +246,12 @@ func (r *MarkdownRenderer) Process(chunk string) string {
245246
// Inside code fence - collect everything until the closing fence
246247
if r.inCodeFence && r.currentElement.Type == CodeBlockElement {
247248
// Check for closing fence
248-
if c == '`' && i+2 < len(chunk) && chunk[i+1] == '`' && chunk[i+2] == '`' {
249+
if c == '`' && i+2 < len(runes) && runes[i+1] == '`' && runes[i+2] == '`' {
249250
r.inCodeFence = false
250251
result.WriteString(formatElement(r.currentElement))
251252
r.currentElement = MarkdownElement{Type: TextElement, Content: ""}
252253
i += 2 // Skip the remaining `` characters
253-
if i+1 < len(chunk) && chunk[i+1] == '\n' {
254+
if i+1 < len(runes) && runes[i+1] == '\n' {
254255
i++ // Skip the newline after code fence
255256
}
256257
r.isInLineStart = true
@@ -285,7 +286,7 @@ func (r *MarkdownRenderer) Process(chunk string) string {
285286
}
286287

287288
// Handle bold formatting (** or __)
288-
if (c == '*' || c == '_') && i+1 < len(chunk) && chunk[i+1] == c {
289+
if (c == '*' || c == '_') && i+1 < len(runes) && runes[i+1] == c {
289290
if r.collectingBold && c == r.boldMarker {
290291
// End of bold, add the element
291292
r.collectingBold = false
@@ -343,7 +344,7 @@ func (r *MarkdownRenderer) Process(chunk string) string {
343344
continue
344345
}
345346

346-
if c == ']' && r.collectingLinkText && i+1 < len(chunk) && chunk[i+1] == '(' {
347+
if c == ']' && r.collectingLinkText && i+1 < len(runes) && runes[i+1] == '(' {
347348
r.collectingLinkText = false
348349
r.collectingLinkURL = true
349350
i++ // Skip the opening (
@@ -507,7 +508,7 @@ func formatElement(element MarkdownElement) string {
507508
}
508509

509510
// isDigit reports whether c is an ASCII decimal digit ('0' through '9').
510-
func isDigit(c byte) bool {
511+
func isDigit(c rune) bool {
511512
return c >= '0' && c <= '9'
512513
}
513514

0 commit comments

Comments
 (0)