Skip to content

Commit aca53b7

Browse files
committed
fix: print: beancount: encode non-ASCII letters in metadata keys, also [#2576]
Beancount metadata keys must match [a-z][a-z0-9_-]*, so non-ASCII letters like ä, ö, ü must be encoded there too, using the usual "c<HEXBYTES>" encoding (for example, ä becomes "ce4").
1 parent e471702 commit aca53b7

2 files changed

Lines changed: 28 additions & 8 deletions

File tree

hledger-lib/Hledger/Write/Beancount.hs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,24 +132,25 @@ toBeancountMetadataName name =
132132
Just (c,cs) | T.null cs || not (isBeancountMetadataNameStartChar c) -> T.cons beancountMetadataDummyNameStartChar t
133133
_ -> t
134134

135-
-- | Is this a valid character to start a Beancount metadata name (lowercase letter) ?
135+
-- | Is this a valid character to start a Beancount metadata name (lowercase ASCII letter) ?
136136
isBeancountMetadataNameStartChar :: Char -> Bool
137-
isBeancountMetadataNameStartChar c = isLetter c && islowercase c
137+
isBeancountMetadataNameStartChar = isAsciiLower
138138

139139
-- | Dummy valid starting character to prepend to a Beancount metadata name if needed.
140140
beancountMetadataDummyNameStartChar :: Char
141141
beancountMetadataDummyNameStartChar = 'm'
142142

143-
-- | Is this a valid character in the middle of a Beancount metadata name (a lowercase letter, digit, _ or -) ?
143+
-- | Is this a valid character in the middle of a Beancount metadata name (a lowercase ASCII letter, digit, _ or -) ?
144144
isBeancountMetadataNameChar :: Char -> Bool
145-
isBeancountMetadataNameChar c = (isLetter c && islowercase c) || isDigit c || c `elem` ['_', '-']
145+
isBeancountMetadataNameChar c = isAsciiLower c || isDigit c || c `elem` ['_', '-']
146146

147147
-- | Convert a character to one or more characters valid inside a Beancount metadata name.
148-
-- Letters are lowercased, spaces are converted to dashes, and unsupported characters are encoded as c<HEXBYTES>.
148+
-- ASCII uppercase letters are lowercased, spaces are converted to dashes, and unsupported
149+
-- characters (including non-ASCII letters) are encoded as c<HEXBYTES>: a 'c' followed by the character's code point in hex (e.g. ä becomes ce4).
149150
toBeancountMetadataNameChar :: Char -> Text
150151
toBeancountMetadataNameChar c
151152
| isBeancountMetadataNameChar c = T.singleton c
152-
| isLetter c = T.singleton $ toLower c
153+
| isAsciiUpper c = T.singleton $ toLower c
153154
| isSpace c = "-"
154155
| otherwise = T.pack $ printf "c%x" c
155156

@@ -312,8 +313,6 @@ charToBeancount c = if isSpace c then "-" else printf "C%x" c
312313
-- https://hackage.haskell.org/package/base-4.20.0.1/docs/Data-Char.html#v:isUpperCase would be more correct,
313314
-- but isn't available till base 4.18/ghc 9.6. isUpper is close enough in practice.
314315
isuppercase = isUpper
315-
-- same story, presumably
316-
islowercase = isLower
317316

318317
-- | Is this a valid character to start a Beancount account name part (capital letter or digit) ?
319318
isBeancountAccountStartChar :: Char -> Bool

hledger/test/print/beancount.test

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,3 +265,24 @@ $ hledger -f- print -O beancount
265265
Equity:Opening
266266

267267
>=
268+
269+
# ** 13. Non-ASCII letters in metadata names are hex-encoded (#2576).
270+
# Beancount metadata keys must match [a-z][a-z0-9_-]*, so non-ASCII letters
271+
# like ä cannot just be lowercased; they must be encoded as c<HEXBYTES>.
272+
# Non-ASCII characters in values are fine - Beancount accepts UTF-8 in strings.
273+
<
274+
2024-01-01 Test ; Empfänger: Schäfer
275+
assets:cash -1 EUR
276+
expenses:test 1 EUR
277+
278+
$ hledger -f- print -O beancount
279+
;option "inferred_tolerance_default" "*:0.005"
280+
2024-01-01 open Assets:Cash
281+
2024-01-01 open Expenses:Test
282+
283+
2024-01-01 * "Test" ; Empfänger: Schäfer
284+
empfce4nger: "Schäfer"
285+
Assets:Cash -1 EUR
286+
Expenses:Test 1 EUR
287+
288+
>=

0 commit comments

Comments
 (0)