Skip to content

Commit aed1c1e

Browse files
edemaine authored and k4b7 committed
unicodeTextInMathMode setting (#1117)
* unicodeTextInMathMode setting * When `unicodeTextInMathMode` is `true`, accented letters from `unicodeSymbols.js`, and CJK and other supported languages, get added support in math mode (as requested in #895). * When `unicodeTextInMathMode` is `false`, all of these stop working in math mode, and are only supported in text mode (matching XeTeX behavior). Note that this is a backwards incompatibility with some 0.9.0 alpha/betas. * Fix handling of Unicode characters ð, Å, å * Fix double handling of ð (math maps to \eth, not special Unicode character) * Remove Åå special math handling, thanks to #1125 * Forbid extraLatin when unicodeTextInMathMode is false
1 parent 7de91f7 commit aed1c1e

File tree

7 files changed

+66
-12
lines changed

7 files changed

+66
-12
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ You can provide an object of options as the last argument to `katex.render` and
7171
- `errorColor`: `string`. A color string given in the format `"#XXX"` or `"#XXXXXX"`. This option determines the color which unsupported commands are rendered in. (default: `#cc0000`)
7272
- `macros`: `object`. A collection of custom macros. Each macro is a property with a name like `\name` (written `"\\name"` in JavaScript) which maps to a string that describes the expansion of the macro. Single-character keys can also be included in which case the character will be redefined as the given macro (similar to TeX active characters).
7373
- `colorIsTextColor`: `boolean`. If `true`, `\color` will work like LaTeX's `\textcolor`, and take two arguments (e.g., `\color{blue}{hello}`), which restores the old behavior of KaTeX (pre-0.8.0). If `false` (the default), `\color` will work like LaTeX's `\color`, and take one argument (e.g., `\color{blue}hello`). In both cases, `\textcolor` works as in LaTeX (e.g., `\textcolor{blue}{hello}`).
74+
- `unicodeTextInMathMode`: `boolean`. If `true`, supported unicode text characters like `é` and `試` will also work in math mode. (They always work in text mode.) The default is `false`, which matches XeTeX behavior; `true` emulates MathJax behavior.
7475
- `maxSize`: `number`. If non-zero, all user-specified sizes, e.g. in `\rule{500em}{500em}`, will be capped to `maxSize` ems. Otherwise, users can make elements and spaces arbitrarily large (the default behavior).
7576

7677
For example:
@@ -121,6 +122,12 @@ will appear larger than 1cm in browser units.
121122
`align` in math mode. The `aligned` environment offers the same functionality
122123
but in math mode, so use that instead or define a macro that maps `align` to
123124
`aligned`.
125+
- MathJax defines `\color` to be like `\textcolor` by default; set KaTeX's
126+
`colorIsTextColor` option to `true` for this behavior. KaTeX's default
127+
behavior matches MathJax with its `color.js` extension enabled.
128+
- MathJax supports Unicode text characters in math mode, unlike LaTeX.
129+
To support this behavior in KaTeX, set the `unicodeTextInMathMode` option
130+
to `true`.
124131

125132
## Libraries
126133

src/Parser.js

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import functions from "./functions";
55
import environments from "./environments";
66
import MacroExpander from "./MacroExpander";
7-
import symbols from "./symbols";
7+
import symbols, { extraLatin } from "./symbols";
88
import { validUnit } from "./units";
99
import { supportedCodepoint } from "./unicodeScripts";
1010
import unicodeAccents from "./unicodeAccents";
@@ -968,9 +968,12 @@ export default class Parser {
968968
return newDollar(nucleus);
969969
}
970970
// At this point, we should have a symbol, possibly with accents.
971-
// First expand any accented base symbol according to unicodeSymbols.
971+
// First expand any accented base symbol according to unicodeSymbols,
972+
// unless we're in math mode and unicodeTextInMathMode is false
973+
// (XeTeX-compatible mode).
972974
if (unicodeSymbols.hasOwnProperty(text[0]) &&
973-
!symbols[this.mode][text[0]]) {
975+
!symbols[this.mode][text[0]] &&
976+
(this.settings.unicodeTextInMathMode || this.mode === "text")) {
974977
text = unicodeSymbols[text[0]] + text.substr(1);
975978
}
976979
// Strip off any combining characters
@@ -986,10 +989,15 @@ export default class Parser {
986989
// Recognize base symbol
987990
let symbol = null;
988991
if (symbols[this.mode][text]) {
992+
if (this.mode === 'math' && extraLatin.indexOf(text) >= 0 &&
993+
!this.settings.unicodeTextInMathMode) {
994+
throw new ParseError(`Unicode text character ${text} used in ` +
995+
`math mode without unicodeTextInMathMode setting`, nucleus);
996+
}
989997
symbol = new ParseNode(symbols[this.mode][text].group,
990998
text, this.mode, nucleus);
991-
} else if (this.mode === "text" &&
992-
supportedCodepoint(text.charCodeAt(0))) {
999+
} else if (supportedCodepoint(text.charCodeAt(0)) &&
1000+
(this.mode === "text" || this.settings.unicodeTextInMathMode)) {
9931001
symbol = new ParseNode("textord", text, this.mode, nucleus);
9941002
} else {
9951003
return null; // EOF, ^, _, {, }, etc.

src/Settings.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export type SettingsOptions = {
1414
errorColor?: string;
1515
macros?: MacroMap;
1616
colorIsTextColor?: boolean;
17+
unicodeTextInMathMode?: boolean;
1718
maxSize?: number;
1819
};
1920

@@ -33,6 +34,7 @@ class Settings {
3334
errorColor: string;
3435
macros: MacroMap;
3536
colorIsTextColor: boolean;
37+
unicodeTextInMathMode: boolean;
3638
maxSize: number;
3739

3840
constructor(options: SettingsOptions) {
@@ -43,6 +45,8 @@ class Settings {
4345
this.errorColor = utils.deflt(options.errorColor, "#cc0000");
4446
this.macros = options.macros || {};
4547
this.colorIsTextColor = utils.deflt(options.colorIsTextColor, false);
48+
this.unicodeTextInMathMode =
49+
utils.deflt(options.unicodeTextInMathMode, false);
4650
this.maxSize = Math.max(0, utils.deflt(options.maxSize, Infinity));
4751
}
4852
}

src/symbols.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -739,7 +739,7 @@ for (let i = 0; i < letters.length; i++) {
739739
// but they are not actually in the font, nor are they supported by the
740740
// Unicode accent mechanism, so they fall back to Times font and look ugly.
741741
// TODO(edemaine): Fix this.
742-
const extraLatin = "ÇÐÞçþ";
742+
export const extraLatin = "ÇÐÞçþ";
743743
for (let i = 0; i < extraLatin.length; i++) {
744744
const ch = extraLatin.charAt(i);
745745
defineSymbol(math, main, mathord, ch, ch);

test/katex-spec.js

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2864,7 +2864,8 @@ describe("Unicode accents", function() {
28642864
"\\tilde n" +
28652865
"\\grave o\\acute o\\hat o\\tilde o\\ddot o" +
28662866
"\\grave u\\acute u\\hat u\\ddot u" +
2867-
"\\acute y\\ddot y");
2867+
"\\acute y\\ddot y",
2868+
{unicodeTextInMathMode: true});
28682869
});
28692870

28702871
it("should parse Latin-1 letters in text mode", function() {
@@ -2894,18 +2895,21 @@ describe("Unicode accents", function() {
28942895
});
28952896

28962897
it("should parse combining characters", function() {
2897-
expect("A\u0301C\u0301").toParseLike("Á\\acute C");
2898+
expect("A\u0301C\u0301").toParseLike("Á\\acute C",
2899+
{unicodeTextInMathMode: true});
28982900
expect("\\text{A\u0301C\u0301}").toParseLike("\\text{Á\\'C}");
28992901
});
29002902

29012903
it("should parse multi-accented characters", function() {
2902-
expect("ấā́ắ\\text{ấā́ắ}").toParse();
2904+
expect("ấā́ắ\\text{ấā́ắ}").toParse({unicodeTextInMathMode: true});
29032905
// Doesn't parse quite the same as
29042906
// "\\text{\\'{\\^a}\\'{\\=a}\\'{\\u a}}" because of the ordgroups.
29052907
});
29062908

29072909
it("should parse accented i's and j's", function() {
2908-
expect("íȷ́").toParseLike("\\acute ı\\acute ȷ");
2910+
expect("íȷ́").toParseLike("\\acute ı\\acute ȷ",
2911+
{unicodeTextInMathMode: true});
2912+
expect("ấā́ắ\\text{ấā́ắ}").toParse({unicodeTextInMathMode: true});
29092913
});
29102914
});
29112915

@@ -3009,6 +3013,29 @@ describe("Symbols", function() {
30093013
});
30103014
});
30113015

3016+
describe("unicodeTextInMathMode setting", function() {
3017+
it("should allow unicode text when true", () => {
3018+
expect("é").toParse({unicodeTextInMathMode: true});
3019+
expect("試").toParse({unicodeTextInMathMode: true});
3020+
});
3021+
3022+
it("should forbid unicode text when false", () => {
3023+
expect("é").toNotParse({unicodeTextInMathMode: false});
3024+
expect("試").toNotParse({unicodeTextInMathMode: false});
3025+
});
3026+
3027+
it("should forbid unicode text when default", () => {
3028+
expect("é").toNotParse();
3029+
expect("試").toNotParse();
3030+
});
3031+
3032+
it("should always allow unicode text in text mode", () => {
3033+
expect("\\text{é試}").toParse({unicodeTextInMathMode: false});
3034+
expect("\\text{é試}").toParse({unicodeTextInMathMode: true});
3035+
expect("\\text{é試}").toParse();
3036+
});
3037+
});
3038+
30123039
describe("Internal __* interface", function() {
30133040
const latex = "\\sum_{k = 0}^{\\infty} x^k";
30143041
const rendered = katex.renderToString(latex);

test/mathml-spec.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ describe("A MathML builder", function() {
9595
});
9696

9797
it('accents turn into <mover accent="true"> in MathML', function() {
98-
expect(getMathML("über fiancée")).toMatchSnapshot();
98+
expect(getMathML("über fiancée", {unicodeTextInMathMode: true}))
99+
.toMatchSnapshot();
99100
});
100101
});

test/unicode-spec.js

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,16 @@ describe("unicode", function() {
7272
'ÆÇÐØÞßæçðøþ}').toParse();
7373
});
7474

75+
it("should not parse Latin-1 outside \\text{} without setting", function() {
76+
const chars = 'ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿÇÐÞçþ';
77+
for (const ch of chars) {
78+
expect(ch).toNotParse();
79+
}
80+
});
81+
7582
it("should parse Latin-1 outside \\text{}", function() {
7683
expect('ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ' +
77-
'ÇÐÞçðþ').toParse();
84+
'ÇÐÞçðþ').toParse({unicodeTextInMathMode: true});
7885
});
7986

8087
it("should parse all lower case Greek letters", function() {

0 commit comments

Comments
 (0)