@@ -13,29 +13,37 @@ class Scala < RegexLexer
1313 mimetypes 'text/x-scala' , 'application/x-scala'
1414
# Lexical building blocks, as documented in the EBNF section of the Scala
# specification:
#   Scala 2: https://scala-lang.org/files/archive/spec/2.13/13-syntax-summary.html
#   Scala 3: https://docs.scala-lang.org/scala3/reference/syntax.html
# Unicode categories:
#   https://en.wikipedia.org/wiki/Unicode_character_property#General_Category
whitespace = /\p{Space}/
letter = /[\p{L}$_]/
upper = /[\p{Lu}$_]/
digits = /[0-9]/
parens = /[(){}\[\]]/
# Delimiter characters; only used below to exclude them from `op`.
delims = %r(['`".;,])

# An operator character: anything in the class below that is not already
# claimed by one of the other character classes (negative lookahead).
# +<=>|~ are not listed explicitly because they are in Unicode property Sm:
#   pp (0x00..0x7f).map(&:chr).grep(/\p{Sm}/)
op = %r(
  (?!#{whitespace}|#{letter}|#{digits}|#{parens}|#{delims})
  [-!#%&*/:?@\\^\p{Sm}\p{So}]
)x

# Tail of an identifier: letters/digits, optionally ending in `_` followed by
# operator characters (e.g. the `oo_+` of `foo_+`).
idrest_core = %r((?:#{letter}|#{digits})*(?:_#{op}+)?)x

# Any plain identifier. It starts with any `letter` (not only lower-case
# ones) so that rules built on it, such as `@#{idrest}`, keep accepting
# upper-case names like `Test` and identifiers made of non-cased letters.
idrest = %r(#{letter}#{idrest_core})x

# Identifier that must start with an `upper` character.
upper_idrest = %r(#{upper}#{idrest_core})x

# For string interpolation prefixes like s"", f"" - simplified identifier
# without the operator suffix.
plain_interpol_id = %r(#{letter}(?:#{letter}|#{digits})*)x

# Words highlighted as keywords. The last two rows are the Scala 3 additions.
# NOTE(review): several of the Scala 3 entries appear to be soft keywords in
# the Scala 3 reference - confirm that highlighting them everywhere is wanted.
keywords = %w(
  abstract case catch def do else extends final finally for forSome
  if implicit lazy match new override private protected requires return
  sealed super this throw try val var while with yield
  enum export given open transparent extension using derives then end
  inline opaque infix
)
4048
4149 state :root do
@@ -49,6 +57,17 @@ class Scala < RegexLexer
4957 rule %r(//.*) , Comment ::Single
5058 rule %r(/\* ) , Comment ::Multiline , :comment
5159
# Interpolated strings: s"...", f"""...""", etc.
# These must come before the general string rules and before any identifier
# rule that could consume the prefix on its own.
#
# The triple-quoted form is tried first: the single-line rule below would
# otherwise match the leading `s""` of `s"""..."""` as an empty string.
rule %r/(#{plain_interpol_id})(""".*?"""(?!"))/m do
  groups Name::Tag, Str
end
# s"..." - prefix, opening quote, body (escapes or any char except `"`/`\`),
# closing quote.
rule %r/(#{plain_interpol_id})(")((?:\\(?:["\\\/bfnrt']|u[0-9a-fA-F]{4})|[^"\\])*?)(")/ do
  groups Name::Tag, Str, Str, Str
end
70+
5271 rule %r/@#{ idrest } / , Name ::Decorator
5372
5473 rule %r/(def)(\s +)(#{ idrest } |#{ op } +|`[^`]+`)(\s *)/ do
@@ -67,7 +86,7 @@ class Scala < RegexLexer
6786 groups Name ::Variable , Text , Operator , Name ::Property
6887 end
6988
70- rule %r/#{ upper } #{ idrest } \b / , Name ::Class
89+ rule %r/#{ upper_idrest } \b / , Name ::Class
7190
7291 rule %r/(#{ idrest } )(#{ whitespace } *)(\( )/ do
7392 groups Name ::Function , Text , Operator
@@ -95,7 +114,8 @@ class Scala < RegexLexer
95114
# Plain string literals: triple-quoted first, then single-line.
rule %r/""".*?"""(?!")/m, Str
rule %r/"(\\\\|\\"|[^"])*"/, Str
# Character literals: one character, a \uXXXX escape, or any single
# backslash escape. There is deliberately no catch-all `'...'` rule: it
# would span newlines and swallow everything between two unrelated
# apostrophes.
rule %r/'(?:[^\\]|\\u[0-9a-fA-F]{4}|\\.)'/, Str::Char
99119
100120 rule idrest , Name
101121 rule %r/`[^`]+`/ , Name
@@ -104,15 +124,40 @@ class Scala < RegexLexer
104124 rule %r/[\( \) \{ \} ;,.#]/ , Operator
105125 rule %r/#{ op } +/ , Operator
106126
# Numeric literals, with optional `_` separators between digits.
# Order: floats, binary, hex, decimal integers. The decimal rule must come
# last because the other forms also start with a digit.
decimal = /[0-9](?:_?[0-9])*/
exponent = /[eE][+-]?#{decimal}/

# 1.5, 1., .5, 1_000.25e-3f
rule %r/(?:#{decimal}\.(?:_?[0-9])*|\.#{decimal})(?:#{exponent})?[fFdD]?/, Num::Float
# 10f, 1_0D
rule %r/#{decimal}[fFdD]/, Num::Float

# Binary literals (e.g., 0b1010, 0B1_0_1L)
rule %r/0[bB][01](?:_?[01])*[lL]?/, Num::Bin
# Hex literals (e.g., 0xFF, 0xAB_CDL)
rule %r/0[xX][0-9a-fA-F](?:_?[0-9a-fA-F])*[lL]?/, Num::Hex
# Decimal integers (e.g., 123, 1_000_000L)
rule %r/#{decimal}[lL]?/, Num::Integer

rule %r/\n/, Text

# No dedicated rules follow for `end` markers, `&` / `|` type operators,
# named type arguments or `?=>`: `rule idrest, Name`, the `upper_idrest`
# rule and `rule %r/#{op}+/, Operator` appear earlier in this state and
# already match that text, so rules placed here would never fire.
112157 end
113158
114159 state :class do
# A class name directly followed by `[`: emit the name and enter :typeparam.
# The alternation must not contain literal spaces - this regex is not in
# extended mode, so a space before `#{op}` would have to be matched.
rule %r/(#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`)(\s*)(\[)/ do
  groups Name::Class, Text, Operator
  push :typeparam
end
@@ -121,7 +166,7 @@ class Scala < RegexLexer
121166 rule %r/{/ , Operator , :pop!
122167 rule %r/\( / , Operator , :pop!
123168 rule %r(//.*) , Comment ::Single , :pop!
# Any other class name ends the :class state. No literal space before
# `#{op}`: the regex is not in extended mode.
rule %r(#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`), Name::Class, :pop!
125170 end
126171
127172 state :type do
@@ -133,7 +178,7 @@ class Scala < RegexLexer
133178 end
134179 rule %r/[\( \{ ]/ , Operator , :type
135180
# One segment of a dotted type name. No literal space before `#{op}`: the
# regex is not in extended mode.
# NOTE(review): `#{op}+\`[^`]+`` has no `|` between the operator and the
# backquoted alternative, unlike the sibling rules - looks like a missing
# `|`; kept as-is here, confirm before changing.
typechunk = /(?:#{idrest}|#{upper_idrest}|#{op}+\`[^`]+`)/
137182 rule %r/(#{ typechunk } (?:\. #{ typechunk } )*)(\s *)(\[ )/ do
138183 groups Keyword ::Type , Text , Operator
139184 pop!
@@ -146,15 +191,15 @@ class Scala < RegexLexer
146191 end
147192
148193 rule %r(//.*) , Comment ::Single , :pop!
# Anything else that looks like part of a type. No literal space before
# `#{op}`: the regex is not in extended mode.
rule %r/\.|#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`/, Keyword::Type
150195 end
151196
# Inside a bracketed type-parameter list. Nested brackets push this state
# again; a closing bracket pops it.
state :typeparam do
  rule %r/[\s,]+/, Text
  rule %r/<[%:]|=>|>:|[#_\u21D2]|forSome|type/, Keyword
  rule %r/([\]\)\}])/, Operator, :pop!
  rule %r/[\(\[\{]/, Operator, :typeparam
  # No literal space before `#{op}`: the regex is not in extended mode, and
  # operator characters such as a leading `+` must match without one.
  rule %r/\.|#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`/, Keyword::Type
end
159204
160205 state :comment do
@@ -165,7 +210,12 @@ class Scala < RegexLexer
165210 end
166211
# Lexes the path that follows an import and then returns to the previous
# state.
state :import do
  # A (possibly dotted) path. The group is non-capturing so that the capture
  # groups of the rule below line up one-to-one with its tokens.
  import_path = /(?:#{idrest}|#{upper_idrest}|\.)+/

  # Renaming import: `foo.Bar as Baz`. Whitespace around `as` is required;
  # otherwise a path such as `classes` would be split into `cl`, `as`, `ses`.
  rule %r/(#{import_path})([ \t]+)(as)([ \t]+)(#{idrest}|#{upper_idrest})/ do
    groups Name::Namespace, Text, Keyword, Text, Name::Namespace
    pop!
  end
  rule import_path, Name::Namespace, :pop!
end
170220 end
171221 end
0 commit comments