Skip to content

Commit f7445e7

Browse files
committed
feat(lexer): add Scala 3 support
1 parent 4f6bf05 commit f7445e7

File tree

2 files changed

+244
-19
lines changed

2 files changed

+244
-19
lines changed

lib/rouge/lexers/scala.rb

Lines changed: 68 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,29 +13,37 @@ class Scala < RegexLexer
1313
mimetypes 'text/x-scala', 'application/x-scala'
1414

1515
# As documented in the EBNF section of the Scala specification
16-
# https://scala-lang.org/files/archive/spec/2.13/13-syntax-summary.html
16+
# Scala 2: https://scala-lang.org/files/archive/spec/2.13/13-syntax-summary.html
17+
# Scala 3: https://docs.scala-lang.org/scala3/reference/syntax.html
1718
# https://en.wikipedia.org/wiki/Unicode_character_property#General_Category
1819
whitespace = /\p{Space}/
1920
letter = /[\p{L}$_]/
2021
upper = /[\p{Lu}$_]/
22+
lower = /[\p{Ll}$_]/
2123
digits = /[0-9]/
2224
parens = /[(){}\[\]]/
23-
delims = %r([‘’".;,])
25+
delims = %r([''".;,])
2426

2527
# negative lookahead to filter out other classes
2628
op = %r(
2729
(?!#{whitespace}|#{letter}|#{digits}|#{parens}|#{delims})
28-
[-!#%&*/:?@\\^\p{Sm}\p{So}]
30+
[-!#%&*/:?@\\^\p{Sm}\p{So}] # Basic operators and Unicode math/symbol chars
2931
)x
30-
# manually removed +<=>|~ from regexp because they're in property Sm
31-
# pp CHRS:(0x00..0x7f).map(&:chr).grep(/\p{Sm}/)
32+
# Note: +<=>|~ are not listed explicitly in the character class above because they are already matched by the Unicode property Sm
3233

33-
idrest = %r(#{letter}(?:#{letter}|#{digits})*(?:(?<=_)#{op}+)?)x
34+
idrest_core = %r((?:#{letter}|#{digits})*(?:_#{op}+)?)x
35+
idrest = %r(#{lower}#{idrest_core})x
36+
upper_idrest = %r(#{upper}#{idrest_core})x
37+
38+
# For string interpolation prefixes like s"", f"" - simplified identifier
39+
plain_interpol_id = %r(#{letter}(?:#{letter}|#{digits})*)x
3440

3541
keywords = %w(
3642
abstract case catch def do else extends final finally for forSome
3743
if implicit lazy match new override private protected requires return
3844
sealed super this throw try val var while with yield
45+
enum export given open transparent extension using derives then end
46+
inline opaque infix transparent
3947
)
4048

4149
state :root do
@@ -49,6 +57,17 @@ class Scala < RegexLexer
4957
rule %r(//.*), Comment::Single
5058
rule %r(/\*), Comment::Multiline, :comment
5159

60+
# Interpolated strings: s"...", f"""...""", etc.
61+
# Must be before general string rules and identifier rules that might catch the prefix.
62+
# s"..."
63+
rule %r/(#{plain_interpol_id})(")((?:\\(?:["\\\/bfnrt']|u[0-9a-fA-F]{4})|[^"\\])*?)(")/ do
64+
groups Name::Tag, Str, Str, Str
65+
end
66+
# s"""..."""
67+
rule %r/(#{plain_interpol_id})(""".*?"""(?!"))/m do
68+
groups Name::Tag, Str
69+
end
70+
5271
rule %r/@#{idrest}/, Name::Decorator
5372

5473
rule %r/(def)(\s+)(#{idrest}|#{op}+|`[^`]+`)(\s*)/ do
@@ -67,7 +86,7 @@ class Scala < RegexLexer
6786
groups Name::Variable, Text, Operator, Name::Property
6887
end
6988

70-
rule %r/#{upper}#{idrest}\b/, Name::Class
89+
rule %r/#{upper_idrest}\b/, Name::Class
7190

7291
rule %r/(#{idrest})(#{whitespace}*)(\()/ do
7392
groups Name::Function, Text, Operator
@@ -95,7 +114,8 @@ class Scala < RegexLexer
95114

96115
rule %r/""".*?"""(?!")/m, Str
97116
rule %r/"(\\\\|\\"|[^"])*"/, Str
98-
rule %r/'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'/, Str::Char
117+
rule %r/'([^'\\]|\\[\\'"bfnrt]|\\u[0-9a-fA-F]{4})'/, Str::Char
118+
rule %r/'[^']*'/, Str
99119

100120
rule idrest, Name
101121
rule %r/`[^`]+`/, Name
@@ -104,15 +124,40 @@ class Scala < RegexLexer
104124
rule %r/[\(\)\{\};,.#]/, Operator
105125
rule %r/#{op}+/, Operator
106126

107-
rule %r/([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?/, Num::Float
108-
rule %r/([0-9][0-9]*[fFdD])/, Num::Float
109-
rule %r/0x[0-9a-fA-F]+/, Num::Hex
110-
rule %r/[0-9]+L?/, Num::Integer
127+
# Order: Floats, Binary, Hex, Decimal Integers.
128+
# Floating point with optional underscores
129+
rule %r/([0-9](?:_?[0-9])*\.(?:_?[0-9])*(?:[eE][+-]?[0-9](?:_?[0-9])*)?|\.[0-9](?:_?[0-9])*(?:[eE][+-]?[0-9](?:_?[0-9])*)?)[fFdD]?/, Num::Float
130+
rule %r/([0-9](?:_?[0-9])*)[fFdD]/, Num::Float
131+
132+
# Binary literals (e.g., 0b1010, 0B1_0_1L)
133+
rule %r/0[bB][01](?:_?[01])*[lL]?/, Num::Integer
134+
# Hex literals (e.g., 0xFF, 0xAB_CDL)
135+
rule %r/0[xX][0-9a-fA-F](?:_?[0-9a-fA-F])*[lL]?/, Num::Hex
136+
# Decimal integers (e.g., 123, 1_000_000L)
137+
# This must come after the float, hex and binary rules, which also start with a digit.
138+
rule %r/[0-9](?:_?[0-9])*[lL]?/, Num::Integer
139+
111140
rule %r/\n/, Text
141+
142+
# End markers for control structures and definitions
143+
rule %r/(end)(\s+)(if|while|for|match|try|new|this|given|extension|val|def|class|object|trait)\b/ do
144+
groups Keyword, Text, Keyword
145+
end
146+
147+
# Type operators for union and intersection types
148+
rule %r/[&|](?![&|])/, Operator
149+
150+
# Named type arguments
151+
rule %r/([A-Z]\w*)(\s*)(=)(?=\s*[A-Z])/ do
152+
groups Name::Class, Text, Operator
153+
end
154+
155+
# Context function type
156+
rule %r/\?=>/, Operator
112157
end
113158

114159
state :class do
115-
rule %r/(#{idrest}|#{op}+|`[^`]+`)(\s*)(\[)/ do
160+
rule %r/(#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`)(\s*)(\[)/ do
116161
groups Name::Class, Text, Operator
117162
push :typeparam
118163
end
@@ -121,7 +166,7 @@ class Scala < RegexLexer
121166
rule %r/{/, Operator, :pop!
122167
rule %r/\(/, Operator, :pop!
123168
rule %r(//.*), Comment::Single, :pop!
124-
rule %r(#{idrest}|#{op}+|`[^`]+`), Name::Class, :pop!
169+
rule %r(#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`), Name::Class, :pop!
125170
end
126171

127172
state :type do
@@ -133,7 +178,7 @@ class Scala < RegexLexer
133178
end
134179
rule %r/[\(\{]/, Operator, :type
135180

136-
typechunk = /(?:#{idrest}|#{op}+\`[^`]+`)/
181+
typechunk = /(?:#{idrest}|#{upper_idrest}|#{op}+\`[^`]+`)/
137182
rule %r/(#{typechunk}(?:\.#{typechunk})*)(\s*)(\[)/ do
138183
groups Keyword::Type, Text, Operator
139184
pop!
@@ -146,15 +191,15 @@ class Scala < RegexLexer
146191
end
147192

148193
rule %r(//.*), Comment::Single, :pop!
149-
rule %r/\.|#{idrest}|#{op}+|`[^`]+`/, Keyword::Type
194+
rule %r/\.|#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`/, Keyword::Type
150195
end
151196

152197
state :typeparam do
153198
rule %r/[\s,]+/, Text
154199
rule %r/<[%:]|=>|>:|[#_\u21D2]|forSome|type/, Keyword
155200
rule %r/([\]\)\}])/, Operator, :pop!
156201
rule %r/[\(\[\{]/, Operator, :typeparam
157-
rule %r/\.|#{idrest}|#{op}+|`[^`]+`/, Keyword::Type
202+
rule %r/\.|#{idrest}|#{upper_idrest}|#{op}+|`[^`]+`/, Keyword::Type
158203
end
159204

160205
state :comment do
@@ -165,7 +210,12 @@ class Scala < RegexLexer
165210
end
166211

167212
state :import do
168-
rule %r((#{idrest}|\.)+), Name::Namespace, :pop!
213+
# Handle 'as' imports with optional whitespace
214+
rule %r/((#{idrest}|#{upper_idrest}|\.)+)(\s*)(as)(\s*)(#{idrest}|#{upper_idrest})/ do |m|
215+
groups Name::Namespace, Text, Keyword, Text, Name::Namespace
216+
pop!
217+
end
218+
rule %r/((#{idrest}|#{upper_idrest}|\.)+)/, Name::Namespace, :pop!
169219
end
170220
end
171221
end

spec/visual/samples/scala

Lines changed: 176 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package whatever.mine
22

33
import foo.bar.{Foo, Bar => Baz}
4-
4+
import baz.bar.{ Baz as Bar }
55
/* This file /* which is totally legal scala */ should be highlighted
66
correctly by rouge */
77

@@ -68,4 +68,179 @@ StorageState.table(StorageState.NewUsers()).format(
6868
keyParams('app_id).asInstanceOf[String]
6969
)
7070

71+
// Scala 3 Features
72+
73+
// Enums
74+
enum Color(val rgb: Int):
75+
case Red extends Color(0xFF0000)
76+
case Green extends Color(0x00FF00)
77+
case Blue extends Color(0x0000FF)
78+
case Yellow // simple case
79+
80+
// Given/Using clauses
81+
trait Ord[T]:
82+
def compare(x: T, y: T): Int
83+
extension (x: T)
84+
def < (y: T) = compare(x, y) < 0
85+
def > (y: T) = compare(x, y) > 0
86+
87+
given intOrd: Ord[Int] with
88+
def compare(x: Int, y: Int) =
89+
if x < y then -1 else if x > y then 1 else 0
90+
91+
given listOrd[T](using ord: Ord[T]): Ord[List[T]] with
92+
def compare(xs: List[T], ys: List[T]): Int =
93+
(xs, ys) match
94+
case (Nil, Nil) => 0
95+
case (Nil, _) => -1
96+
case (_, Nil) => 1
97+
case (x :: xt, y :: yt) =>
98+
val fst = ord.compare(x,y)
99+
if fst != 0 then fst else compare(xt, yt)
100+
101+
def sort[A](xs: List[A])(using Ord[A]): List[A] =
102+
xs.sorted // uses listOrd and intOrd implicitly for List[Int]
103+
104+
val sortedInts = sort(List(3_000, 1, -20_000_000))
105+
106+
// Extension Methods
107+
extension (s: String)
108+
def capitalized: String = s.toUpperCase
109+
def twice: String = s + " " + s
110+
111+
val greeting = "hello".capitalized.twice
112+
113+
// New Control Syntax (if/then/else, optional braces)
114+
def checkSign(x: Int): String =
115+
if x > 0 then
116+
"positive"
117+
else if x < 0 then
118+
"negative"
119+
else
120+
"zero"
121+
122+
var count = 3
123+
while count > 0 do
124+
println(s"Count is: $count") // String interpolation
125+
count -= 1
126+
127+
// Binary Literals
128+
val binary1 = 0b101010
129+
val binary2 = 0B0011_0011
130+
val binaryLong = 0b1111_0000_1111_0000L
131+
132+
// Multiline String Interpolation
133+
val name = "Scala 3"
134+
val version = 3.3
135+
val multilineInterpolated = s"""This is
136+
a multiline string
137+
for $name, version $version.
138+
A raw one: raw"""\u0041\n""" // raw interpolator
139+
"""
140+
141+
// Export Clauses
142+
class Service:
143+
def operation(x: Int): String = s"Result: ${x * 2}"
144+
145+
class Client(s: Service):
146+
export s.operation
147+
148+
val service = new Service
149+
val client = new Client(service)
150+
val clientResult = client.operation(101_010) // uses exported method
151+
152+
// Opaque Type Aliases
153+
opaque type Logarithm = Double
154+
155+
object Logarithm:
156+
def apply(d: Double): Logarithm = math.log(d)
157+
def toDouble(l: Logarithm): Double = math.exp(l)
158+
159+
extension (l: Logarithm)
160+
def + (other: Logarithm): Logarithm = Logarithm(toDouble(l) * toDouble(other))
161+
162+
val log2 = Logarithm(2.0)
163+
val log3 = Logarithm(3.0)
164+
val combinedLog = log2 + log3
165+
166+
// End markers
167+
def processList(items: List[String]): Unit =
168+
if items.nonEmpty then
169+
for item <- items do
170+
println(item)
171+
end for
172+
println("Done with items")
173+
end if
174+
println("List processing finished")
175+
end processList
176+
177+
val anIdentifier_with_op_! = 42
178+
71179
// Comment at EOF
180+
181+
// Match expressions with end marker
182+
def describe(x: Any): String =
183+
match x
184+
case i: Int if i > 0 => "positive number"
185+
case 0 => "zero"
186+
case s: String => s"string: $s"
187+
case _ => "something else"
188+
end match
189+
190+
type Resettable = { def reset(): Unit }
191+
type Growable[T] = { def add(t: T): Unit }
192+
193+
def combine[T](x: Resettable & Growable[T]): Unit = ???
194+
195+
def help(id: String | Int) =
196+
id match
197+
case s: String => println(s"String ID: $s")
198+
case i: Int => println(s"Int ID: $i")
199+
200+
trait Animal:
201+
def speak: String
202+
def eat(food: String): Unit
203+
204+
class Dog extends Animal:
205+
def speak = "Woof!"
206+
def eat(food: String) =
207+
println(s"Dog eating $food")
208+
end eat
209+
end Dog
210+
211+
import scala.compiletime.{error, requireConst}
212+
transparent inline def validate(inline str: String): Unit =
213+
requireConst(str)
214+
if str.isEmpty then
215+
error("Empty string not allowed")
216+
217+
type Executable[T] = ExecutionContext ?=> T
218+
def executeAsync(x: Int)(using ctx: ExecutionContext): Unit = ???
219+
220+
def generic[A, B](x: A, y: B) = (x, y)
221+
val result = generic[A = String, B = Int]("hello", 42)
222+
223+
// End markers for different constructs
224+
object ComplexExample:
225+
class Inner:
226+
def process(items: List[Int]): Int =
227+
if items.isEmpty then
228+
println("Empty list")
229+
0
230+
else
231+
var sum = 0
232+
for
233+
x <- items
234+
if x > 0
235+
y = x * 2
236+
do
237+
sum += y
238+
println(s"Processing $x")
239+
end for
240+
sum
241+
end if
242+
end process
243+
end Inner
244+
end ComplexExample
245+
246+
// Comment at EOF

0 commit comments

Comments
 (0)