# -*- coding: utf-8 -*- #
# frozen_string_literal: true

#
# Rouge Lexer for Ruchy Programming Language
# RSYN-0404: Rouge grammar for GitHub/GitLab syntax highlighting
#
# @fileoverview Ruchy language support for Rouge (GitHub/GitLab highlighter)
# @version 1.0.0
# @license MIT
#
# Quality Requirements:
# - Test Coverage: ≥80%
# - Cyclomatic Complexity: ≤20
# - Performance: <25ms for 50K lines
#

module Rouge
  module Lexers
    # Rouge lexer for the Ruchy programming language.
    #
    # Ruchy is a systems programming language with built-in actor model support
    # and pipeline operators for functional programming.
    #
    # Within a state the rules are listed from most specific to most general:
    # a pattern that is a prefix of another one (lifetimes vs. char literals,
    # integers vs. floats, identifiers vs. keywords) must come second.
    class Ruchy < RegexLexer
      title "Ruchy"
      desc "The Ruchy programming language (ruchy-lang.org)"

      tag 'ruchy'
      aliases 'rhy'
      filenames '*.rhy', '*.ruchy'
      mimetypes 'text/x-ruchy', 'application/x-ruchy'

      # Keyword categories, interpolated into the word-boundary rules in :root.
      KEYWORDS = %w[
        fn let mut const static struct enum trait impl type mod use
        if else match case for while loop break continue return
        pub async await unsafe extern move ref box
        actor spawn send
        self Self super crate as in where
      ].freeze

      BUILTIN_TYPES = %w[
        bool char str
        i8 i16 i32 i64 i128 isize
        u8 u16 u32 u64 u128 usize
        f32 f64
        String Vec HashMap HashSet Result Option Box Rc Arc
        Some None Ok Err
      ].freeze

      LITERALS = %w[true false].freeze

      # Self-admitted technical debt (SATD) markers highlighted inside comments.
      SATD_MARKERS = /\b(?:TODO|FIXME|NOTE|HACK|XXX|BUG|DEBT|WORKAROUND)\b/.freeze

      # Optional integer type suffix shared by all integer literal rules.
      INT_SUFFIX = '(?:[iu](?:8|16|32|64|128|size))?'.freeze

      # Main tokenization state
      state :root do
        rule %r/\s+/, Text

        # Shebang line. The lookahead leaves inner attributes (`#![...]`),
        # which also begin with `#!`, to the attribute rule below.
        rule %r/^#!(?!\[).*$/, Comment::Hashbang

        # Documentation comments (/// or /** */).
        # `(?!/)` keeps the empty block comment `/**/` from being taken as the
        # opening of a doc comment that runs on to the next `*/`.
        rule %r{///.*$}, Comment::Doc
        rule %r{/\*\*(?!/).*?\*/}m, Comment::Doc

        # Line comments; a comment carrying a SATD marker is flagged as special.
        rule %r{//.*$} do |m|
          if m[0] =~ SATD_MARKERS
            token Comment::Special
          else
            token Comment::Single
          end
        end

        # Block comments (nesting is handled in the :comment state)
        rule %r{/\*}, Comment::Multiline, :comment

        # Attributes (#[...] or #![...])
        rule %r/#!?\[[^\]]*\]/, Comment::Preproc

        # Character literals. Must precede the lifetime rule: `'a'` also
        # starts like the lifetime `'a`.
        rule %r/'(?:[^'\\]|\\(?:x[0-9a-fA-F]{2}|u\{[0-9a-fA-F]{1,6}\}|.))'/, Str::Char

        # Lifetimes ('static, 'a, etc.)
        rule %r/'[a-z_]\w*/, Name::Label

        # Raw strings (r"..." or r#"..."#). The closing quote must be followed
        # by the same run of `#` that opened the literal, hence the
        # backreference; no escapes are recognised inside.
        rule %r/r(#*)".*?"\1/m, Str::Other

        # Regular strings with interpolation support
        rule %r/"/, Str::Double, :string

        # Numeric literals: prefixed integers first, then floats, then
        # plain integers.
        rule %r/0b[01_]+#{INT_SUFFIX}/, Num::Bin
        rule %r/0o[0-7_]+#{INT_SUFFIX}/, Num::Oct
        rule %r/0x[0-9a-fA-F_]+#{INT_SUFFIX}/, Num::Hex

        # Floats. A fractional part needs at least one digit, and a bare
        # trailing dot only counts when it is not the start of `..` or of a
        # member access, so `1..10` and `1.abs()` keep their integer.
        rule %r/\d[\d_]*\.\d[\d_]*(?:[eE][+-]?[\d_]+)?(?:f32|f64)?/, Num::Float
        rule %r/\d[\d_]*\.(?![.\w])/, Num::Float
        rule %r/\d[\d_]*[eE][+-]?[\d_]+(?:f32|f64)?/, Num::Float
        rule %r/\d[\d_]*(?:f32|f64)/, Num::Float

        # Integer literals with optional type suffix
        rule %r/\d[\d_]*#{INT_SUFFIX}/, Num::Integer

        # Pipeline operator (Ruchy-specific)
        rule %r/>>/, Operator

        # Actor operators (Ruchy-specific)
        rule %r/<-|<\?/, Operator

        # Other operators, including `=>`, `->`, `::`, `<` and `>`. Angle
        # brackets are always operators here: a regex cannot tell a comparison
        # from a generic parameter list. The lookahead ends an operator run
        # before a comment opener (`//` or `/*`).
        rule %r{(?:(?!/[/*])[=!<>+\-*/%&|^~:?])+}, Operator
        rule %r/\.\.=?/, Operator

        # Macro invocations (identifier!), but not the `!` of `!=`
        rule %r/[a-zA-Z_]\w*!(?!=)/, Name::Builtin

        # Definitions. The separating whitespace has its own capture group so
        # that every matched character is assigned to a token.
        # Function definitions
        rule %r/(fn)(\s+)([a-zA-Z_]\w*)/ do
          groups Keyword, Text, Name::Function
        end

        # Actor definitions (Ruchy-specific)
        rule %r/(actor)(\s+)([A-Z]\w*)/ do
          groups Keyword, Text, Name::Class
        end

        # Type definitions
        rule %r/(struct|enum|trait|type)(\s+)([A-Z]\w*)/ do
          groups Keyword, Text, Name::Class
        end

        # Keywords
        rule %r/\b(?:#{KEYWORDS.join('|')})\b/, Keyword

        # Built-in types
        rule %r/\b(?:#{BUILTIN_TYPES.join('|')})\b/, Keyword::Type

        # Literals
        rule %r/\b(?:#{LITERALS.join('|')})\b/, Keyword::Constant

        # Type names (PascalCase identifiers)
        rule %r/[A-Z]\w*/, Name::Class

        # Regular identifiers
        rule %r/[a-z_]\w*/, Name

        # Delimiters
        rule %r/[{}()\[\];,.]/, Punctuation
      end

      # Comment state for nested block comments
      state :comment do
        rule %r{/\*}, Comment::Multiline, :comment
        rule %r{\*/}, Comment::Multiline, :pop!

        # SATD keyword detection. Plain text is consumed one word at a time so
        # that this rule gets a chance at the start of every word.
        rule SATD_MARKERS, Comment::Special
        rule %r/\w+/, Comment::Multiline
        rule %r{[^/*\w]+}, Comment::Multiline
        rule %r{[/*]}, Comment::Multiline
      end

      # Regular string state with interpolation
      state :string do
        rule %r/"/, Str::Double, :pop!
        rule %r/\\[\\'"nrt0]/, Str::Escape
        rule %r/\\x[0-9a-fA-F]{2}/, Str::Escape
        rule %r/\\u\{[0-9a-fA-F]{1,6}\}/, Str::Escape
        # Any other backslash pair, including backslash + newline
        rule %r/\\./m, Str::Escape

        # String interpolation (${...})
        rule %r/\$\{/, Str::Interpol, :interpolation

        rule %r/[^"\\$]+/, Str::Double
        rule %r/\$/, Str::Double
      end

      # Inside `${ ... }`: the closing brace ends the interpolation.
      state :interpolation do
        rule %r/\}/, Str::Interpol, :pop!
        mixin :interpolation_body
      end

      # Inside a `{ ... }` pair nested in an interpolation: the closing brace
      # is ordinary punctuation.
      state :interpolation_braces do
        rule %r/\}/, Punctuation, :pop!
        mixin :interpolation_body
      end

      # Simplified expression rules shared by both interpolation states.
      state :interpolation_body do
        rule %r/\{/, Punctuation, :interpolation_braces
        rule %r/[a-zA-Z_]\w*/, Name
        rule %r/\d+/, Num::Integer
        rule %r{[+\-*/]}, Operator
        rule %r/[()]/, Punctuation
        rule %r/\s+/, Text
        # Anything the rules above do not claim; braces are excluded so the
        # nesting rules always see them.
        rule %r{[^{}\s\w+\-*/()]+}, Text
      end

      # Content-based detection heuristic.
      #
      # Returns a score for how likely +text+ is Ruchy source; the first
      # matching construct decides, and 0.0 is returned when none is found.
      # Word boundaries keep `actor`, `spawn` and `fn` from matching inside
      # longer words such as "factor" or "respawn".
      def self.analyze_text(text)
        return 0.3 if text =~ /\bactor /
        return 0.2 if text =~ /\bspawn /
        return 0.2 if text.include?(' >> ')
        return 0.1 if text.include?(' <- ')
        return 0.1 if text =~ /\bfn\s+\w+/
        return 0.1 if text.include?('#[')

        0.0
      end
    end
  end
end
# 0 commit comments