@@ -118,6 +118,33 @@ let lident = [%sedlex.regexp?
118118 (Sub (xid_start , lu ) | '_' , Star (xid_continue ))
119119];
120120
// A single character that may appear inside an operator token. The
// operator rules in `token` build on this set via `operator_chars` and the
// `Sub`-derived variants.
121+ let operator_char = [% sedlex . regexp ?
122+ '$' | '&' | '*' | '/' | '+' | '-' | '=' | '>' | '<' | '^' | '|' | '!' | '?' |
123+ '%' |
124+ ':' |
125+ '.'
126+ ];
// Zero or more operator characters (`Star`), so this also matches the empty
// string; used as the optional tail of an operator.
127+ let operator_chars = [% sedlex . regexp ? Star (operator_char )];
128+
129+ // We make sure that `>` operators contain at least one non-`>` char so they are
130+ // not confused with `>>>` chains at the end of types, e.g. `List<Option<Box<a>>>`
// Any operator character except `>` (`Sub` removes `>` from the set).
131+ let rcaret_operator_char = [% sedlex . regexp ? Sub (operator_char , '>' )];
// A non-empty run of operator characters containing at least one character
// that is not `>`: any operator chars, then one non-`>` char, then any
// operator chars. Used as the tail after a leading `>` or `>>` in `token`.
132+ let rcaret_operator_chars = [% sedlex . regexp ?
133+ (operator_chars , rcaret_operator_char , operator_chars )
134+ ];
135+
136+ // Similarly, we do this for `<` even though it's a simpler case
// Any operator character except `<` (`Sub` removes `<` from the set).
137+ let lcaret_operator_char = [% sedlex . regexp ? Sub (operator_char , '<' )];
// A non-empty run of operator characters containing at least one character
// that is not `<`: any operator chars, then one non-`<` char, then any
// operator chars. Used as the tail after a leading `<` in `token`.
138+ let lcaret_operator_chars = [% sedlex . regexp ?
139+ (operator_chars , lcaret_operator_char , operator_chars )
140+ ];
141+
142+ // Infix operators are not allowed to start with `//` or `/*` as they
143+ // indicate comments
// Tail of an operator that begins with a slash: the first character is any
// operator char other than a slash or a star, followed by zero or more
// operator chars. At least one character is therefore required, so a lone
// slash is not matched by this tail.
144+ let slash_operator_chars = [% sedlex . regexp ?
145+ (Sub (operator_char , '/' | '*' ), operator_chars )
146+ ];
147+
121148// Tabs and space separators (https://www.compart.com/en/unicode/category/Zs)
// Matches one or more consecutive blank characters (`Plus`), so a whole run
// of blanks is a single match. NOTE(review): `zs` is defined outside this
// excerpt; presumably the Unicode Zs class referenced by the link above.
122149let blank = [% sedlex . regexp ? Plus (zs | '\t' )];
123150
@@ -203,6 +230,10 @@ let rec token = lexbuf => {
203230 | "export" => positioned(EXPORT )
204231 | "except" => positioned(EXCEPT )
205232 | "from" => positioned(FROM )
233+ | "*" => positioned(STAR )
234+ | "/" => positioned(SLASH )
235+ | "|" => positioned(PIPE )
236+ | "-" => positioned(DASH )
206237 | "->" => positioned(ARROW )
207238 | "=>" => positioned(THICKARROW )
208239 | "type" => positioned(TYPE )
@@ -223,9 +254,6 @@ let rec token = lexbuf => {
223254 | "::" => positioned(COLONCOLON )
224255 | ":=" => positioned(GETS )
225256 | ":" => positioned(COLON )
226- | "is" => positioned(IS )
227- | "isnt" => positioned(ISNT )
228- | "==" => positioned(EQEQ )
229257 | "=" => positioned(EQUAL )
230258 | "," => positioned(COMMA )
231259 | ";" => positioned(SEMI )
@@ -237,32 +265,44 @@ let rec token = lexbuf => {
237265 | "[" => positioned(LBRACK )
238266 | "[>" => positioned(LBRACKRCARET )
239267 | "]" => positioned(RBRACK )
240- | "^" => positioned(CARET )
241268 | "<" => positioned(LCARET )
242- | "<<" => positioned(LCARETLCARET )
243269 /* We do not lex >> or >>> as a single token as type vectors can contain
244- these, e.g. List<Option<a>> */
270+ these, e.g. List<Option<a>>. An operator like `>>>>` is lexed as four
271+ separate tokens and the parser sorts it out. */
245272 | ">" => positioned(RCARET )
246- | "^" => positioned(CARET )
247- | "++" => positioned(PLUSPLUS )
248- | "+" => positioned(PLUS )
249- | "-" => positioned(DASH )
250- | "*" => positioned(STAR )
251- | "/" => positioned(SLASH )
252- | "% " => positioned(PERCENT )
253- | "+=" => positioned(PLUSEQ )
254- | "-=" => positioned(DASHEQ )
255- | "*=" => positioned(STAREQ )
256- | "/=" => positioned(SLASHEQ )
257- | "% =" => positioned(PERCENTEQ )
258- | "<=" => positioned(LESSEQ )
259- | ">=" => positioned(GREATEREQ )
260- | "&" => positioned(AMP )
261- | "&&" => positioned(AMPAMP )
262- | "|" => positioned(PIPE )
263- | "||" => positioned(PIPEPIPE )
264- | "!" => positioned(NOT )
265- | "!=" => positioned(NOTEQ )
273+ /* The order of these is somewhat important and is why they are
274+ not sorted by precedence */
275+ | "+="
276+ | "-="
277+ | "*="
278+ | "/="
279+ | "% =" =>
280+ positioned(INFIX_ASSIGNMENT_10 (Sedlexing . Utf8 . sub_lexeme(lexbuf, 0 , 1 )))
281+ | ("==" | "!=" , operator_chars )
282+ | "is"
283+ | "isnt" => positioned(INFIX_80 (Sedlexing . Utf8 . lexeme(lexbuf)))
284+ | ("<<" , operator_chars )
285+ | (">>" , rcaret_operator_chars ) =>
286+ positioned(INFIX_100 (Sedlexing . Utf8 . lexeme(lexbuf)))
287+ | ("<" , lcaret_operator_chars )
288+ | (">" , rcaret_operator_chars ) =>
289+ positioned(INFIX_90 (Sedlexing . Utf8 . lexeme(lexbuf)))
290+ | ("^" , operator_chars ) =>
291+ positioned(INFIX_60 (Sedlexing . Utf8 . lexeme(lexbuf)))
292+ | ("+" | "-" , operator_chars ) =>
293+ positioned(INFIX_110 (Sedlexing . Utf8 . lexeme(lexbuf)))
294+ | ("*" | "% " , operator_chars )
295+ | ("/" , slash_operator_chars ) =>
296+ positioned(INFIX_120 (Sedlexing . Utf8 . lexeme(lexbuf)))
297+ | ("&&" , operator_chars ) =>
298+ positioned(INFIX_40 (Sedlexing . Utf8 . lexeme(lexbuf)))
299+ | ("&" , operator_chars ) =>
300+ positioned(INFIX_70 (Sedlexing . Utf8 . lexeme(lexbuf)))
301+ | ("||" | "??" , operator_chars ) =>
302+ positioned(INFIX_30 (Sedlexing . Utf8 . lexeme(lexbuf)))
303+ | ("|" , operator_chars ) =>
304+ positioned(INFIX_50 (Sedlexing . Utf8 . lexeme(lexbuf)))
305+ | "!" => positioned(PREFIX_150 (Sedlexing . Utf8 . lexeme(lexbuf)))
266306 | "@" => positioned(AT )
267307 | '"' =>
268308 let (start_p , _ ) = Sedlexing . lexing_positions(lexbuf);
0 commit comments