Skip to content

Commit 58ce6c1

Browse files
RiskRunner0meta-codesync[bot]
authored andcommitted
Abstract parser behind ParseError and Parser trait
Summary: Extract the parser interface from module.rs into a clean abstraction layer. This prepares for swapping the parser implementation (e.g., to a recursive descent parser) without changing any calling code or test infrastructure. Changes: - `parse_error.rs`: parser-agnostic `ParseError` type that normalizes errors from any parser implementation into a common format (message + span). - `parser.rs`: defines the `Parser` trait + `Lexeme` alias. Each parser backend impls this trait. - `parser_lalrpop.rs`: `LalrpopParser` unit struct that impls `Parser`, wrapping the LALRPOP grammar invocation and converting its errors to `ParseError`. - `ast_load.rs`: renamed from the misnamed `parser.rs` (which only held `AstLoad`); frees the `parser.rs` name for the trait. - `module.rs`: `parse()` now calls `LalrpopParser::parse_module` through the trait. No more `lalrpop_util` import in module.rs. To add a new parser backend, drop in another module that impls `Parser` — all tests (including golden error message tests) run through the same `parse()` path. Reviewed By: JakobDegen Differential Revision: D98740934 fbshipit-source-id: b9671748cc6a9ee84b4e8a4e3a953ecd38cfeff3
1 parent bca718a commit 58ce6c1

6 files changed

Lines changed: 249 additions & 117 deletions

File tree

starlark_syntax/src/syntax.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,21 @@
1717

1818
//! The AST of Starlark as [`AstModule`], along with a [`parse`](AstModule::parse) function.
1919
20+
pub use ast_load::AstLoad;
2021
pub use module::AstModule;
21-
pub use parser::AstLoad;
2222

2323
pub use crate::dialect::Dialect;
2424
pub use crate::dialect::DialectTypes;
2525

2626
pub mod ast;
27+
pub mod ast_load;
2728
pub mod call;
2829
pub mod def;
2930
#[cfg(test)]
3031
mod grammar_tests;
3132
pub mod grammar_util;
3233
mod lint_suppressions;
3334
pub mod module;
34-
pub mod parser;
3535
pub mod payload_map;
3636
pub(crate) mod state;
3737
#[cfg(test)]
@@ -41,6 +41,10 @@ pub mod type_expr;
4141
pub mod uniplate;
4242
pub mod validate;
4343

44+
pub(crate) mod parse_error;
45+
pub(crate) mod parser;
46+
pub(crate) mod parser_lalrpop;
47+
4448
#[allow(clippy::all)]
4549
// Things we explicitly turn on need to be explicitly turned off
4650
#[allow(clippy::inefficient_to_string)]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright 2018 The Starlark in Rust Authors.
3+
* Copyright (c) Facebook, Inc. and its affiliates.
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* https://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
use starlark_map::small_map::SmallMap;
19+
20+
use crate::codemap::FileSpan;
21+
22+
/// A `load` statement loading zero or more symbols from another module.
23+
#[derive(Debug)]
24+
pub struct AstLoad<'a> {
25+
/// Span where this load is written
26+
pub span: FileSpan,
27+
/// Module being loaded
28+
pub module_id: &'a str,
29+
/// Symbols loaded from that module (local ident -> source ident)
30+
pub symbols: SmallMap<&'a str, &'a str>,
31+
}

starlark_syntax/src/syntax/module.rs

Lines changed: 5 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,17 @@
1616
*/
1717

1818
use std::collections::HashMap;
19-
use std::fmt::Write;
2019
use std::fs;
2120
use std::mem;
2221
use std::path::Path;
2322

2423
use derivative::Derivative;
2524
use dupe::Dupe;
26-
use lalrpop_util as lu;
2725

2826
use crate::codemap::CodeMap;
2927
use crate::codemap::FileSpan;
30-
use crate::codemap::Pos;
3128
use crate::codemap::Span;
3229
use crate::codemap::Spanned;
33-
use crate::eval_exception::EvalException;
3430
use crate::lexer::Lexer;
3531
use crate::lexer::Token;
3632
use crate::syntax::AstLoad;
@@ -43,69 +39,13 @@ use crate::syntax::ast::ExprP;
4339
use crate::syntax::ast::IdentP;
4440
use crate::syntax::ast::LoadArgP;
4541
use crate::syntax::ast::Stmt;
46-
use crate::syntax::grammar::StarlarkParser;
4742
use crate::syntax::lint_suppressions::LintSuppressions;
4843
use crate::syntax::lint_suppressions::LintSuppressionsBuilder;
44+
use crate::syntax::parser::Parser;
45+
use crate::syntax::parser_lalrpop::LalrpopParser;
4946
use crate::syntax::state::ParserState;
5047
use crate::syntax::validate::validate_module;
5148

52-
fn one_of(expected: &[String]) -> String {
53-
let mut result = String::new();
54-
for (i, e) in expected.iter().enumerate() {
55-
let sep = match i {
56-
0 => "one of",
57-
_ if i < expected.len() - 1 => ",",
58-
// Last expected message to be written
59-
_ => " or",
60-
};
61-
write!(result, "{sep} {e}").unwrap();
62-
}
63-
result
64-
}
65-
66-
/// Convert the error to a codemap diagnostic.
67-
///
68-
/// To build this diagnostic, the method needs the file span corresponding
69-
/// to the parsed file.
70-
fn parse_error_add_span(
71-
err: lu::ParseError<usize, Token, EvalException>,
72-
pos: usize,
73-
codemap: &CodeMap,
74-
) -> crate::Error {
75-
let (message, span) = match err {
76-
lu::ParseError::InvalidToken { location } => (
77-
"Parse error: invalid token".to_owned(),
78-
Span::new(Pos::new(location as u32), Pos::new(location as u32)),
79-
),
80-
lu::ParseError::UnrecognizedToken {
81-
token: (x, t, y),
82-
expected,
83-
} => (
84-
format!(
85-
"Parse error: unexpected {} here, expected {}",
86-
t,
87-
one_of(&expected)
88-
),
89-
Span::new(Pos::new(x as u32), Pos::new(y as u32)),
90-
),
91-
lu::ParseError::UnrecognizedEOF { .. } => (
92-
"Parse error: unexpected end of file".to_owned(),
93-
Span::new(Pos::new(pos as u32), Pos::new(pos as u32)),
94-
),
95-
lu::ParseError::ExtraToken { token: (x, t, y) } => (
96-
format!("Parse error: extraneous token {t}"),
97-
Span::new(Pos::new(x as u32), Pos::new(y as u32)),
98-
),
99-
lu::ParseError::User { error } => return error.into_error(),
100-
};
101-
102-
crate::Error::new_spanned(
103-
crate::ErrorKind::Parser(anyhow::anyhow!(message)),
104-
span,
105-
codemap,
106-
)
107-
}
108-
10949
/// A representation of a Starlark module abstract syntax tree.
11050
///
11151
/// Created with either [`parse`](AstModule::parse) or [`parse_file`](AstModule::parse_file),
@@ -218,7 +158,7 @@ impl AstModule {
218158
// Keep track of block of comments, used for accumulating lint suppressions
219159
let mut in_comment_block = false;
220160
let mut errors = Vec::new();
221-
match StarlarkParser::new().parse(
161+
match LalrpopParser::parse_module(
222162
&mut ParserState {
223163
codemap: &codemap,
224164
dialect,
@@ -239,6 +179,7 @@ impl AstModule {
239179
true
240180
}
241181
}),
182+
codemap.source().len(),
242183
) {
243184
Ok(v) => {
244185
if let Some(err) = errors.into_iter().next() {
@@ -252,7 +193,7 @@ impl AstModule {
252193
lint_suppressions_builder.build(),
253194
)?)
254195
}
255-
Err(p) => Err(parse_error_add_span(p, codemap.source().len(), &codemap)),
196+
Err(e) => Err(e.into_crate_error(&codemap)),
256197
}
257198
}
258199

@@ -374,11 +315,6 @@ impl AstModule {
374315

375316
#[cfg(test)]
376317
mod tests {
377-
use lalrpop_util as lu;
378-
379-
use super::parse_error_add_span;
380-
use crate::codemap::CodeMap;
381-
use crate::lexer::Token;
382318
use crate::slice_vec_ext::SliceExt;
383319
use crate::syntax::grammar_tests;
384320

@@ -394,39 +330,4 @@ mod tests {
394330
assert_eq!(&get("foo"), "1:1-4");
395331
assert_eq!(&get("foo\ndef x():\n pass"), "1:1-4 2:1-3:8 3:4-8");
396332
}
397-
398-
#[test]
399-
fn test_parse_error_add_span_bucket_mapping() {
400-
let codemap = CodeMap::new("test.bzl".to_owned(), "pass".to_owned());
401-
402-
let assert_parse_error = |parse_error, pos, want_message, want_span| {
403-
let err = parse_error_add_span(parse_error, pos, &codemap);
404-
assert_eq!(format!("{}", err.without_diagnostic()), want_message);
405-
assert_eq!(err.span().unwrap().to_string(), want_span);
406-
};
407-
408-
assert_parse_error(
409-
lu::ParseError::InvalidToken { location: 2 },
410-
4,
411-
"Parse error: invalid token",
412-
"test.bzl:1:3",
413-
);
414-
assert_parse_error(
415-
lu::ParseError::UnrecognizedEOF {
416-
location: 1,
417-
expected: vec![],
418-
},
419-
4,
420-
"Parse error: unexpected end of file",
421-
"test.bzl:1:5",
422-
);
423-
assert_parse_error(
424-
lu::ParseError::ExtraToken {
425-
token: (1, Token::ClosingRound, 2),
426-
},
427-
4,
428-
"Parse error: extraneous token symbol ')'",
429-
"test.bzl:1:2-3",
430-
);
431-
}
432333
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Copyright 2018 The Starlark in Rust Authors.
3+
* Copyright (c) Facebook, Inc. and its affiliates.
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* https://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
//! Parser-agnostic error type for parse failures.
19+
20+
use crate::codemap::CodeMap;
21+
use crate::codemap::Span;
22+
use crate::eval_exception::EvalException;
23+
24+
/// A parse error with a message and source span.
25+
/// This is the common error type returned by any parser implementation,
26+
/// independent of the underlying parsing strategy (LALRPOP, recursive descent, etc.).
27+
pub(crate) enum ParseError {
28+
/// An error with a message and span, to be rendered as a diagnostic.
29+
Error { message: String, span: Span },
30+
/// An error that already has full diagnostic information (e.g., from
31+
/// user-defined error callbacks in the parser state).
32+
EvalException(EvalException),
33+
}
34+
35+
impl ParseError {
36+
/// Convert this parse error into a `crate::Error` with source location.
37+
pub(crate) fn into_crate_error(self, codemap: &CodeMap) -> crate::Error {
38+
match self {
39+
ParseError::Error { message, span } => crate::Error::new_spanned(
40+
crate::ErrorKind::Parser(anyhow::anyhow!(message)),
41+
span,
42+
codemap,
43+
),
44+
ParseError::EvalException(e) => e.into_error(),
45+
}
46+
}
47+
}

starlark_syntax/src/syntax/parser.rs

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,28 @@
1515
* limitations under the License.
1616
*/
1717

18-
use starlark_map::small_map::SmallMap;
18+
//! Parser abstraction for Starlark.
19+
//!
20+
//! [`Parser`] is the common interface implemented by each parser backend.
21+
//! Today the only impl is [`super::parser_lalrpop::LalrpopParser`]; additional
22+
//! impls (e.g. recursive descent) plug in here.
1923
20-
use crate::codemap::FileSpan;
24+
use crate::eval_exception::EvalException;
25+
use crate::lexer::Token;
26+
use crate::syntax::ast::AstStmt;
27+
use crate::syntax::parse_error::ParseError;
28+
use crate::syntax::state::ParserState;
2129

22-
/// A `load` statement loading zero or more symbols from another module.
23-
#[derive(Debug)]
24-
pub struct AstLoad<'a> {
25-
/// Span where this load is written
26-
pub span: FileSpan,
27-
/// Module being loaded
28-
pub module_id: &'a str,
29-
/// Symbols loaded from that module (local ident -> source ident)
30-
pub symbols: SmallMap<&'a str, &'a str>,
30+
pub(crate) type Lexeme = Result<(usize, Token, usize), EvalException>;
31+
32+
/// Parse a Starlark module from a token stream.
33+
///
34+
/// Implementors normalize backend-specific errors into [`ParseError`] so
35+
/// callers can render diagnostics independently of the parser in use.
36+
pub(crate) trait Parser {
37+
fn parse_module<I: Iterator<Item = Lexeme>>(
38+
state: &mut ParserState<'_>,
39+
tokens: I,
40+
eof_pos: usize,
41+
) -> Result<AstStmt, ParseError>;
3142
}

0 commit comments

Comments
 (0)