-
-
Notifications
You must be signed in to change notification settings - Fork 898
implement lenient parser #2129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
implement lenient parser #2129
Changes from 5 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
42e35c5
move query parser to nom
trinity-1686a bac9236
add suupport for term grouping
trinity-1686a 3fdcc04
initial work on infallible parser
trinity-1686a a50f6e6
fmt
trinity-1686a f0fb438
add tests and fix minor parsing bugs
trinity-1686a 895fe26
address review comments
trinity-1686a c4cb0a5
add support for lenient queries in tantivy
trinity-1686a 0a575fb
clippy
trinity-1686a 29f2901
actually return errors in lenient queries
trinity-1686a d24ddf0
rustfmt
trinity-1686a 86b63db
make lenient parser report errors
trinity-1686a f7ed0b7
allow mixing occur and bool in query
trinity-1686a File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,286 @@ | ||
| //! nom combinators for infallible operations | ||
|
|
||
| use nom::{IResult, InputLength}; | ||
| pub type ErrorList = Vec<(usize, String)>; | ||
| pub type JResult<I, O> = IResult<I, (O, ErrorList), std::convert::Infallible>; | ||
|
|
||
| // when rfcs#1733 gets stabilized, this can make things clearer | |
| // trait InfallibleParser<I, O> = nom::Parser<I, (O, ErrorList), std::convert::Infallible>; | ||
|
|
||
| // TODO space0 and space1: space0 can't fail, space1 can parse nothing but reports missing space | ||
|
|
||
| /// A variant of the classical `opt` parser, except it returns an infallible error type. | ||
| /// | ||
| /// It's less generic than the original to ease type resolution in the rest of the code. | ||
| pub fn opt_i<I: Clone, O, F>(mut f: F) -> impl FnMut(I) -> JResult<I, Option<O>> | ||
| where F: nom::Parser<I, O, nom::error::Error<I>> { | ||
| move |input: I| { | ||
| let i = input.clone(); | ||
| match f.parse(input) { | ||
| Ok((i, o)) => Ok((i, (Some(o), Vec::new()))), | ||
| Err(_) => Ok((i, (None, Vec::new()))), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| pub fn fallible<I, O, E: nom::error::ParseError<I>, F>( | ||
| mut f: F, | ||
| ) -> impl FnMut(I) -> IResult<I, O, E> | ||
| where F: nom::Parser<I, (O, ErrorList), std::convert::Infallible> { | ||
| use nom::Err; | ||
| move |input: I| match f.parse(input) { | ||
| Ok((input, (output, _err))) => Ok((input, output)), | ||
| Err(Err::Incomplete(needed)) => Err(Err::Incomplete(needed)), | ||
| Err(Err::Error(val)) | Err(Err::Failure(val)) => match val {}, | ||
| } | ||
| } | ||
|
|
||
| pub fn delimited_infallible<I, O1, O2, O3, F, G, H>( | ||
| mut first: F, | ||
| mut second: G, | ||
| mut third: H, | ||
| ) -> impl FnMut(I) -> JResult<I, O2> | ||
| where | ||
| F: nom::Parser<I, (O1, ErrorList), std::convert::Infallible>, | ||
| G: nom::Parser<I, (O2, ErrorList), std::convert::Infallible>, | ||
| H: nom::Parser<I, (O3, ErrorList), std::convert::Infallible>, | ||
| { | ||
| move |input: I| { | ||
| let (input, (_, mut err)) = first.parse(input)?; | ||
| let (input, (o2, mut err2)) = second.parse(input)?; | ||
| err.append(&mut err2); | ||
| let (input, (_, mut err3)) = third.parse(input)?; | ||
| err.append(&mut err3); | ||
| Ok((input, (o2, err))) | ||
| } | ||
| } | ||
|
|
||
| // Parse nothing. Just a lazy way to not implement terminated/preceded and use delimited instead | ||
| pub fn nothing(i: &str) -> JResult<&str, ()> { | ||
| Ok((i, ((), Vec::new()))) | ||
| } | ||
|
|
||
| pub trait TupleInfallible<I, O> { | ||
| /// Parses the input and returns a tuple of results of each parser. | ||
| fn parse(&mut self, input: I) -> JResult<I, O>; | ||
| } | ||
|
|
||
| impl<Input, Output, F: nom::Parser<Input, (Output, ErrorList), std::convert::Infallible>> | ||
| TupleInfallible<Input, (Output,)> for (F,) | ||
| { | ||
| fn parse(&mut self, input: Input) -> JResult<Input, (Output,)> { | ||
| self.0.parse(input).map(|(i, (o, e))| (i, ((o,), e))) | ||
| } | ||
| } | ||
|
|
||
// These macros are heavily copied from nom, with some minor adaptations for our types.
//
// `tuple_trait!` takes a list of `ParserType OutputType` identifier pairs and emits one
// `tuple_trait_impl!` call per tuple arity, from 2 up to the number of pairs given.
macro_rules! tuple_trait {
    // Entry point: seed the accumulator with the first two pairs.
    ($first_fn:ident $first_out:ident, $second_fn:ident $second_out:ident, $($more_fn:ident $more_out:ident),*) => {
        tuple_trait!(__impl $first_fn $first_out, $second_fn $second_out; $($more_fn $more_out),*);
    };
    // Recursive step: emit the impl for the accumulated pairs, then move one pair over.
    (__impl $($done_fn:ident $done_out:ident),+; $next_fn:ident $next_out:ident, $($todo_fn:ident $todo_out:ident),*) => {
        tuple_trait_impl!($($done_fn $done_out),+);
        tuple_trait!(__impl $($done_fn $done_out),+ , $next_fn $next_out; $($todo_fn $todo_out),*);
    };
    // Last step: a single pair is left, emit the two final arities.
    (__impl $($done_fn:ident $done_out:ident),+; $last_fn:ident $last_out:ident) => {
        tuple_trait_impl!($($done_fn $done_out),+);
        tuple_trait_impl!($($done_fn $done_out),+, $last_fn $last_out);
    };
}
|
|
||
// Emits the `TupleInfallible` impl for one tuple arity, given its `ParserType OutputType` pairs.
macro_rules! tuple_trait_impl {
    ($($parser:ident $out:ident),+) => {
        impl<
            Input: Clone, $($out),+ ,
            $($parser: nom::Parser<Input, ($out, ErrorList), std::convert::Infallible>),+
        > TupleInfallible<Input, ( $($out),+ )> for ( $($parser),+ ) {
            fn parse(&mut self, input: Input) -> JResult<Input, ( $($out),+ )> {
                // Errors reported by every parser of the tuple end up in this single list.
                let mut collected = Vec::new();
                tuple_trait_inner!(0, self, input, (), collected, $($parser)+)
            }
        }
    };
}
|
|
||
// Body of `TupleInfallible::parse`, unrolled one tuple field at a time.
//
// Arguments: index of the field to run, the tuple expression, the remaining input, the outputs
// parsed so far, the error accumulator, and one identifier per field still to process.
macro_rules! tuple_trait_inner {
    // First field: nothing has been parsed yet.
    ($idx:tt, $tuple:expr, $remaining:expr, (), $errors:expr, $head:ident $($tail:ident)+) => {{
        let (rest, (value, mut new_errors)) = $tuple.$idx.parse($remaining.clone())?;
        $errors.append(&mut new_errors);

        succ!($idx, tuple_trait_inner!($tuple, rest, ( value ), $errors, $($tail)+))
    }};
    // Middle field: add the new output to the ones already parsed and keep going.
    ($idx:tt, $tuple:expr, $remaining:expr, ($($parsed:tt)*), $errors:expr, $head:ident $($tail:ident)+) => {{
        let (rest, (value, mut new_errors)) = $tuple.$idx.parse($remaining.clone())?;
        $errors.append(&mut new_errors);

        succ!($idx, tuple_trait_inner!($tuple, rest, ($($parsed)* , value), $errors, $($tail)+))
    }};
    // Last field: build the final result.
    ($idx:tt, $tuple:expr, $remaining:expr, ($($parsed:tt)*), $errors:expr, $head:ident) => {{
        let (rest, (value, mut new_errors)) = $tuple.$idx.parse($remaining.clone())?;
        $errors.append(&mut new_errors);

        Ok((rest, (($($parsed)* , value), $errors)))
    }};
}
|
|
||
// Invokes `$callee!` with the successor of the given literal index (0 through 20) as its first
// argument, followed by the forwarded arguments. Used to walk tuple fields by position.
macro_rules! succ {
    (0, $callee:ident ! ($($args:tt)*)) => { $callee!(1, $($args)*) };
    (1, $callee:ident ! ($($args:tt)*)) => { $callee!(2, $($args)*) };
    (2, $callee:ident ! ($($args:tt)*)) => { $callee!(3, $($args)*) };
    (3, $callee:ident ! ($($args:tt)*)) => { $callee!(4, $($args)*) };
    (4, $callee:ident ! ($($args:tt)*)) => { $callee!(5, $($args)*) };
    (5, $callee:ident ! ($($args:tt)*)) => { $callee!(6, $($args)*) };
    (6, $callee:ident ! ($($args:tt)*)) => { $callee!(7, $($args)*) };
    (7, $callee:ident ! ($($args:tt)*)) => { $callee!(8, $($args)*) };
    (8, $callee:ident ! ($($args:tt)*)) => { $callee!(9, $($args)*) };
    (9, $callee:ident ! ($($args:tt)*)) => { $callee!(10, $($args)*) };
    (10, $callee:ident ! ($($args:tt)*)) => { $callee!(11, $($args)*) };
    (11, $callee:ident ! ($($args:tt)*)) => { $callee!(12, $($args)*) };
    (12, $callee:ident ! ($($args:tt)*)) => { $callee!(13, $($args)*) };
    (13, $callee:ident ! ($($args:tt)*)) => { $callee!(14, $($args)*) };
    (14, $callee:ident ! ($($args:tt)*)) => { $callee!(15, $($args)*) };
    (15, $callee:ident ! ($($args:tt)*)) => { $callee!(16, $($args)*) };
    (16, $callee:ident ! ($($args:tt)*)) => { $callee!(17, $($args)*) };
    (17, $callee:ident ! ($($args:tt)*)) => { $callee!(18, $($args)*) };
    (18, $callee:ident ! ($($args:tt)*)) => { $callee!(19, $($args)*) };
    (19, $callee:ident ! ($($args:tt)*)) => { $callee!(20, $($args)*) };
    (20, $callee:ident ! ($($args:tt)*)) => { $callee!(21, $($args)*) };
}
|
|
||
| tuple_trait!(FnA A, FnB B, FnC C, FnD D, FnE E, FnF F, FnG G, FnH H, FnI I, FnJ J, FnK K, FnL L, | ||
| FnM M, FnN N, FnO O, FnP P, FnQ Q, FnR R, FnS S, FnT T, FnU U); | ||
|
|
||
| // Special case: implement `TupleInfallible` for `()`, the unit type. | ||
| // This can come up in macros which accept a variable number of arguments. | ||
| // Literally, `()` is an empty tuple, so it should simply parse nothing. | ||
| impl<I> TupleInfallible<I, ()> for () { | ||
| fn parse(&mut self, input: I) -> JResult<I, ()> { | ||
| Ok((input, ((), Vec::new()))) | ||
| } | ||
| } | ||
|
|
||
| pub fn tuple_infallible<I, O, List: TupleInfallible<I, O>>( | ||
| mut l: List, | ||
| ) -> impl FnMut(I) -> JResult<I, O> { | ||
| move |i: I| l.parse(i) | ||
| } | ||
|
|
||
| pub fn separated_list_infallible<I, O, O2, F, G>( | ||
| mut sep: G, | ||
| mut f: F, | ||
| ) -> impl FnMut(I) -> JResult<I, Vec<O>> | ||
| where | ||
| I: Clone + InputLength, | ||
| F: nom::Parser<I, (O, ErrorList), std::convert::Infallible>, | ||
| G: nom::Parser<I, (O2, ErrorList), std::convert::Infallible>, | ||
| { | ||
| move |mut i: I| { | ||
| let mut res = Vec::new(); | ||
| let mut errors = Vec::new(); | ||
|
trinity-1686a marked this conversation as resolved.
Outdated
|
||
|
|
||
| match f.parse(i.clone()) { | ||
| Err(_) => unreachable!(), | ||
| Ok((i1, (o, mut err))) => { | ||
| errors.append(&mut err); | ||
| res.push(o); | ||
| i = i1; | ||
| } | ||
| } | ||
|
|
||
| loop { | ||
| let len = i.input_len(); | ||
| match sep.parse(i.clone()) { | ||
| Err(_) => unreachable!(), | ||
| Ok((i1, (_, mut err))) => { | ||
|
trinity-1686a marked this conversation as resolved.
Outdated
|
||
| errors.append(&mut err); | ||
|
|
||
| match f.parse(i1.clone()) { | ||
| Err(_) => unreachable!(), | ||
|
trinity-1686a marked this conversation as resolved.
Outdated
|
||
| Ok((i2, (o, mut err))) => { | ||
| // infinite loop check: the parser must always consume | ||
| // if we consumed nothing here, don't produce an element. | ||
| if i2.input_len() == len { | ||
| return Ok((i1, (res, errors))); | ||
| } | ||
| res.push(o); | ||
| errors.append(&mut err); | ||
| i = i2; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| pub trait Alt<I, O> { | ||
| /// Tests each parser in the tuple and returns the result of the first one that succeeds | ||
| fn choice(&mut self, input: I) -> Option<JResult<I, O>>; | ||
| } | ||
|
|
||
// `alt_trait!` takes a list of `ConditionType ParserType` identifier pairs and emits one
// `alt_trait_impl!` call per tuple arity, from 1 up to the number of pairs given.
macro_rules! alt_trait {
    // Entry point: seed the accumulator with the first pair.
    ($lead_cond:ident $lead:ident, $($more_cond:ident $more:ident),+) => {
        alt_trait!(__impl $lead_cond $lead; $($more_cond $more),+);
    };
    // Recursive step: emit the impl for the accumulated pairs, then move one pair over.
    (__impl $($done_cond:ident $done:ident),*; $next_cond:ident $next:ident, $($todo_cond:ident $todo:ident),+) => {
        alt_trait_impl!($($done_cond $done),*);

        alt_trait!(__impl $($done_cond $done,)* $next_cond $next; $($todo_cond $todo),+);
    };
    // Last step: a single pair is left, emit the two final arities.
    (__impl $($done_cond:ident $done:ident),*; $last_cond:ident $last:ident) => {
        alt_trait_impl!($($done_cond $done),*);
        alt_trait_impl!($($done_cond $done,)* $last_cond $last);
    };
}
|
|
||
// Emits the `Alt` impl for one arity: a tuple of `(condition, parser)` pairs.
macro_rules! alt_trait_impl {
    ($($cond:ident $branch:ident),+) => {
        impl<
            Input: Clone, Output,
            $(
                // () are to make things easier on me, but I'm not entirely sure whether we can
                // do better with rule E0207
                $cond: nom::Parser<Input, (), ()>,
                $branch: nom::Parser<Input, (Output, ErrorList), std::convert::Infallible>
            ),+
        > Alt<Input, Output> for ( $(($cond, $branch),)+ ) {
            fn choice(&mut self, input: Input) -> Option<JResult<Input, Output>> {
                // First branch is handled here; the remaining ones are unrolled by the macro.
                if let Ok((rest, _)) = self.0.0.parse(input.clone()) {
                    return Some(self.0.1.parse(rest));
                }
                alt_trait_inner!(1, self, input, $($cond $branch),+)
            }
        }
    };
}
|
|
||
// Unrolled tail of `Alt::choice`: tries branch number `$idx`, then recurses on the next index.
// The identifier pairs only act as a countdown; once a single pair is left, every branch has
// been tried and the result is `None`.
macro_rules! alt_trait_inner {
    ($idx:tt, $tuple:expr, $remaining:expr, $head_cond:ident $head:ident, $($tail_cond:ident $tail:ident),+) => {
        match $tuple.$idx.0.parse($remaining.clone()) {
            Ok((rest, _)) => Some($tuple.$idx.1.parse(rest)),
            Err(_) => succ!($idx, alt_trait_inner!($tuple, $remaining, $($tail_cond $tail),+)),
        }
    };
    ($idx:tt, $tuple:expr, $remaining:expr, $head_cond:ident $head:ident) => {
        None
    };
}
|
|
||
| alt_trait!(A1 A, B1 B, C1 C, D1 D, E1 E, F1 F, G1 G, H1 H, I1 I, J1 J, K1 K, | ||
| L1 L, M1 M, N1 N, O1 O, P1 P, Q1 Q, R1 R, S1 S, T1 T, U1 U); | ||
|
|
||
| /// An alt() like combinator. For each branch, it first tries a fallible parser, which commits to | ||
| /// this branch, or tells to check next branch, and the execute the infallible parser which follow. | ||
| /// | ||
| /// In case no branch match, the default (fallible) parser is executed. | ||
| pub fn alt_infallible<I: Clone, O, F, List: Alt<I, O>>( | ||
| mut l: List, | ||
| mut default: F, | ||
| ) -> impl FnMut(I) -> JResult<I, O> | ||
| where | ||
| F: nom::Parser<I, (O, ErrorList), std::convert::Infallible>, | ||
| { | ||
| move |i: I| l.choice(i.clone()).unwrap_or_else(|| default.parse(i)) | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,19 +1,25 @@ | ||
| #![allow(clippy::derive_partial_eq_without_eq)] | ||
|
|
||
| mod infallible; | ||
| mod occur; | ||
| mod query_grammar; | ||
| mod user_input_ast; | ||
| use combine::parser::Parser; | ||
|
|
||
| pub use crate::occur::Occur; | ||
| use crate::query_grammar::parse_to_ast; | ||
| use crate::query_grammar::{parse_to_ast, parse_to_ast_lenient}; | ||
| pub use crate::user_input_ast::{ | ||
| Delimiter, UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral, | ||
| }; | ||
|
|
||
/// Error returned by [`parse_query`] when the query could not be parsed.
///
/// It carries no detail about what went wrong.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Error;
|
|
||
| /// Parse a query | ||
| pub fn parse_query(query: &str) -> Result<UserInputAst, Error> { | ||
| let (user_input_ast, _remaining) = parse_to_ast().parse(query).map_err(|_| Error)?; | ||
| let (_remaining, user_input_ast) = parse_to_ast(query).map_err(|_| Error)?; | ||
| Ok(user_input_ast) | ||
| } | ||
|
|
||
| /// Parse a query, trying to recover from syntax errors, and giving hints toward fixing errors. | ||
| pub fn parse_query_leniet(query: &str) -> (UserInputAst, Vec<(usize, String)>) { | ||
|
trinity-1686a marked this conversation as resolved.
Outdated
|
||
| parse_to_ast_lenient(query) | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.