Skip to content
4 changes: 1 addition & 3 deletions query-grammar/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,4 @@ keywords = ["search", "information", "retrieval"]
edition = "2021"

[dependencies]
combine = {version="4", default-features=false, features=[] }
once_cell = "1.7.2"
regex ={ version = "1.5.4", default-features = false, features = ["std", "unicode"] }
nom = "7"
286 changes: 286 additions & 0 deletions query-grammar/src/infallible.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
//! nom combinators for infallible operations

use nom::{IResult, InputLength};
/// Errors collected by an infallible parser: each entry pairs a `usize` with a message.
// NOTE(review): the `usize` is presumably a position in the parsed input — confirm with callers.
pub type ErrorList = Vec<(usize, String)>;
Comment thread
trinity-1686a marked this conversation as resolved.
Outdated
/// Result type of an infallible parser: the output `O` is paired with an [`ErrorList`], and the
/// nom error payload is [`std::convert::Infallible`].
pub type JResult<I, O> = IResult<I, (O, ErrorList), std::convert::Infallible>;

// when rfcs#1733 gets stabilized, this can make things clearer
// trait InfallibleParser<I, O> = nom::Parser<I, (O, ErrorList), std::convert::Infallible>;

// TODO space0 and space1: space0 can't fail, space1 can parse nothing but reports missing space

/// Infallible counterpart of nom's classical `opt` combinator.
///
/// Runs `f` on the input. On success the output is wrapped in `Some` and the remaining input is
/// returned. On any error the original input is handed back untouched together with `None`.
/// The returned error list is always empty.
///
/// It's less generic than the original to ease type resolution in the rest of the code.
pub fn opt_i<I: Clone, O, F>(mut f: F) -> impl FnMut(I) -> JResult<I, Option<O>>
where F: nom::Parser<I, O, nom::error::Error<I>> {
    move |input: I| {
        // Keep a copy so the input can be restored if `f` does not match.
        let untouched = input.clone();
        let (rest, value) = f
            .parse(input)
            .map_or((untouched, None), |(rest, out)| (rest, Some(out)));
        Ok((rest, (value, Vec::new())))
    }
}

/// Adapts an infallible parser so it can be used where a regular nom parser is expected.
///
/// The errors collected by `f` are discarded; only its output is returned. `Incomplete` is
/// forwarded unchanged. The `Error` and `Failure` variants cannot occur because their payload
/// is `Infallible`.
pub fn fallible<I, O, E: nom::error::ParseError<I>, F>(
    mut f: F,
) -> impl FnMut(I) -> IResult<I, O, E>
where F: nom::Parser<I, (O, ErrorList), std::convert::Infallible> {
    use nom::Err;
    move |input: I| {
        f.parse(input)
            .map(|(rest, (output, _errors))| (rest, output))
            .map_err(|err| match err {
                Err::Incomplete(needed) => Err::Incomplete(needed),
                // `never` is uninhabited, so this arm only exists to satisfy exhaustiveness.
                Err::Error(never) | Err::Failure(never) => match never {},
            })
    }
}

/// Runs three infallible parsers in sequence and keeps only the output of the middle one.
///
/// The error lists of the three parsers are concatenated in parse order and returned alongside
/// the output of `second`.
pub fn delimited_infallible<I, O1, O2, O3, F, G, H>(
    mut first: F,
    mut second: G,
    mut third: H,
) -> impl FnMut(I) -> JResult<I, O2>
where
    F: nom::Parser<I, (O1, ErrorList), std::convert::Infallible>,
    G: nom::Parser<I, (O2, ErrorList), std::convert::Infallible>,
    H: nom::Parser<I, (O3, ErrorList), std::convert::Infallible>,
{
    move |input: I| {
        let (after_first, (_, mut errors)) = first.parse(input)?;
        let (after_second, (kept, second_errors)) = second.parse(after_first)?;
        let (rest, (_, third_errors)) = third.parse(after_second)?;
        errors.extend(second_errors);
        errors.extend(third_errors);
        Ok((rest, (kept, errors)))
    }
}

/// Parses nothing: returns the input unchanged, a unit output and no errors.
///
/// Just a lazy way to not implement terminated/preceded and use delimited instead.
pub fn nothing(i: &str) -> JResult<&str, ()> {
    let no_errors = Vec::new();
    Ok((i, ((), no_errors)))
}

/// Helper trait implemented in this module for tuples of infallible parsers.
///
/// `I` is the input type and `O` the combined output type produced by `parse`.
pub trait TupleInfallible<I, O> {
/// Parses the input and returns a tuple of results of each parser.
///
/// As dictated by `JResult`, the output comes paired with an `ErrorList`.
fn parse(&mut self, input: I) -> JResult<I, O>;
}

/// A one-element tuple runs its only parser and wraps the output in a 1-tuple; the error list
/// is passed through unchanged.
impl<Input, Output, F: nom::Parser<Input, (Output, ErrorList), std::convert::Infallible>>
    TupleInfallible<Input, (Output,)> for (F,)
{
    fn parse(&mut self, input: Input) -> JResult<Input, (Output,)> {
        let (rest, (output, errors)) = self.0.parse(input)?;
        Ok((rest, ((output,), errors)))
    }
}

// these macros are heavily copied from nom, with some minor adaptations for our type

// `tuple_trait!` takes a comma-separated list of `ParserType OutputType` identifier pairs and
// calls `tuple_trait_impl!` once for every prefix of that list, starting with the first two pairs.
macro_rules! tuple_trait(
// Entry point: the first two pairs seed the accumulator (left of the `;`), the remaining pairs
// are queued on the right of the `;`.
($name1:ident $ty1:ident, $name2: ident $ty2:ident, $($name:ident $ty:ident),*) => (
tuple_trait!(__impl $name1 $ty1, $name2 $ty2; $($name $ty),*);
);
// Recursive step: emit the impl for the accumulated pairs, then move the next queued pair into
// the accumulator and recurse.
(__impl $($name:ident $ty: ident),+; $name1:ident $ty1:ident, $($name2:ident $ty2:ident),*) => (
tuple_trait_impl!($($name $ty),+);
tuple_trait!(__impl $($name $ty),+ , $name1 $ty1; $($name2 $ty2),*);
);
// Final step, a single pair is left in the queue: emit the impl for the accumulated pairs and
// the impl for the full list.
(__impl $($name:ident $ty: ident),+; $name1:ident $ty1:ident) => (
tuple_trait_impl!($($name $ty),+);
tuple_trait_impl!($($name $ty),+, $name1 $ty1);
);
);

// Emits one `TupleInfallible` impl for the tuple made of the given parser types.
// Each `$name` is a parser type parameter and each `$ty` the output type of that parser; the
// impl's output is the tuple of all `$ty`.
macro_rules! tuple_trait_impl(
($($name:ident $ty: ident),+) => (
impl<
Input: Clone, $($ty),+ ,
$($name: nom::Parser<Input, ($ty, ErrorList), std::convert::Infallible>),+
> TupleInfallible<Input, ( $($ty),+ )> for ( $($name),+ ) {

fn parse(&mut self, input: Input) -> JResult<Input, ( $($ty),+ )> {
// Errors of every parser in the tuple are accumulated into this single list.
let mut error_list = Vec::new();
// Start at tuple index 0 with no output parsed yet (the empty `()` accumulator).
tuple_trait_inner!(0, self, input, (), error_list, $($name)+)
}
}
);
);

// Expands to the body of `TupleInfallible::parse` for a tuple.
// Arguments: `$it` is the index of the tuple field to run next, `$self` the tuple, `$input` the
// input to feed to that parser, the parenthesised list holds the outputs gathered so far,
// `$error_list` is the shared error accumulator, and the trailing identifiers are the parsers
// that still have to run (they are only used to count the remaining steps).
macro_rules! tuple_trait_inner(
// First parser, nothing gathered yet: start the output list with its result.
($it:tt, $self:expr, $input:expr, (), $error_list:expr, $head:ident $($id:ident)+) => ({
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
$error_list.append(&mut err);

// `succ!` re-invokes this macro with the index incremented by one.
succ!($it, tuple_trait_inner!($self, i, ( o ), $error_list, $($id)+))
});
// Intermediate parser: append its result to the outputs gathered so far and continue.
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident $($id:ident)+) => ({
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
$error_list.append(&mut err);

succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $error_list, $($id)+))
});
// Last parser: build the final `Ok` value from the remaining input, all outputs and all errors.
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident) => ({
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
$error_list.append(&mut err);

Ok((i, (($($parsed)* , o), $error_list)))
});
);

// Increments a literal index at macro-expansion time:
// `succ!(N, mac!(args...))` expands to `mac!(N + 1, args...)`.
// Only the indices 0 through 20 are supported.
macro_rules! succ {
    (0, $next:ident ! ( $($args:tt)* )) => { $next!(1, $($args)*) };
    (1, $next:ident ! ( $($args:tt)* )) => { $next!(2, $($args)*) };
    (2, $next:ident ! ( $($args:tt)* )) => { $next!(3, $($args)*) };
    (3, $next:ident ! ( $($args:tt)* )) => { $next!(4, $($args)*) };
    (4, $next:ident ! ( $($args:tt)* )) => { $next!(5, $($args)*) };
    (5, $next:ident ! ( $($args:tt)* )) => { $next!(6, $($args)*) };
    (6, $next:ident ! ( $($args:tt)* )) => { $next!(7, $($args)*) };
    (7, $next:ident ! ( $($args:tt)* )) => { $next!(8, $($args)*) };
    (8, $next:ident ! ( $($args:tt)* )) => { $next!(9, $($args)*) };
    (9, $next:ident ! ( $($args:tt)* )) => { $next!(10, $($args)*) };
    (10, $next:ident ! ( $($args:tt)* )) => { $next!(11, $($args)*) };
    (11, $next:ident ! ( $($args:tt)* )) => { $next!(12, $($args)*) };
    (12, $next:ident ! ( $($args:tt)* )) => { $next!(13, $($args)*) };
    (13, $next:ident ! ( $($args:tt)* )) => { $next!(14, $($args)*) };
    (14, $next:ident ! ( $($args:tt)* )) => { $next!(15, $($args)*) };
    (15, $next:ident ! ( $($args:tt)* )) => { $next!(16, $($args)*) };
    (16, $next:ident ! ( $($args:tt)* )) => { $next!(17, $($args)*) };
    (17, $next:ident ! ( $($args:tt)* )) => { $next!(18, $($args)*) };
    (18, $next:ident ! ( $($args:tt)* )) => { $next!(19, $($args)*) };
    (19, $next:ident ! ( $($args:tt)* )) => { $next!(20, $($args)*) };
    (20, $next:ident ! ( $($args:tt)* )) => { $next!(21, $($args)*) };
}

// Generates the `TupleInfallible` impls for tuples of 2 up to 21 parsers (21 pairs are listed).
// The 1-tuple and the unit type are implemented by hand.
tuple_trait!(FnA A, FnB B, FnC C, FnD D, FnE E, FnF F, FnG G, FnH H, FnI I, FnJ J, FnK K, FnL L,
FnM M, FnN N, FnO O, FnP P, FnQ Q, FnR R, FnS S, FnT T, FnU U);

// Special case: `TupleInfallible` for the unit type `()`.
// Macros accepting a variable number of arguments may end up producing it.
// `()` is literally the empty tuple, so it parses nothing: the input is returned as is,
// with a unit output and no errors.
impl<I> TupleInfallible<I, ()> for () {
    fn parse(&mut self, input: I) -> JResult<I, ()> {
        let no_errors = Vec::new();
        Ok((input, ((), no_errors)))
    }
}

pub fn tuple_infallible<I, O, List: TupleInfallible<I, O>>(
mut l: List,
) -> impl FnMut(I) -> JResult<I, O> {
move |i: I| l.parse(i)
}

/// Parses a list of `f` elements separated by `sep`, collecting errors instead of failing.
///
/// The first element is always parsed and pushed. Then `sep` followed by `f` is applied
/// repeatedly. As soon as one such round consumes no input at all, parsing stops: the element
/// produced by that round (and the errors `f` reported for it) are dropped, and the input left
/// after `sep` is returned as the remaining input.
///
/// Returns the parsed elements together with the errors reported by `sep` and `f`, in parse
/// order.
///
/// `nom::Err::Incomplete` reported by either parser is propagated to the caller. It is the only
/// error an `Infallible`-typed parser can still return, so it must not be treated as unreachable.
pub fn separated_list_infallible<I, O, O2, F, G>(
    mut sep: G,
    mut f: F,
) -> impl FnMut(I) -> JResult<I, Vec<O>>
where
    I: Clone + InputLength,
    F: nom::Parser<I, (O, ErrorList), std::convert::Infallible>,
    G: nom::Parser<I, (O2, ErrorList), std::convert::Infallible>,
{
    move |input: I| {
        let mut res = Vec::new();

        // The error payload is `Infallible`, so `?` can only ever forward `Incomplete`.
        let (mut i, (first, mut errors)) = f.parse(input)?;
        res.push(first);

        loop {
            let len = i.input_len();

            let (after_sep, (_, sep_errors)) = sep.parse(i)?;
            errors.extend(sep_errors);

            // `after_sep` is cloned because it is the remaining input if this round is rejected.
            let (after_elem, (elem, elem_errors)) = f.parse(after_sep.clone())?;

            // infinite loop check: the parser must always consume
            // if we consumed nothing here, don't produce an element.
            if after_elem.input_len() == len {
                return Ok((after_sep, (res, errors)));
            }
            res.push(elem);
            errors.extend(elem_errors);
            i = after_elem;
        }
    }
}

/// Helper trait behind `alt_infallible`, implemented in this module for tuples of
/// `(condition parser, infallible parser)` pairs.
pub trait Alt<I, O> {
/// Tests each parser in the tuple and returns the result of the first one that succeeds
///
/// In the impls generated in this module, `None` is returned when no condition parser succeeds.
fn choice(&mut self, input: I) -> Option<JResult<I, O>>;
}

// `alt_trait!` takes a comma-separated list of `ConditionType ParserType` identifier pairs and
// calls `alt_trait_impl!` once for every non-empty prefix of that list.
macro_rules! alt_trait(
// Entry point: the first pair seeds the accumulator (left of the `;`), the remaining pairs are
// queued on the right of the `;`.
($first_cond:ident $first:ident, $($id_cond:ident $id: ident),+) => (
alt_trait!(__impl $first_cond $first; $($id_cond $id),+);
);
// Recursive step: emit the impl for the accumulated pairs, then move the next queued pair into
// the accumulator and recurse.
(__impl $($current_cond:ident $current:ident),*; $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => (
alt_trait_impl!($($current_cond $current),*);

alt_trait!(__impl $($current_cond $current,)* $head_cond $head; $($id_cond $id),+);
);
// Final step, a single pair is left in the queue: emit the impl for the accumulated pairs and
// the impl for the full list.
(__impl $($current_cond:ident $current:ident),*; $head_cond:ident $head:ident) => (
alt_trait_impl!($($current_cond $current),*);
alt_trait_impl!($($current_cond $current,)* $head_cond $head);
);
);

// Emits one `Alt` impl for a tuple of `(condition, parser)` pairs.
// Each `$id_cond` is the type of a condition parser and each `$id` the type of the infallible
// parser that runs once its condition has succeeded.
macro_rules! alt_trait_impl(
($($id_cond:ident $id:ident),+) => (
impl<
Input: Clone, Output,
$(
// () are to make things easier on me, but I'm not entirely sure whether we can do better
// with rule E0207
$id_cond: nom::Parser<Input, (), ()>,
$id: nom::Parser<Input, (Output, ErrorList), std::convert::Infallible>
),+
> Alt<Input, Output> for ( $(($id_cond, $id),)+ ) {

fn choice(&mut self, input: Input) -> Option<JResult<Input, Output>> {
// Try the condition of the first pair on a copy of the input.
match self.0.0.parse(input.clone()) {
// Condition failed: move on to the pair at index 1 (if there is one).
Err(_) => alt_trait_inner!(1, self, input, $($id_cond $id),+),
// Condition succeeded: commit to this branch and run its parser on what the condition left.
Ok((i1, _)) => Some(self.0.1.parse(i1)),
}
}
}
);
);

// Expands to the chain of condition checks for the pairs at index `$it` and above.
// The list of pairs only serves as a countdown: one pair is dropped per step and the recursion
// stops when a single pair is left.
macro_rules! alt_trait_inner(
// At least two pairs left: test the condition at index `$it`.
($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => (
match $self.$it.0.parse($input.clone()) {
// Condition failed: continue with the next index (`succ!` increments `$it`).
Err(_) => succ!($it, alt_trait_inner!($self, $input, $($id_cond $id),+)),
// Condition succeeded: run the paired parser on the input left by the condition.
Ok((i1, _)) => Some($self.$it.1.parse(i1)),
}
);
// A single pair left: every tuple field has been tried, so no branch matched.
($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident) => (
None
);
);

// Generates the `Alt` impls for tuples of 1 up to 21 `(condition, parser)` pairs
// (21 pairs are listed).
alt_trait!(A1 A, B1 B, C1 C, D1 D, E1 E, F1 F, G1 G, H1 H, I1 I, J1 J, K1 K,
L1 L, M1 M, N1 N, O1 O, P1 P, Q1 Q, R1 R, S1 S, T1 T, U1 U);

/// An `alt()`-like combinator for infallible parsers.
///
/// For each branch, a fallible parser is tried first. If it succeeds, the branch is committed to
/// and the infallible parser that follows it is then executed; otherwise the next branch is
/// checked.
///
/// In case no branch matches, the `default` (infallible) parser is executed on the original
/// input.
pub fn alt_infallible<I: Clone, O, F, List: Alt<I, O>>(
    mut l: List,
    mut default: F,
) -> impl FnMut(I) -> JResult<I, O>
where
    F: nom::Parser<I, (O, ErrorList), std::convert::Infallible>,
{
    move |input: I| match l.choice(input.clone()) {
        Some(branch_result) => branch_result,
        None => default.parse(input),
    }
}
12 changes: 9 additions & 3 deletions query-grammar/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
#![allow(clippy::derive_partial_eq_without_eq)]

mod infallible;
mod occur;
mod query_grammar;
mod user_input_ast;
use combine::parser::Parser;

pub use crate::occur::Occur;
use crate::query_grammar::parse_to_ast;
use crate::query_grammar::{parse_to_ast, parse_to_ast_lenient};
pub use crate::user_input_ast::{
Delimiter, UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral,
};

/// Error returned by [`parse_query`] when the query cannot be parsed.
///
/// It carries no detail about the failure. It implements `Debug`, `Display` and
/// `std::error::Error` so that results can be unwrapped, logged and boxed.
#[derive(Debug)]
pub struct Error;

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("failed to parse query")
    }
}

impl std::error::Error for Error {}

/// Parse a query
///
/// Returns the parsed AST, or [`Error`] when `parse_to_ast` rejects the query.
/// Whatever input `parse_to_ast` leaves unconsumed is ignored.
pub fn parse_query(query: &str) -> Result<UserInputAst, Error> {
    // `parse_to_ast` is called as a plain function returning `(remaining, ast)`; the former
    // combine-style `parse_to_ast().parse(query)` call is gone along with the dependency.
    let (_remaining, user_input_ast) = parse_to_ast(query).map_err(|_| Error)?;
    Ok(user_input_ast)
}

/// Parse a query, trying to recover from syntax errors, and giving hints toward fixing errors.
pub fn parse_query_leniet(query: &str) -> (UserInputAst, Vec<(usize, String)>) {
Comment thread
trinity-1686a marked this conversation as resolved.
Outdated
parse_to_ast_lenient(query)
}
Loading