use partiql_source_map::location::ByteOffset;
use regex::{Regex, RegexSet, RegexSetBuilder};
use std::collections::VecDeque;
use std::ops::Range;
use crate::error::LexError;
use crate::lexer::{InternalLexResult, LexResult, PartiqlLexer, Spanned, Token};
use crate::token_parser::{BufferedToken, TokenParser};
use once_cell::sync::Lazy;
use partiql_source_map::line_offset_tracker::LineOffsetTracker;
/// The default set of recognized built-in "special form" functions,
/// constructed lazily on first use via [`built_ins`].
pub(crate) static BUILT_INS: Lazy<FnExprSet<'static>> = Lazy::new(built_ins);
/// A single element of a function-expression argument pattern.
#[derive(Debug, Clone)]
pub(crate) enum FnExprArgMatch<'a> {
    /// Matches exactly one token; the `bool` flags whether keyword tokens are
    /// allowed to match (see `match_arg`, where it binds as `keyword_allowed`).
    AnyOne(bool),
    /// Matches zero or more tokens; the `bool` flags whether keyword tokens
    /// are allowed to match.
    AnyZeroOrMore(bool),
    /// Matches one specific token exactly.
    #[allow(dead_code)]
    Match(Token<'a>),
    /// Matches a specific keyword token (e.g. `FROM`); `match_arg` rewrites it
    /// into a named argument (`, "<kw>" :`).
    NamedArgKw(Token<'a>),
    /// Matches an unquoted identifier against a regex (e.g. `LEADING`);
    /// `match_arg` rewrites it into a named argument (`, <id> :`).
    NamedArgId(Regex),
    /// Synthesizes the carried token into the output without consuming the
    /// current input token (used e.g. to supply trim's default `' '`).
    Synthesize(Token<'a>),
}
/// An ordered list of matchers describing one legal argument shape for a fn-expr.
pub(crate) type FnExprArgList<'a> = Vec<FnExprArgMatch<'a>>;
/// A "special form" function expression: the names it is invoked by and the
/// argument shapes it accepts.
#[derive(Debug, Clone)]
pub(crate) struct FnExpr<'a> {
    /// Names (matched case-insensitively) by which this fn-expr is invoked.
    pub fn_names: Vec<&'a str>,
    /// Candidate argument shapes; all are matched in parallel and the first
    /// surviving one wins (see `rewrite_fn_expr`).
    pub patterns: Vec<FnExprArgList<'a>>,
}
mod built_ins {
    //! Match patterns for the SQL built-in "special form" functions whose
    //! arguments are separated by keywords (e.g. `trim(LEADING 'x' FROM y)`)
    //! rather than plain commas.
    use super::*;
    use regex::Regex;
    use FnExprArgMatch::{
        AnyOne, AnyZeroOrMore as AnyStar, NamedArgId as Id, NamedArgKw as Kw, Synthesize as Syn,
    };

    // NOTE(review): these specifier alternations are unanchored, so
    // `Regex::is_match` also accepts identifiers that merely *contain* a
    // specifier (e.g. `leadingfoo`) — confirm whether that is intended.
    const TRIM_SPECIFIER: &str = "(?i:leading)|(?i:trailing)|(?i:both)";

    /// `trim([LEADING|TRAILING|BOTH] [<chars>] FROM <target>)`
    pub(crate) fn built_in_trim() -> FnExpr<'static> {
        let re = Regex::new(TRIM_SPECIFIER).unwrap();
        FnExpr {
            fn_names: vec!["trim"],
            #[rustfmt::skip]
            patterns: vec![
                // specifier, chars, FROM, target
                vec![Id(re.clone()), AnyOne(true), AnyStar(false), Kw(Token::From), AnyOne(true), AnyStar(false)],
                // specifier, FROM, target — synthesize ' ' as the trim chars
                vec![Id(re), Syn(Token::String(" ")), Kw(Token::From), AnyOne(true), AnyStar(false)],
                // chars, FROM, target
                vec![AnyOne(true), AnyStar(false), Kw(Token::From), AnyOne(true), AnyStar(false)],
                // FROM, target
                vec![Kw(Token::From), AnyOne(true), AnyStar(false)],
            ],
        }
    }

    const EXTRACT_SPECIFIER: &str =
        "(?i:second)|(?i:minute)|(?i:hour)|(?i:day)|(?i:month)|(?i:year)|(?i:timezone_hour)|(?i:timezone_minute)";

    /// `extract(<datetime-field> FROM <source>)` — a `True` token is
    /// synthesized as the value for the field's named argument.
    pub(crate) fn built_in_extract() -> FnExpr<'static> {
        let re = Regex::new(EXTRACT_SPECIFIER).unwrap();
        FnExpr {
            fn_names: vec!["extract"],
            #[rustfmt::skip]
            patterns: vec![
                vec![Id(re), Syn(Token::True), Kw(Token::From), AnyOne(true), AnyStar(true)]
            ],
        }
    }

    /// `position(<needle> IN <haystack>)`
    pub(crate) fn built_in_position() -> FnExpr<'static> {
        FnExpr {
            fn_names: vec!["position"],
            #[rustfmt::skip]
            patterns: vec![
                vec![AnyOne(true), AnyStar(false), Kw(Token::In), AnyOne(true), AnyStar(false)]
            ],
        }
    }

    const PLACING: &str = "(?i:placing)";

    /// `overlay(<target> PLACING <replacement> FROM <pos> [FOR <len>])`
    pub(crate) fn built_in_overlay() -> FnExpr<'static> {
        let re = Regex::new(PLACING).unwrap();
        FnExpr {
            fn_names: vec!["overlay"],
            #[rustfmt::skip]
            patterns: vec![
                vec![AnyOne(true), AnyStar(false), Id(re.clone()), AnyOne(true), AnyStar(false), Kw(Token::From), AnyOne(true), AnyStar(false), Kw(Token::For), AnyOne(true), AnyStar(false)],
                vec![AnyOne(true), AnyStar(false), Id(re), AnyOne(true), AnyStar(false), Kw(Token::From), AnyOne(true), AnyStar(false)],
            ],
        }
    }

    /// Aggregates with an `ALL`/`DISTINCT` set quantifier, e.g. `count(DISTINCT a)`.
    pub(crate) fn built_in_aggs() -> FnExpr<'static> {
        FnExpr {
            fn_names: vec!["count", "avg", "min", "max", "sum", "any", "some", "every"],
            #[rustfmt::skip]
            patterns: vec![
                vec![Kw(Token::All), AnyOne(true), AnyStar(false)],
                vec![Kw(Token::Distinct), AnyOne(true), AnyStar(false)],
            ],
        }
    }

    /// `substring(<target> [FROM <start>] [FOR <len>])`
    pub(crate) fn built_in_substring() -> FnExpr<'static> {
        FnExpr {
            fn_names: vec!["substring"],
            #[rustfmt::skip]
            patterns: vec![
                vec![AnyOne(true), AnyStar(false), Kw(Token::From), AnyOne(true), AnyStar(false), Kw(Token::For), AnyOne(true), AnyStar(false)],
                vec![AnyOne(true), AnyStar(false), Kw(Token::From), AnyOne(true), AnyStar(false)],
                vec![AnyOne(true), AnyStar(false), Kw(Token::For), AnyOne(true), AnyStar(false)],
            ],
        }
    }

    /// `cast(<value> AS <type>)`
    pub(crate) fn built_in_cast() -> FnExpr<'static> {
        FnExpr {
            fn_names: vec!["cast"],
            #[rustfmt::skip]
            patterns: vec![
                vec![AnyOne(true), AnyStar(false), Kw(Token::As), AnyOne(true), AnyStar(true)]
            ],
        }
    }
}
/// A set of [`FnExpr`]s, searchable by function name.
#[derive(Debug, Clone)]
pub(crate) struct FnExprSet<'a> {
    /// One compiled pattern per `FnExpr`, matching that entry's names
    /// case-insensitively; index-aligned with `fn_exprs`.
    fn_names: RegexSet,
    /// The fn-exprs themselves.
    fn_exprs: Vec<FnExpr<'a>>,
}
impl<'a> FnExprSet<'a> {
    /// Builds the matcher set from `fn_exprs`.
    ///
    /// Each entry's `fn_names` are compiled into one case-insensitive,
    /// fully-anchored alternation. Anchoring (`^(?:…)$`) ensures a name only
    /// matches *exactly*; previously the patterns were unanchored, so an
    /// unrelated identifier that merely contained a built-in name (e.g.
    /// `strimmer` containing `trim`, or `mycast` containing `cast`) would
    /// spuriously be treated as a special-form function.
    pub fn new(fn_exprs: Vec<FnExpr<'a>>) -> Self {
        let pats = fn_exprs.iter().map(|spc| {
            // `(?:…)` groups each name so the `|` alternation binds correctly.
            let alternation = spc
                .fn_names
                .iter()
                .map(|n| format!("(?:{n})"))
                .collect::<Vec<_>>()
                .join("|");
            // Anchor the whole alternation so only exact names match.
            format!("^(?:{alternation})$")
        });
        let fn_names = RegexSetBuilder::new(pats)
            .case_insensitive(true)
            .build()
            // The names are plain identifier literals; compilation is expected
            // to succeed.
            .unwrap();
        FnExprSet { fn_names, fn_exprs }
    }

    /// Finds the [`FnExpr`] whose name list contains `name` (compared
    /// case-insensitively, whole-name), if any. If several entries could
    /// match, the first (lowest-index) one is returned.
    #[inline]
    pub fn find(&self, name: &'a str) -> Option<&FnExpr<'a>> {
        self.fn_names
            .matches(name)
            .into_iter()
            .next()
            .map(|idx| &self.fn_exprs[idx])
    }

    /// Returns `true` if `name` is (case-insensitively) one of the known
    /// special-form function names.
    #[inline]
    pub fn contains(&self, name: &'a str) -> bool {
        self.fn_names.is_match(name)
    }
}
pub(crate) fn built_ins() -> FnExprSet<'static> {
FnExprSet::new(vec![
built_ins::built_in_trim(),
built_ins::built_in_aggs(),
built_ins::built_in_extract(),
built_ins::built_in_position(),
built_ins::built_in_overlay(),
built_ins::built_in_substring(),
built_ins::built_in_cast(),
])
}
/// A token together with its byte-offset span.
type SpannedToken<'input> = Spanned<Token<'input>, ByteOffset>;
/// A sequence of spanned tokens.
type SpannedTokenVec<'input> = Vec<SpannedToken<'input>>;

/// The result of matching one buffered token against a pattern's matchers.
#[derive(Debug, Clone)]
enum ArgMatch<'input> {
    /// The token cannot satisfy the matcher; drop this candidate pattern.
    Failed,
    /// The token matched; advance the pattern by this many matchers.
    Consume(usize),
    /// The token matched and is replaced by the given tokens; advance the
    /// pattern by the given number of matchers.
    Replace((usize, SpannedTokenVec<'input>)),
}
/// A lexer wrapper that rewrites SQL "special form" function calls
/// (e.g. `trim(LEADING 'x' FROM y)`) into ordinary named-argument call
/// syntax (e.g. `trim(LEADING: 'x', "from": y)`) as tokens are produced.
pub(crate) struct PreprocessingPartiqlLexer<'input, 'tracker>
where
    'input: 'tracker,
{
    /// The set of special-form functions this lexer recognizes.
    fn_exprs: &'input FnExprSet<'input>,
    /// Underlying token source with buffering/backtracking support.
    parser: TokenParser<'input, 'tracker>,
    /// Already-rewritten tokens (or a pending error) waiting to be emitted.
    buff: VecDeque<InternalLexResult<'input>>,
}

/// Per-token replacement slots recorded while matching: `None` keeps the
/// original token, `Some(toks)` substitutes it with `toks`.
type Substitutions<'input> = Vec<Option<SpannedTokenVec<'input>>>;
impl<'input, 'tracker> PreprocessingPartiqlLexer<'input, 'tracker>
where
    'input: 'tracker,
{
    /// Creates a preprocessing lexer over `input`, using `fn_exprs` as the
    /// set of special-form functions to rewrite.
    #[inline]
    pub fn new(
        input: &'input str,
        tracker: &'tracker mut LineOffsetTracker,
        fn_exprs: &'input FnExprSet<'input>,
    ) -> PreprocessingPartiqlLexer<'input, 'tracker> {
        PreprocessingPartiqlLexer {
            fn_exprs,
            parser: TokenParser::new(PartiqlLexer::new(input, tracker)),
            // Small replay buffer for rewritten token streams.
            buff: VecDeque::with_capacity(20),
        }
    }

    /// Produces the next token, draining any previously buffered rewrite
    /// output first. When a fresh token begins a recognized fn-expr, the whole
    /// call is rewritten eagerly and the rewritten tail is buffered.
    #[inline]
    fn next(&mut self) -> Option<InternalLexResult<'input>> {
        if !self.buff.is_empty() {
            self.buff.pop_front()
        } else {
            match self.parser.consume() {
                Some(Ok(_)) => match self.parser.flush_1() {
                    None => None,
                    Some(token) => {
                        let (tok, buffered) = self.parse_fn_expr(token, 0);
                        if let Some(buffered) = buffered {
                            self.buff.extend(buffered.into_iter().map(Ok));
                        }
                        Some(Ok(tok))
                    }
                },
                Some(Err(e)) => {
                    // On a lexing error, emit any tokens buffered so far, then
                    // the error itself.
                    self.buff
                        .extend(self.parser.flush().into_iter().map(|(t, _)| Ok(t)));
                    self.buff.push_back(Err(e));
                    self.buff.pop_front()
                }
                None => None,
            }
        }
    }

    /// If `tok` is an identifier naming a known fn-expr and the token at peek
    /// index `next_idx` is `(`, rewrites the whole call, returning the
    /// identifier token plus the rewritten remainder. Otherwise returns the
    /// token unchanged with no remainder.
    #[inline]
    fn parse_fn_expr(
        &mut self,
        (tok, _): BufferedToken<'input>,
        next_idx: usize,
    ) -> (SpannedToken<'input>, Option<SpannedTokenVec<'input>>) {
        match tok {
            (_, Token::UnquotedIdent(id), _) | (_, Token::QuotedIdent(id), _) => {
                if let Some(((_, Token::OpenParen, _), _)) = self.parser.peek_n(next_idx) {
                    if let Some(fn_expr) = self.fn_exprs.find(id) {
                        let replacement = match self.rewrite_fn_expr(fn_expr) {
                            Ok(rewrites) => rewrites,
                            // Rewrite failed: fall back to emitting the raw
                            // buffered tokens unmodified.
                            Err(_err) => self.parser.flush().into_iter().map(|(t, _)| t).collect(),
                        };
                        return (tok, Some(replacement));
                    }
                }
            }
            _ => (),
        }
        (tok, None)
    }

    /// Consumes the parenthesized argument list of a fn-expr call, matching
    /// its tokens against all of `fn_expr`'s candidate patterns in parallel,
    /// and returns the rewritten token stream (or a lexical error over the
    /// consumed span if no pattern survives).
    fn rewrite_fn_expr(
        &mut self,
        fn_expr: &'input FnExpr<'input>,
    ) -> Result<SpannedTokenVec<'input>, Spanned<LexError<'input>, ByteOffset>> {
        self.parser.expect(&Token::OpenParen)?;
        let fn_expr_args = &fn_expr.patterns;
        // Each candidate is (matchers not yet satisfied, substitutions so far).
        let mut patterns: Vec<(&[FnExprArgMatch], Substitutions)> = fn_expr_args
            .iter()
            .map(|args| (args.as_slice(), vec![]))
            .collect();
        let mut nesting = 1;
        let mut span: Option<Range<ByteOffset>> = None;
        while nesting > 0 && !patterns.is_empty() {
            let is_nested = nesting > 1;
            let next_tok = self.parser.peek_n(0);
            match &next_tok {
                None => break,
                Some(buffered @ ((s, tok, e), _)) => {
                    // Grow the overall source span covered by this call.
                    span = match span {
                        None => Some(*s..*e),
                        Some(range) => Some(range.start..*e),
                    };
                    match tok {
                        // Opening delimiters deepen nesting; the token itself
                        // is kept verbatim (a `None` substitution slot).
                        Token::OpenParen
                        | Token::OpenSquare
                        | Token::OpenDblAngle
                        | Token::OpenCurly => {
                            patterns.iter_mut().for_each(|(_, subs)| subs.push(None));
                            nesting += 1;
                            self.parser.consume();
                        }
                        // Closing delimiters shallow nesting; reaching 0 ends
                        // the argument list.
                        Token::CloseParen
                        | Token::CloseSquare
                        | Token::CloseDblAngle
                        | Token::CloseCurly => {
                            patterns.iter_mut().for_each(|(_, subs)| subs.push(None));
                            nesting -= 1;
                            self.parser.consume();
                        }
                        // Comments pass through unchanged and don't advance
                        // any pattern.
                        Token::CommentBlock(_) | Token::CommentLine(_) => {
                            patterns.iter_mut().for_each(|(_, subs)| subs.push(None));
                            self.parser.consume();
                        }
                        // A nested fn-expr call: save the outer buffer state,
                        // recursively rewrite the nested call, then restore.
                        Token::UnquotedIdent(id) | Token::QuotedIdent(id)
                            if self.fn_exprs.contains(id) =>
                        {
                            let buffered: BufferedToken<'input> = (*buffered).clone();
                            let backup = self.parser.flush();
                            self.parser.consume();
                            let name = self.parser.flush_1();
                            let (first, rest) = self.parse_fn_expr(buffered.clone(), 0);
                            self.parser.unflush_1(name);
                            self.parser.unflush(backup);
                            match rest {
                                Some(substitutions) => {
                                    // Splice the nested rewrite in as the
                                    // replacement for this argument token.
                                    let replacement: Vec<_> =
                                        std::iter::once(first).chain(substitutions).collect();
                                    patterns = self.process_patterns(
                                        &buffered,
                                        is_nested,
                                        patterns,
                                        Some(replacement),
                                    );
                                }
                                None => {
                                    // Not actually a nested call; treat it as
                                    // an ordinary token.
                                    self.parser.unconsume();
                                    patterns =
                                        self.process_patterns(&buffered, is_nested, patterns, None);
                                    self.parser.consume();
                                }
                            }
                        }
                        // Any other token: advance each surviving pattern.
                        _ => {
                            let buffered = (*buffered).clone();
                            patterns = self.process_patterns(&buffered, is_nested, patterns, None);
                            self.parser.consume();
                        }
                    }
                }
            }
        }
        // A pattern matched completely when exactly one matcher remains (all
        // built-in patterns end in a trailing `AnyZeroOrMore`, which consumes
        // without advancing); the first such survivor wins.
        let pattern = patterns
            .into_iter()
            .filter_map(|(args, subs)| if args.len() == 1 { Some(subs) } else { None })
            .next();
        match pattern {
            None => {
                let range = span.unwrap_or_else(|| 0.into()..0.into());
                Err((range.start, LexError::Unknown, range.end))
            }
            Some(subs) => Ok(Self::rewrite_tokens(self.parser.flush(), subs)),
        }
    }

    /// Advances each surviving candidate pattern by matching `buffered`
    /// against it, dropping candidates that fail and recording per-token
    /// substitutions for the rest.
    #[inline]
    fn process_patterns(
        &mut self,
        buffered: &BufferedToken<'input>,
        is_nested: bool,
        patterns: Vec<(&'input [FnExprArgMatch<'input>], Substitutions<'input>)>,
        token_replacement: Option<Vec<SpannedToken<'input>>>,
    ) -> Vec<(&'input [FnExprArgMatch<'input>], Substitutions<'input>)> {
        patterns
            .into_iter()
            .filter_map(|(args, mut subs)| {
                // `subs.is_empty()` means this is the first argument token,
                // so named-arg rewrites must not emit a leading comma.
                match self.match_arg(buffered, is_nested, subs.is_empty(), args) {
                    ArgMatch::Failed => None,
                    ArgMatch::Consume(n) => {
                        subs.push(token_replacement.clone());
                        args.get(n..).map(|a| (a, subs))
                    }
                    ArgMatch::Replace((n, r)) => {
                        subs.push(Some(r));
                        args.get(n..).map(|a| (a, subs))
                    }
                }
            })
            .collect()
    }

    /// Matches one buffered token against the head of `matchers`, reporting
    /// failure, how many matchers to advance past, or a replacement token
    /// sequence for the matched token.
    #[allow(clippy::only_used_in_recursion)]
    fn match_arg(
        &self,
        tok: &BufferedToken<'input>,
        is_nested: bool,
        is_init_arg: bool,
        matchers: &[FnExprArgMatch<'input>],
    ) -> ArgMatch<'input> {
        use FnExprArgMatch::*;
        match (&matchers[0], tok) {
            // Inside nested delimiters, zero-or-more swallows everything.
            (AnyZeroOrMore(_), _) if is_nested => ArgMatch::Consume(0),
            // Zero-or-more: first try to hand off to the following matcher;
            // if that fails, absorb the token here (unless it's a disallowed
            // keyword or a comma).
            (AnyZeroOrMore(keyword_allowed), ((_, t, _), _)) => match &matchers.get(1) {
                Some(_m) => match self.match_arg(tok, is_nested, false, &matchers[1..]) {
                    ArgMatch::Failed => {
                        if (t.is_keyword() && !keyword_allowed) || t == &Token::Comma {
                            ArgMatch::Failed
                        } else {
                            ArgMatch::Consume(0)
                        }
                    }
                    ArgMatch::Consume(n) => ArgMatch::Consume(n + 1),
                    ArgMatch::Replace((n, r)) => ArgMatch::Replace((n + 1, r)),
                },
                None => {
                    if (t.is_keyword() && !keyword_allowed) || t == &Token::Comma {
                        ArgMatch::Failed
                    } else {
                        ArgMatch::Consume(0)
                    }
                }
            },
            (AnyOne(_), _) if is_nested => ArgMatch::Consume(1),
            (AnyOne(_), ((_, Token::Comma, _), _)) => ArgMatch::Failed,
            (AnyOne(keyword_allowed), ((_, t, _), _)) if t.is_keyword() && !keyword_allowed => {
                ArgMatch::Failed
            }
            (AnyOne(_), _) => ArgMatch::Consume(1),
            (Match(target), ((_, tok, _), _)) if target == tok => ArgMatch::Consume(1),
            // A named-arg specifier identifier becomes `, <id> :` (the comma
            // is dropped when this is the first argument).
            (NamedArgId(re), (tok_id @ (s, Token::UnquotedIdent(id), e), _)) if re.is_match(id) => {
                let args = [
                    (*s, Token::Comma, *s),
                    tok_id.clone(),
                    (*e, Token::Colon, *e),
                ];
                let args = if is_init_arg { &args[1..] } else { &args }.to_owned();
                ArgMatch::Replace((1, args))
            }
            // A named-arg keyword (e.g. FROM) becomes `, "<kw text>" :`.
            (NamedArgKw(kw), ((s, t, e), txt)) if kw == t => {
                let args = [
                    (*s, Token::Comma, *s),
                    (*s, Token::QuotedIdent(txt), *e),
                    (*e, Token::Colon, *e),
                ];
                let args = if is_init_arg { &args[1..] } else { &args }.to_owned();
                ArgMatch::Replace((1, args))
            }
            // Synthesize: prepend the carried token to whatever the next
            // matcher produces for this same input token.
            (Synthesize(syn), ((s, _, _), _)) => match &matchers.get(1) {
                Some(_m) => match self.match_arg(tok, false, false, &matchers[1..]) {
                    ArgMatch::Failed => ArgMatch::Failed,
                    ArgMatch::Consume(n) => ArgMatch::Replace((n + 1, vec![(*s, syn.clone(), *s)])),
                    ArgMatch::Replace((n, mut r)) => {
                        r.insert(0, (*s, syn.clone(), *s));
                        ArgMatch::Replace((n + 1, r))
                    }
                },
                None => ArgMatch::Failed,
            },
            (_, _) => ArgMatch::Failed,
        }
    }

    /// Reassembles the flushed token buffer, replacing each interior token
    /// with its recorded substitution (if any). The first and last buffered
    /// tokens are passed through unchanged.
    #[inline]
    fn rewrite_tokens(
        mut toks: Vec<BufferedToken<'input>>,
        substitution: Substitutions<'input>,
    ) -> SpannedTokenVec<'input> {
        let mut rewrite = vec![toks[0].0.clone()];
        for ((t, _), r) in std::iter::zip(toks.drain(1..toks.len() - 1), substitution) {
            match (r, t) {
                (None, t) => rewrite.push(t),
                (Some(subs), _) => rewrite.extend(subs),
            }
        }
        if let Some(t) = toks.pop().map(|(t, _)| t) {
            rewrite.push(t)
        }
        rewrite
    }
}
impl<'input, 'tracker> Iterator for PreprocessingPartiqlLexer<'input, 'tracker>
where
'input: 'tracker,
{
type Item = LexResult<'input>;
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
self.next().map(|res| res.map_err(|e| e.into()))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use partiql_source_map::line_offset_tracker::LineOffsetTracker;
    use crate::ParseError;

    /// `cast(a AS t)` is rewritten to `cast(a, "AS": t)`.
    #[test]
    fn cast() -> Result<(), ParseError<'static>> {
        let query = "CAST(a AS VARCHAR)";
        let mut offset_tracker = LineOffsetTracker::default();
        let lexer = PreprocessingPartiqlLexer::new(query, &mut offset_tracker, &BUILT_INS);
        let toks: Vec<_> = lexer.collect::<Result<_, _>>()?;
        assert_eq!(
            vec![
                Token::UnquotedIdent("CAST"),
                Token::OpenParen,
                Token::UnquotedIdent("a"),
                Token::Comma,
                Token::QuotedIdent("AS"),
                Token::Colon,
                Token::UnquotedIdent("VARCHAR"),
                Token::CloseParen,
            ],
            toks.into_iter().map(|(_s, t, _e)| t).collect::<Vec<_>>()
        );
        Ok(())
    }

    /// Nested special forms (cast of trim of substring) are all rewritten.
    #[test]
    fn composed() -> Result<(), ParseError<'static>> {
        let query =
            "cast(trim(LEADING 'Foo' from substring('BarFooBar' from 4 for 6)) AS VARCHAR(20))";
        let mut offset_tracker = LineOffsetTracker::default();
        let lexer = PreprocessingPartiqlLexer::new(query, &mut offset_tracker, &BUILT_INS);
        let toks: Vec<_> = lexer.collect::<Result<_, _>>()?;
        // Innermost rewrite: substring's FROM/FOR become named args.
        let substring_expect = vec![
            Token::UnquotedIdent("substring"),
            Token::OpenParen,
            Token::String("BarFooBar"),
            Token::Comma,
            Token::QuotedIdent("from"),
            Token::Colon,
            Token::Int("4"),
            Token::Comma,
            Token::QuotedIdent("for"),
            Token::Colon,
            Token::Int("6"),
            Token::CloseParen,
        ];
        let trim_expect = [
            vec![
                Token::UnquotedIdent("trim"),
                Token::OpenParen,
                Token::UnquotedIdent("LEADING"),
                Token::Colon,
                Token::String("Foo"),
                Token::Comma,
                Token::QuotedIdent("from"),
                Token::Colon,
            ],
            substring_expect,
            vec![Token::CloseParen],
        ]
        .concat();
        let cast_expect = [
            vec![Token::UnquotedIdent("cast"), Token::OpenParen],
            trim_expect,
            vec![
                Token::Comma,
                Token::QuotedIdent("AS"),
                Token::Colon,
                Token::UnquotedIdent("VARCHAR"),
                Token::OpenParen,
                Token::Int("20"),
                Token::CloseParen,
                Token::CloseParen,
            ],
        ]
        .concat();
        assert_eq!(
            cast_expect,
            toks.into_iter().map(|(_s, t, _e)| t).collect::<Vec<_>>()
        );
        Ok(())
    }

    /// Compares the preprocessed token stream for a special-form query with
    /// the plain-lexed stream of its expected named-argument rewrite.
    #[test]
    fn preprocessor() -> Result<(), ParseError<'static>> {
        fn to_tokens<'a>(
            lexer: impl Iterator<Item = LexResult<'a>>,
        ) -> Result<Vec<Token<'a>>, ParseError<'a>> {
            lexer
                .map(|result| result.map(|(_, t, _)| t))
                .collect::<Result<Vec<_>, _>>()
        }
        // Lex without preprocessing (the expected side of each comparison).
        fn lex(query: &str) -> Result<Vec<Token>, ParseError> {
            let mut offset_tracker = LineOffsetTracker::default();
            let lexer = PartiqlLexer::new(query, &mut offset_tracker);
            to_tokens(lexer)
        }
        // Lex with preprocessing (the actual side of each comparison).
        fn preprocess(query: &str) -> Result<Vec<Token>, ParseError> {
            let mut offset_tracker = LineOffsetTracker::default();
            let lexer = PreprocessingPartiqlLexer::new(query, &mut offset_tracker, &BUILT_INS);
            to_tokens(lexer)
        }

        // trim: specifier/chars variants, comments, case-insensitivity.
        assert_eq!(
            preprocess(r#"trim(both from missing)"#)?,
            lex(r#"trim(both: ' ', "from": missing)"#)?
        );
        assert_eq!(
            preprocess(r#"substring('FooBar' from 2 for 3"#)?,
            lex(r#"substring('FooBar', "from": 2, "for": 3"#)?
        );
        assert_eq!(
            preprocess(r#"trim(LEADING 'Foo' from 'FooBar')"#)?,
            lex(r#"trim(LEADING : 'Foo', "from" : 'FooBar')"#)?
        );
        assert_eq!(
            preprocess(r#"trim(LEADING /*blah*/ 'Foo' from 'FooBar')"#)?,
            lex(r#"trim(LEADING : /*blah*/ 'Foo', "from" : 'FooBar')"#)?
        );
        assert_eq!(
            preprocess(
                r#"trim(LEADING --blah
'Foo' from 'FooBar')"#
            )?,
            lex(r#"trim(LEADING : --blah
'Foo', "from" : 'FooBar')"#)?
        );
        assert_eq!(
            preprocess(r#"trim(BOTH TrAiLiNg from TRAILING)"#)?,
            lex(r#"trim(BOTH : TrAiLiNg, "from" : TRAILING)"#)?
        );
        assert_eq!(
            preprocess(r#"trim(LEADING LEADING from 'FooBar')"#)?,
            lex(r#"trim(LEADING : LEADING, "from" : 'FooBar')"#)?
        );
        assert_eq!(
            preprocess(r#"trim(LEADING TrAiLiNg from 'FooBar')"#)?,
            lex(r#"trim(LEADING : TrAiLiNg, "from" : 'FooBar')"#)?
        );
        assert_eq!(
            preprocess(r#"trim(tRaIlInG TrAiLiNg from 'FooBar')"#)?,
            lex(r#"trim(tRaIlInG : TrAiLiNg, "from" : 'FooBar')"#)?
        );
        assert_eq!(
            preprocess(r#"trim(LEADING 'Foo' from leaDing)"#)?,
            lex(r#"trim(LEADING : 'Foo', "from" : leaDing)"#)?
        );
        assert_eq!(
            preprocess(r#"trim('LEADING' from leaDing)"#)?,
            lex(r#"trim('LEADING', "from" : leaDing)"#)?
        );
        assert_eq!(
            preprocess(r#"trim('a' from leaDing)"#)?,
            lex(r#"trim('a', "from" : leaDing)"#)?
        );
        assert_eq!(
            preprocess(r#"trim(leading from ' Bar')"#)?,
            lex(r#"trim(leading : ' ', "from" : ' Bar')"#)?
        );
        assert_eq!(
            preprocess(r#"trim(TrAiLiNg 'Bar' from 'FooBar')"#)?,
            lex(r#"trim(TrAiLiNg : 'Bar', "from" : 'FooBar')"#)?
        );
        assert_eq!(
            preprocess(r#"trim(TRAILING from 'Bar ')"#)?,
            lex(r#"trim(TRAILING: ' ', "from": 'Bar ')"#)?
        );
        assert_eq!(
            preprocess(r#"trim(BOTH 'Foo' from 'FooBarBar')"#)?,
            lex(r#"trim(BOTH: 'Foo', "from": 'FooBarBar')"#)?
        );
        assert_eq!(
            preprocess(r#"trim(botH from ' Bar ')"#)?,
            lex(r#"trim(botH: ' ', "from": ' Bar ')"#)?
        );
        assert_eq!(
            preprocess(r#"trim(from ' Bar ')"#)?,
            lex(r#"trim("from": ' Bar ')"#)?
        );
        // position
        assert_eq!(
            preprocess(r#"position('o' in 'foo')"#)?,
            lex(r#"position('o', "in" : 'foo')"#)?
        );
        // substring: FROM/FOR variants, and plain comma form left unchanged.
        assert_eq!(
            preprocess(r#"substring('FooBar' from 2 for 3)"#)?,
            lex(r#"substring('FooBar', "from": 2, "for": 3)"#)?
        );
        assert_eq!(
            preprocess(r#"substring('FooBar' from 2)"#)?,
            lex(r#"substring('FooBar', "from": 2)"#)?
        );
        assert_eq!(
            preprocess(r#"substring('FooBar' for 3)"#)?,
            lex(r#"substring('FooBar', "for": 3)"#)?
        );
        assert_eq!(
            preprocess(r#"substring('FooBar',1,3)"#)?,
            lex(r#"substring('FooBar', 1,3)"#)?
        );
        assert_eq!(
            preprocess(r#"substring('FooBar',3)"#)?,
            lex(r#"substring('FooBar', 3)"#)?
        );
        // cast, including nested delimiters in the value expression.
        assert_eq!(preprocess(r#"CAST(9 AS b)"#)?, lex(r#"CAST(9, "AS": b)"#)?);
        assert_eq!(
            preprocess(r#"CAST(a AS VARCHAR)"#)?,
            lex(r#"CAST(a, "AS": VARCHAR)"#)?
        );
        assert_eq!(
            preprocess(r#"CAST(a AS VARCHAR(20))"#)?,
            lex(r#"CAST(a, "AS": VARCHAR(20))"#)?
        );
        assert_eq!(
            preprocess(r#"CAST(TRUE AS INTEGER)"#)?,
            lex(r#"CAST(TRUE, "AS": INTEGER)"#)?
        );
        assert_eq!(
            preprocess(r#"CAST( (4 in (1,2,3,4)) AS INTEGER)"#)?,
            lex(r#"CAST( (4 in (1,2,3,4)) , "AS": INTEGER)"#)?
        );
        assert_eq!(
            preprocess(r#"cast([1, 2] as INT)"#)?,
            lex(r#"cast([1, 2] , "as": INT)"#)?
        );
        assert_eq!(
            preprocess(r#"cast(<<1, 2>> as INT)"#)?,
            lex(r#"cast(<<1, 2>> , "as": INT)"#)?
        );
        assert_eq!(
            preprocess(r#"cast({a:1} as INT)"#)?,
            lex(r#"cast({a:1} , "as": INT)"#)?
        );
        // extract: every datetime field, with and without complex sources.
        assert_eq!(
            preprocess(r#"extract(timezone_minute from a)"#)?,
            lex(r#"extract(timezone_minute:True, "from" : a)"#)?
        );
        assert_eq!(
            preprocess(r#"extract(timezone_hour from a)"#)?,
            lex(r#"extract(timezone_hour:True, "from" : a)"#)?
        );
        assert_eq!(
            preprocess(r#"extract(year from a)"#)?,
            lex(r#"extract(year:True, "from" : a)"#)?
        );
        assert_eq!(
            preprocess(r#"extract(month from a)"#)?,
            lex(r#"extract(month:True, "from" : a)"#)?
        );
        assert_eq!(
            preprocess(r#"extract(day from a)"#)?,
            lex(r#"extract(day:True, "from" : a)"#)?
        );
        assert_eq!(
            preprocess(r#"extract(day from day)"#)?,
            lex(r#"extract(day:True, "from" : day)"#)?
        );
        assert_eq!(
            preprocess(r#"extract(hour from a)"#)?,
            lex(r#"extract(hour:True, "from" : a)"#)?
        );
        assert_eq!(
            preprocess(r#"extract(minute from a)"#)?,
            lex(r#"extract(minute:True, "from" : a)"#)?
        );
        assert_eq!(
            preprocess(r#"extract(second from a)"#)?,
            lex(r#"extract(second:True, "from" : a)"#)?
        );
        assert_eq!(
            preprocess(r#"extract(hour from TIME WITH TIME ZONE '01:23:45.678-06:30')"#)?,
            lex(r#"extract(hour:True, "from" : TIME WITH TIME ZONE '01:23:45.678-06:30')"#)?
        );
        assert_eq!(
            preprocess(r#"extract(minute from TIME WITH TIME ZONE '01:23:45.678-06:30')"#)?,
            lex(r#"extract(minute:True, "from" : TIME WITH TIME ZONE '01:23:45.678-06:30')"#)?
        );
        assert_eq!(
            preprocess(r#"extract(second from TIME WITH TIME ZONE '01:23:45.678-06:30')"#)?,
            lex(r#"extract(second:True, "from" : TIME WITH TIME ZONE '01:23:45.678-06:30')"#)?
        );
        assert_eq!(
            preprocess(r#"extract(timezone_hour from TIME WITH TIME ZONE '01:23:45.678-06:30')"#)?,
            lex(
                r#"extract(timezone_hour:True, "from" : TIME WITH TIME ZONE '01:23:45.678-06:30')"#
            )?
        );
        assert_eq!(
            preprocess(
                r#"extract(timezone_minute from TIME WITH TIME ZONE '01:23:45.678-06:30')"#
            )?,
            lex(
                r#"extract(timezone_minute:True, "from" : TIME WITH TIME ZONE '01:23:45.678-06:30')"#
            )?
        );
        assert_eq!(
            preprocess(r#"extract(hour from TIME (2) WITH TIME ZONE '01:23:45.678-06:30')"#)?,
            lex(r#"extract(hour:True, "from" : TIME (2) WITH TIME ZONE '01:23:45.678-06:30')"#)?
        );
        assert_eq!(
            preprocess(r#"extract(minute from TIME (2) WITH TIME ZONE '01:23:45.678-06:30')"#)?,
            lex(r#"extract(minute:True, "from" : TIME (2) WITH TIME ZONE '01:23:45.678-06:30')"#)?
        );
        assert_eq!(
            preprocess(r#"extract(second from TIME (2) WITH TIME ZONE '01:23:45.678-06:30')"#)?,
            lex(r#"extract(second:True, "from" : TIME (2) WITH TIME ZONE '01:23:45.678-06:30')"#)?
        );
        assert_eq!(
            preprocess(
                r#"extract(timezone_hour from TIME (2) WITH TIME ZONE '01:23:45.678-06:30')"#
            )?,
            lex(
                r#"extract(timezone_hour:True, "from" : TIME (2) WITH TIME ZONE '01:23:45.678-06:30')"#
            )?
        );
        assert_eq!(
            preprocess(
                r#"extract(timezone_minute from TIME (2) WITH TIME ZONE '01:23:45.678-06:30')"#
            )?,
            lex(
                r#"extract(timezone_minute:True, "from" : TIME (2) WITH TIME ZONE '01:23:45.678-06:30')"#
            )?
        );
        // Aggregates: ALL/DISTINCT are rewritten; plain calls pass through.
        assert_eq!(preprocess(r#"count(a)"#)?, lex(r#"count(a)"#)?);
        assert_eq!(
            preprocess(r#"count(DISTINCT a)"#)?,
            lex(r#"count("DISTINCT": a)"#)?
        );
        assert_eq!(preprocess(r#"count(all a)"#)?, lex(r#"count("all": a)"#)?);
        let q_count_1 = r#"count(1)"#;
        assert_eq!(preprocess(q_count_1)?, lex(q_count_1)?);
        let q_count_star = r#"count(*)"#;
        assert_eq!(preprocess(q_count_star)?, lex(q_count_star)?);
        assert_eq!(preprocess(r#"sum(a)"#)?, lex(r#"sum(a)"#)?);
        assert_eq!(
            preprocess(r#"sum(DISTINCT a)"#)?,
            lex(r#"sum("DISTINCT": a)"#)?
        );
        assert_eq!(preprocess(r#"sum(all a)"#)?, lex(r#"sum("all": a)"#)?);
        let q_sum_1 = r#"sum(1)"#;
        assert_eq!(preprocess(q_sum_1)?, lex(q_sum_1)?);
        let q_sum_star = r#"sum(*)"#;
        assert_eq!(preprocess(q_sum_star)?, lex(q_sum_star)?);
        assert_eq!(
            preprocess(r#"COUNT(DISTINCT [1,1,1,1,2])"#)?,
            lex(r#"COUNT("DISTINCT" : [1,1,1,1,2])"#)?
        );
        // Queries with no special forms are unchanged by preprocessing.
        let empty_q = "";
        assert_eq!(preprocess(empty_q)?, lex(empty_q)?);
        let union_q = "SELECT a FROM b UNION (SELECT x FROM y ORDER BY a LIMIT 10 OFFSET 5)";
        assert_eq!(preprocess(union_q)?, lex(union_q)?);
        Ok(())
    }
}