use crate::text::parse_result::{
fatal_parse_error, IonParseResult, OrFatalParseError, UpgradeIResult,
};
use crate::text::parsers::numeric_support::base_10_integer_digits;
use crate::text::parsers::stop_character;
use crate::text::text_value::TextValue;
use crate::types::Int;
use nom::branch::alt;
use nom::bytes::streaming::{is_a, tag, take_while1};
use nom::character::streaming::char;
use nom::combinator::{opt, recognize};
use nom::multi::many0_count;
use nom::sequence::{pair, preceded, separated_pair, terminated};
use nom::Err;
use num_bigint::BigInt;
use num_traits::Num;
use std::num::IntErrorKind;
/// Matches a text integer written in any of the supported notations.
///
/// The notations are attempted in a fixed order: `0x`-prefixed hexadecimal, then `0b`-prefixed
/// binary, then decimal. The match only succeeds if `stop_character` also succeeds on the
/// input that immediately follows the integer.
pub(crate) fn parse_integer(input: &str) -> IonParseResult<TextValue> {
    // Order matters: the prefixed notations are tried before plain decimal.
    let any_notation = alt((base_16_integer, base_2_integer, base_10_integer));
    let mut integer_then_stop = terminated(any_notation, stop_character);
    integer_then_stop(input)
}
/// Matches a hexadecimal integer: an optional `-`, a `0x` or `0X` prefix, and then the digits
/// recognized by `base_16_integer_digits`.
///
/// On success the value is returned as a `TextValue::Int`, negated when the sign was present.
/// If the recognized digits cannot be converted, the failure is reported through
/// `or_fatal_parse_error` against the original `input`.
fn base_16_integer(input: &str) -> IonParseResult<TextValue> {
    let hex_prefix = alt((tag("0x"), tag("0X")));
    let mut sign_and_digits = separated_pair(opt(char('-')), hex_prefix, base_16_integer_digits);
    let (remaining, (maybe_sign, text_digits)) = sign_and_digits(input)?;

    // The digits are converted as an unsigned magnitude; the sign is applied afterwards.
    let is_negative = maybe_sign.is_some();
    let converted = parse_integer_with_radix(text_digits, 16).map(|(_, magnitude)| {
        TextValue::Int(if is_negative { -magnitude } else { magnitude })
    });
    let (_, integer) = converted.or_fatal_parse_error(input, "could not parse hex integer")?;
    Ok((remaining, integer))
}
/// Recognizes the digit portion of a hexadecimal integer (the text after the `0x`/`0X` prefix).
///
/// The accepted shape is zero or more "digits followed by a single `_`" groups and then a final
/// run of one or more digits. The whole matched span, underscores included, is returned.
fn base_16_integer_digits(input: &str) -> IonParseResult<&str> {
    let digits_then_underscore = pair(take_base_16_digits1, char('_'));
    let leading_groups = many0_count(digits_then_underscore);
    // Requiring a trailing digit run means the span can never end with an underscore.
    let mut digit_span = recognize(terminated(leading_groups, take_base_16_digits1));
    digit_span(input)
}
/// Recognizes a run of one or more consecutive ASCII hexadecimal digits
/// (`0-9`, `a-f`, `A-F`), converting the `nom` result with `upgrade()`.
fn take_base_16_digits1(input: &str) -> IonParseResult<&str> {
    let is_hex_digit = |c: char| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F');
    take_while1(is_hex_digit)(input).upgrade()
}
/// Matches a binary integer: an optional `-`, a `0b` or `0B` prefix, and then the digits
/// recognized by `base_2_integer_digits`.
///
/// On success the value is returned as a `TextValue::Int`, negated when the sign was present.
/// If the recognized digits cannot be converted, the failure is reported through
/// `or_fatal_parse_error` against the original `input`.
fn base_2_integer(input: &str) -> IonParseResult<TextValue> {
    let binary_prefix = alt((tag("0b"), tag("0B")));
    let mut sign_and_digits = separated_pair(opt(char('-')), binary_prefix, base_2_integer_digits);
    let (remaining, (maybe_sign, text_digits)) = sign_and_digits(input)?;

    // The digits are converted as an unsigned magnitude; the sign is applied afterwards.
    let is_negative = maybe_sign.is_some();
    let converted = parse_integer_with_radix(text_digits, 2).map(|(_, magnitude)| {
        TextValue::Int(if is_negative { -magnitude } else { magnitude })
    });
    let (_, integer) = converted.or_fatal_parse_error(input, "could not parse binary integer")?;
    Ok((remaining, integer))
}
/// Recognizes the digit portion of a binary integer (the text after the `0b`/`0B` prefix).
///
/// The accepted shape is zero or more "digits followed by a single `_`" groups and then a final
/// run of one or more digits, where a digit is `0` or `1`. The whole matched span, underscores
/// included, is returned.
fn base_2_integer_digits(input: &str) -> IonParseResult<&str> {
    const BINARY_DIGITS: &str = "01";
    let leading_groups = many0_count(pair(is_a(BINARY_DIGITS), char('_')));
    // Requiring a trailing digit run means the span can never end with an underscore.
    let final_group = is_a(BINARY_DIGITS);
    recognize(terminated(leading_groups, final_group))(input).upgrade()
}
/// Matches a decimal integer: an optional `-` followed by the digits recognized by
/// `base_10_integer_digits`.
///
/// The sign is kept as part of the recognized text and handed to the radix parser, so no
/// separate negation step is performed here. If the recognized text cannot be converted, the
/// failure is reported through `or_fatal_parse_error` against the original `input`.
fn base_10_integer(input: &str) -> IonParseResult<TextValue> {
    let optional_sign = opt(char('-'));
    let mut signed_digits = recognize(preceded(optional_sign, base_10_integer_digits));
    let (remaining, int_text) = signed_digits(input)?;

    let converted = parse_integer_with_radix(int_text, 10).map(|(_, value)| TextValue::Int(value));
    let (_, integer) = converted.or_fatal_parse_error(input, "could not parse decimal integer")?;
    Ok((remaining, integer))
}
/// Converts already-recognized integer text to an `Int` using the given radix, ignoring any
/// `_` separator characters in the text.
///
/// Text without underscores is passed straight to `parse_sanitized_text_with_radix`. Otherwise
/// an underscore-free copy is parsed instead, and any error it produces is re-pointed at the
/// caller's `text` with `replace_input`, because the copy is local to this function.
fn parse_integer_with_radix(text: &str, radix: u32) -> IonParseResult<Int> {
    if !text.contains('_') {
        return parse_sanitized_text_with_radix(text, radix);
    }

    let digits_only: String = text.chars().filter(|&c| c != '_').collect();
    let reparsed = parse_sanitized_text_with_radix(&digits_only, radix);
    // Rebuild the result so that nothing in it refers to `digits_only`.
    match reparsed {
        Ok((_, integer)) => Ok(("", integer)),
        Err(Err::Incomplete(needed)) => Err(Err::Incomplete(needed)),
        Err(Err::Error(e)) => Err(Err::Error(e.replace_input(text))),
        Err(Err::Failure(e)) => Err(Err::Failure(e.replace_input(text))),
    }
}
/// Converts underscore-free integer text to an `Int` using the given radix.
///
/// The text is first parsed as an `i64`. If that fails only because the value overflows an
/// `i64`, the text is parsed again as a `BigInt`. Any other `i64` parse failure, or a `BigInt`
/// parse failure, is reported as a fatal parse error.
fn parse_sanitized_text_with_radix(text: &str, radix: u32) -> IonParseResult<Int> {
    let parse_error = match i64::from_str_radix(text, radix) {
        Ok(small) => return Ok(("", Int::I64(small))),
        Err(error) => error,
    };

    // Overflow means the text was well-formed but too large in magnitude for an i64.
    let overflowed = matches!(
        parse_error.kind(),
        IntErrorKind::NegOverflow | IntErrorKind::PosOverflow
    );
    if overflowed {
        let big = BigInt::from_str_radix(text, radix).map(Int::BigInt);
        return big.or_fatal_parse_error(text, "found big integer with invalid text");
    }

    fatal_parse_error(
        text,
        format!("found integer with invalid text: {parse_error:?}"),
    )
}
#[cfg(test)]
mod integer_parsing_tests {
    use super::*;
    use crate::text::parsers::integer::parse_integer;
    use crate::text::parsers::unit_test_support::{parse_test_err, parse_test_ok};
    use crate::text::text_value::TextValue;

    /// Checks that `parse_integer` accepts `text` and yields `expected` as an `Int::I64`.
    fn parse_equals_i64(text: &str, expected: i64) {
        let expected_value = TextValue::Int(Int::I64(expected));
        parse_test_ok(parse_integer, text, expected_value)
    }

    /// Checks that `parse_integer` rejects `text`.
    fn parse_fails(text: &str) {
        parse_test_err(parse_integer, text)
    }

    /// Runs `parse_equals_i64` over every `(text, expected)` pair, in order.
    fn all_parse_as(cases: &[(&str, i64)]) {
        for &(text, expected) in cases {
            parse_equals_i64(text, expected);
        }
    }

    /// Runs `parse_fails` over every input, in order.
    fn all_fail(inputs: &[&str]) {
        for &text in inputs {
            parse_fails(text);
        }
    }

    #[test]
    fn test_parse_base_10_integers() {
        all_parse_as(&[("1 ", 1), ("305 ", 305), ("-279 ", -279)]);
        all_fail(&[
            " 305 ",  // leading whitespace
            "+305 ",  // leading plus sign
            "--305 ", // more than one minus sign
            "305",    // nothing after the digits
        ]);
    }

    #[test]
    fn test_parse_base_10_integers_with_underscores() {
        all_parse_as(&[
            ("111_111_222 ", 111_111_222),
            ("-999_999 ", -999_999),
            ("1_2_3_4_5_6 ", 123456),
        ]);
        all_fail(&[
            "_111_111_222 ", // leading underscore
            "111_111_222_ ", // trailing underscore
            "111__111_222 ", // consecutive underscores
        ]);
    }

    #[test]
    fn test_parse_base_2_integers() {
        all_parse_as(&[
            ("0b1 ", 1),
            ("0b101 ", 5),
            ("0B101 ", 5),
            ("0b11110000 ", 240),
            ("-0b11110000 ", -240),
            ("0B11111111 ", 255),
            ("-0B11111111 ", -255),
        ]);
        all_fail(&[
            " 0b0011_0001 ",  // leading whitespace
            "+0b0011_0001 ",  // leading plus sign
            "--0b0011_0001 ", // more than one minus sign
            "0b0011_0001",    // nothing after the digits
        ]);
    }

    #[test]
    fn test_parse_base_2_integers_with_underscores() {
        all_parse_as(&[
            ("0b1_0_1 ", 5),
            ("-0b111 ", -7),
            ("-0b1111_0000 ", -240),
        ]);
        all_fail(&[
            "0b_0011_0001 ",  // underscore directly after the prefix
            "_0b_0011_0001 ", // leading underscore
            "0b0011_0001_ ",  // trailing underscore
            "0b0011__0001 ",  // consecutive underscores
        ]);
    }

    #[test]
    fn test_parse_base_16_integers() {
        all_parse_as(&[
            ("0x1 ", 1),
            ("0xA ", 10),
            ("0xFF ", 255),
            ("0xff ", 255),
            ("0XfF ", 255),
            ("-0xDECAF ", -912559),
        ]);
        all_fail(&[
            " 0xCAFE ",  // leading whitespace
            "+0xCAFE ",  // leading plus sign
            "--0xCAFE ", // more than one minus sign
            "0xCAFE",    // nothing after the digits
        ]);
    }

    #[test]
    fn test_parse_base_16_integers_with_underscores() {
        all_parse_as(&[("0xFA_CE ", 64_206), ("0xF_A_C_E ", 64_206)]);
        all_fail(&[
            "0x_CAFE ",  // underscore directly after the prefix
            "_0xCAFE ",  // leading underscore
            "0xCAFE_ ",  // trailing underscore
            "0xCA__FE ", // consecutive underscores
        ]);
    }
}