1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
//! This module contains logic to parse the text representation of each Ion type.
//! See: <https://amazon-ion.github.io/ion-docs/docs/spec.html>

use std::str::FromStr;

use crate::text::parse_result::{fatal_parse_error, IonParseResult, UpgradeIResult};
use nom::bytes::streaming::is_a;
use nom::character::streaming::one_of;
use nom::combinator::peek;

pub(crate) mod annotations;
pub(crate) mod blob;
pub(crate) mod boolean;
pub(crate) mod clob;
mod comments;
pub(crate) mod containers;
pub(crate) mod decimal;
pub(crate) mod float;
pub(crate) mod integer;
pub(crate) mod null;
pub(crate) mod numeric_support;
pub(crate) mod string;
pub(crate) mod symbol;
pub(crate) mod text_support;
pub(crate) mod timestamp;
pub(crate) mod top_level;
pub(crate) mod value;

/// The characters treated as whitespace in this module, as a `char` slice.
///
/// Each character appears exactly once. (`'\t'` and `'\x09'` are the same code point, U+0009,
/// so only one spelling is listed.)
const WHITESPACE_CHARACTERS: &[char] = &[
    ' ',    // Space (U+0020)
    '\t',   // Horizontal tab (U+0009)
    '\r',   // Carriage return (U+000D)
    '\n',   // Newline / line feed (U+000A)
    '\x0B', // Vertical tab (U+000B)
    '\x0C', // Form feed (U+000C)
];

/// The same set of characters as [WHITESPACE_CHARACTERS], but formatted as a string for use in
/// some `nom` APIs. Each character appears once; `\t` already covers U+0009, so there is no
/// separate `\x09` entry.
const WHITESPACE_CHARACTERS_AS_STR: &str = " \t\r\n\x0B\x0C";

// ===== The functions below are used by several modules and live here for common access. =====

/// Peeks at the next character of `input` and succeeds if it is one of the Ion stop characters:
/// a container delimiter (`{`, `}`, `[`, `]`, `(`, `)`), a comma, a quote (`"` or `'`), or
/// whitespace. The character is returned but NOT consumed, so the remaining input is unchanged.
///
/// Several Ion text encodings (integers, floats, decimals, and timestamps among them) must be
/// followed by one of these characters.
pub(crate) fn stop_character(input: &str) -> IonParseResult<char> {
    // `peek` runs the inner parser and then rewinds, leaving the stop character in the input.
    let next_is_stop = peek(one_of("{}[](),\"' \t\n\r\u{0b}\u{0c}"))(input);
    next_is_stop.upgrade()
}

/// Strips every leading `'0'` from `input` and returns the remaining slice.
///
/// If nothing is left after stripping (the input was all zeros such as `"0"` or `"000"`, or was
/// empty), the single-character string `"0"` is returned instead of an empty slice.
pub(crate) fn trim_leading_zeros(input: &str) -> &str {
    match input.trim_start_matches('0') {
        // Everything was a zero; collapse to one zero.
        "" => "0",
        significant => significant,
    }
}

/// Strips leading zeros from the numeric string `input` and parses what remains as a `u32`.
///
/// Callers are expected to validate the numeric text before calling this function.
///
/// On success, returns the parsed value paired with an empty remaining-input string, since the
/// whole of `input` is considered consumed. If parsing fails, this function does not panic; it
/// returns the result of `fatal_parse_error`, with a message that includes `label` and the
/// underlying parse error.
pub(crate) fn trim_zeros_and_parse_u32<'a>(input: &'a str, label: &str) -> IonParseResult<'a, u32> {
    let digits = trim_leading_zeros(input);
    let value = match u32::from_str(digits) {
        Ok(value) => value,
        Err(e) => {
            return fatal_parse_error(input, format!("parsing {label} as a u32 failed: {e}"));
        }
    };
    // The entire input was consumed, leaving the empty string
    Ok(("", value))
}

/// Strips leading zeros from the numeric string `input` and parses what remains as an `i32`.
///
/// Callers are expected to validate the numeric text before calling this function.
///
/// On success, returns the parsed value paired with an empty remaining-input string, since the
/// whole of `input` is considered consumed. If parsing fails, this function does not panic; it
/// returns the result of `fatal_parse_error`, with a message that includes `label` and the
/// underlying parse error.
pub(crate) fn trim_zeros_and_parse_i32<'a>(input: &'a str, label: &str) -> IonParseResult<'a, i32> {
    let digits = trim_leading_zeros(input);
    let value = match i32::from_str(digits) {
        Ok(value) => value,
        Err(e) => {
            return fatal_parse_error(input, format!("parsing {label} as an i32 failed: {e}"));
        }
    };
    // The entire input was consumed, leaving the empty string
    Ok(("", value))
}

/// Matches a run of one or more characters drawn from [WHITESPACE_CHARACTERS_AS_STR] at the
/// start of `input`, returning the matched run.
pub(crate) fn whitespace(input: &str) -> IonParseResult<&str> {
    let matched = is_a(WHITESPACE_CHARACTERS_AS_STR)(input);
    matched.upgrade()
}

/// Shared helpers for the unit tests of each parsing module.
#[cfg(test)]
pub(crate) mod unit_test_support {
    use crate::text::parse_result::IonParseResult;
    use nom::Finish;
    use std::fmt::Debug;

    /// Runs `parser` over `text` and asserts that the value it yields equals `expected`.
    /// Panics if parsing fails or if the values differ.
    pub(crate) fn parse_test_ok<'a, T, P>(parser: P, text: &'a str, expected: T)
    where
        T: Debug + PartialEq,
        P: Fn(&'a str) -> IonParseResult<'a, T>,
    {
        assert_eq!(parse_unwrap(parser, text), expected);
    }

    /// Runs `parser` over `text`, expecting it to fail. If parsing succeeds instead, panics
    /// with a message showing the input text and the value that was read.
    pub(crate) fn parse_test_err<'a, T, P>(parser: P, text: &'a str)
    where
        T: Debug,
        P: Fn(&'a str) -> IonParseResult<'a, T>,
    {
        if let Ok((_, value)) = parser(text) {
            panic!(
                "parse unexpectedly succeeded: {:?} -> {:?}",
                text, value
            );
        }
    }

    /// Runs `parser` over `text` and returns the parsed value, discarding the remaining input.
    /// Panics if parsing fails, reporting the failure and the offending input text.
    pub(crate) fn parse_unwrap<'a, T, P>(parser: P, text: &'a str) -> T
    where
        T: Debug + PartialEq,
        P: Fn(&'a str) -> IonParseResult<'a, T>,
    {
        match parser(text) {
            Ok((_, value)) => value,
            // `finish()` is applied to the whole failed result so the panic message shows the
            // same `Err(..)` rendering as before.
            failure => panic!(
                "{:?}: parse unexpectedly failed on input: {:?}",
                failure.finish(),
                text
            ),
        }
    }
}