use crate::location::{ByteOffset, BytePosition, LineAndCharPosition, LineOffset};
use smallvec::{smallvec, SmallVec};
use std::ops::Range;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Keeps track of the byte offsets at which lines start in a source text, so that a byte
/// position can later be translated into a line/character position with
/// [`LineOffsetTracker::at`].
///
/// A default-constructed tracker already holds offset `0` as the start of the first line.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LineOffsetTracker {
/// Byte offset of the start of every recorded line. Lookups binary-search this list.
/// Up to 16 entries are stored inline by `SmallVec` before it spills to the heap.
line_starts: SmallVec<[ByteOffset; 16]>,
}
impl Default for LineOffsetTracker {
fn default() -> Self {
LineOffsetTracker {
line_starts: smallvec![ByteOffset(0)], }
}
}
/// Errors reported by [`LineOffsetTracker::at`] when a byte position cannot be translated
/// into a line/character position.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LineOffsetError {
/// The requested byte offset is greater than the length of the source text.
EndOfInput,
/// The requested byte offset does not fall on the start of a character within its line
/// (nor on the end of the line), i.e. it points into the middle of a multi-byte character.
InsideUnicodeCodepoint,
}
impl LineOffsetTracker {
    /// Records `line_start` as the byte offset at which a new line begins.
    ///
    /// Lookups binary-search the recorded offsets, so line starts are expected to be
    /// recorded in ascending order.
    #[inline(always)]
    pub fn record(&mut self, line_start: ByteOffset) {
        self.line_starts.push(line_start);
    }

    /// Appends the line starts of `other` to this tracker, each shifted by `offset`.
    ///
    /// The first entry of `other` is skipped: for a default-constructed tracker it is the
    /// implicit offset `0`, which belongs to a line this tracker has already started.
    ///
    /// # Panics
    ///
    /// Panics if `other` holds no line starts at all.
    #[inline(always)]
    pub fn append(&mut self, other: &LineOffsetTracker, offset: ByteOffset) {
        let shifted = other.line_starts[1..].iter().map(|&start| offset + start);
        self.line_starts.extend(shifted);
    }

    /// Returns the number of lines recorded so far (the initial line included).
    #[inline(always)]
    pub fn num_lines(&self) -> usize {
        self.line_starts.len()
    }

    /// Returns the byte range covered by line `num`.
    ///
    /// The range runs from the recorded start of the line to the start of the following
    /// line, or to `max` when there is no following line. The end is never greater than `max`.
    ///
    /// # Panics
    ///
    /// Panics if `num` is not the index of a recorded line.
    #[inline(always)]
    fn byte_span_from_line_num(&self, num: LineOffset, max: ByteOffset) -> Range<ByteOffset> {
        let start = self.line_starts[num.to_usize()];
        let next_start = self
            .line_starts
            .get((num + 1).to_usize())
            .copied()
            .unwrap_or(max);
        start..next_start.min(max)
    }

    /// Returns the index of the line that contains `offset`.
    ///
    /// An exact hit on a recorded line start yields that line; otherwise the line is the one
    /// just before the position where `offset` would be inserted into the sorted line starts.
    #[inline(always)]
    fn line_num_from_byte_offset(&self, offset: ByteOffset) -> LineOffset {
        let line_idx = self
            .line_starts
            .binary_search(&offset)
            .unwrap_or_else(|insert_at| insert_at - 1);
        line_idx.into()
    }

    /// Translates a byte position within `source` into a line/character position.
    ///
    /// Lines are numbered from `0`. The column is counted in `char`s (not bytes) from the
    /// start of the line. A position equal to `source.len()` is accepted and maps to the
    /// position just past the last character.
    ///
    /// # Errors
    ///
    /// * [`LineOffsetError::EndOfInput`] if the offset is greater than the length of `source`.
    /// * [`LineOffsetError::InsideUnicodeCodepoint`] if the offset points into the middle of
    ///   a multi-byte character.
    ///
    /// # Panics
    ///
    /// NOTE(review): the slicing below assumes the recorded line starts are valid character
    /// boundaries of `source`; a tracker built for a different text may panic here.
    pub fn at(
        &self,
        source: &str,
        BytePosition(offset): BytePosition,
    ) -> Result<LineAndCharPosition, LineOffsetError> {
        // NOTE(review): `as u32` silently truncates for sources longer than `u32::MAX` bytes.
        let source_len = source.len() as u32;

        // Offset zero is always the very first position, whatever has been recorded.
        if offset.0 == 0 {
            return Ok(LineAndCharPosition::new(0, 0));
        }
        if offset.0 > source_len {
            return Err(LineOffsetError::EndOfInput);
        }

        let line_num = self.line_num_from_byte_offset(offset);
        let Range { start, end } = self.byte_span_from_line_num(line_num, source.len().into());

        // Byte distance of the requested position from the start of its line.
        let limit = (offset - start).0 as usize;
        let line = &source[start.0 as usize..end.0 as usize];

        // `is_char_boundary` is true for the start of every character and for the end of the
        // line, and false both inside a character and past the end of the line.
        if !line.is_char_boundary(limit) {
            return Err(LineOffsetError::InsideUnicodeCodepoint);
        }

        // The column is the number of characters that precede the requested position.
        let column_num = line[..limit].chars().count();
        Ok(LineAndCharPosition::new(line_num.to_usize(), column_num))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a tracker for `s` by recording the byte offset that follows every `'\n'`.
    fn tracker_from_str(s: &str) -> LineOffsetTracker {
        let mut tracker = LineOffsetTracker::default();
        for (newline_at, _) in s.match_indices('\n') {
            tracker.record((newline_at + 1).into());
        }
        tracker
    }

    /// Asserts that byte `offset` of `s` resolves to the given `line` and `column`.
    fn assert_position(
        tracker: &LineOffsetTracker,
        s: &str,
        offset: usize,
        line: usize,
        column: usize,
    ) {
        assert_eq!(
            tracker.at(s, offset.into()).unwrap(),
            LineAndCharPosition::new(line, column),
            "unexpected position for byte offset {}",
            offset
        );
    }

    /// A text without any newline is a single line.
    #[test]
    fn single() {
        let s = "1";
        let tracker = tracker_from_str(s);
        assert_eq!(tracker.num_lines(), 1);

        assert_eq!(&s[0..1], "1");
        assert_position(&tracker, s, 0, 0, 0);
        assert_position(&tracker, s, 1, 0, 1);
    }

    /// Two lines: every byte, the end of input, and one byte past the end.
    #[test]
    fn simple() {
        let s = "01\n345";
        let tracker = tracker_from_str(s);
        assert_eq!(tracker.num_lines(), 2);

        // (byte offset, text at that offset, expected line, expected column)
        let expected: [(usize, &str, usize, usize); 6] = [
            (0, "0", 0, 0),
            (1, "1", 0, 1),
            (2, "\n", 0, 2),
            (3, "3", 1, 0),
            (4, "4", 1, 1),
            (5, "5", 1, 2),
        ];
        for &(offset, text, line, column) in expected.iter() {
            assert_eq!(&s[offset..offset + 1], text);
            assert_position(&tracker, s, offset, line, column);
        }

        // The end of the input is still addressable; anything beyond it is not.
        assert_eq!(s.len(), 6);
        assert_position(&tracker, s, 6, 1, 3);
        assert_eq!(tracker.at(s, 7.into()), Err(LineOffsetError::EndOfInput));
    }

    /// Trackers built for consecutive pieces of a text can be stitched together.
    #[test]
    fn append() {
        let s = "01234\nab`de\nqr`tu";
        let first_tick = s.find('`').unwrap();
        let last_tick = s.rfind('`').unwrap();

        let mut head = tracker_from_str(&s[..first_tick]);
        let mut middle = tracker_from_str(&s[first_tick..last_tick]);
        let tail = tracker_from_str(&s[last_tick..]);
        assert_eq!(head.num_lines(), 2);
        assert_eq!(middle.num_lines(), 2);
        assert_eq!(tail.num_lines(), 1);

        // Each piece is appended at its distance from the start of the piece it extends.
        middle.append(&tail, (last_tick - first_tick).into());
        head.append(&middle, first_tick.into());
        assert_eq!(head.num_lines(), 3);

        assert_eq!(&s[9..10], "d");
        assert_position(&head, s, 9, 1, 3);
        assert_eq!(&s[16..17], "u");
        assert_position(&head, s, 16, 2, 4);
    }

    /// Several lines, a multi-byte character, and trailing empty lines.
    #[test]
    fn complex() {
        let s = "0123456789\n0123456789\n012345\n012345\n🤷\n\n";
        let tracker = tracker_from_str(s);
        assert_eq!(tracker.num_lines(), 7);

        assert_position(&tracker, s, 0, 0, 0);
        assert_position(&tracker, s, s.len(), 6, 0);
        assert_eq!(
            tracker.at(s, (s.len() + 1).into()),
            Err(LineOffsetError::EndOfInput)
        );

        // The first four lines each contain a '2' at column 2.
        let mut search_from = 0;
        for line in 0..4 {
            let idx = search_from + s[search_from..].find('2').unwrap();
            assert_eq!(&s[idx..idx + 1], "2");
            assert_position(&tracker, s, idx, line, 2);
            search_from = idx + 1;
        }

        let idx = s.find('🤷').unwrap();
        assert_eq!(&s[idx..idx + '🤷'.len_utf8()], "🤷");
        assert_position(&tracker, s, idx, 4, 0);
    }
}