use alloc::{
format,
string::{String, ToString},
vec,
vec::Vec,
};
use crate::{ast, hir};
/// This error type encompasses any error that can be returned by this crate.
///
/// Each variant wraps the error produced by one stage: `ast::Error` for
/// parsing and `hir::Error` for translation. `From` impls exist for both, and
/// `Display` simply forwards to the wrapped error.
///
/// This error type is marked as `non_exhaustive`. This means that adding a
/// new variant is not considered a breaking change.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
/// An error that occurred while translating concrete syntax into abstract
/// syntax (AST).
Parse(ast::Error),
/// An error that occurred while translating abstract syntax into a high
/// level intermediate representation (HIR).
Translate(hir::Error),
}
impl From<ast::Error> for Error {
fn from(err: ast::Error) -> Error {
Error::Parse(err)
}
}
impl From<hir::Error> for Error {
fn from(err: hir::Error) -> Error {
Error::Translate(err)
}
}
// Only compiled when the `std` feature is enabled. The impl body is empty, so
// every `std::error::Error` method keeps its default implementation.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
impl core::fmt::Display for Error {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match *self {
Error::Parse(ref x) => x.fmt(f),
Error::Translate(ref x) => x.fmt(f),
}
}
}
/// A helper type for formatting nice error messages.
///
/// This type is responsible for reporting regex parse errors in a nice human
/// readable format. Most of its complexity is from interspersing notational
/// markers pointing out the position where an error occurred.
///
/// All fields are private and borrowed for `'e`; values are built through the
/// `From<&ast::Error>` and `From<&hir::Error>` impls in this file and rendered
/// through the `Display` impl.
#[derive(Debug)]
pub struct Formatter<'e, E> {
/// The original regex pattern in which the error occurred.
pattern: &'e str,
/// The error kind. It must impl fmt::Display.
err: &'e E,
/// The primary span of the error.
span: &'e ast::Span,
/// An auxiliary and optional span, in case the error needs to point to
/// two locations (e.g., when reporting a duplicate capture group name).
aux_span: Option<&'e ast::Span>,
}
impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
    /// Borrows everything needed to render `err` from the AST error itself,
    /// including its auxiliary span when it has one.
    fn from(err: &'e ast::Error) -> Self {
        let pattern = err.pattern();
        let kind = err.kind();
        let span = err.span();
        let aux_span = err.auxiliary_span();
        Self { pattern, err: kind, span, aux_span }
    }
}
impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
    /// Borrows everything needed to render `err` from the HIR error itself.
    /// No auxiliary span is ever set for HIR errors.
    fn from(err: &'e hir::Error) -> Self {
        let pattern = err.pattern();
        let kind = err.kind();
        let span = err.span();
        Self { pattern, err: kind, span, aux_span: None }
    }
}
impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> {
    /// Renders the error as a `regex parse error:` header, the annotated
    /// pattern, and a final `error: <kind>` line (without a trailing
    /// newline).
    ///
    /// Patterns containing a `\n` are additionally framed by `~` dividers,
    /// and every span that crosses a line boundary is described in words
    /// after the closing divider, since such spans are not marked with
    /// carets.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Built once and shared by both layouts below.
        let spans = Spans::from_formatter(self);

        writeln!(f, "regex parse error:")?;
        if self.pattern.contains('\n') {
            let divider = repeat_char('~', 79);
            writeln!(f, "{}", divider)?;
            write!(f, "{}", spans.notate())?;
            writeln!(f, "{}", divider)?;
            // If we have error spans that cover multiple lines, then we just
            // note the line numbers.
            if !spans.multi_line.is_empty() {
                let notes: Vec<String> = spans
                    .multi_line
                    .iter()
                    .map(|span| {
                        // NOTE(review): the `- 1` presumably converts an
                        // exclusive end column into an inclusive one --
                        // confirm against `ast::Span`.
                        format!(
                            "on line {} (column {}) through line {} (column {})",
                            span.start.line,
                            span.start.column,
                            span.end.line,
                            span.end.column - 1
                        )
                    })
                    .collect();
                writeln!(f, "{}", notes.join("\n"))?;
            }
        } else {
            write!(f, "{}", spans.notate())?;
        }
        write!(f, "error: {}", self.err)
    }
}
/// This type represents an arbitrary number of error spans in a way that makes
/// it convenient to notate the regex pattern. ("Notate" means "point out
/// exactly where the error occurred in the regex pattern.")
///
/// Technically, we can only ever have two spans given our current error
/// structure. However, after toiling with a specific algorithm for handling
/// two spans, it became obvious that an algorithm to handle an arbitrary
/// number of spans was actually much simpler.
struct Spans<'p> {
/// The original regex pattern string.
pattern: &'p str,
/// The total width that should be used for line numbers. The width is
/// used for left padding the line numbers for alignment.
///
/// A value of `0` means line numbers should not be displayed. That is,
/// the pattern is itself only one line.
line_number_width: usize,
/// All error spans that occur on a single line. This sequence always has
/// length equivalent to the number of lines in `pattern`, where the index
/// of the sequence represents a line number, starting at `0`. The spans
/// in each line are sorted in ascending order.
///
/// A pattern ending in `\n` counts the empty line after that final `\n`
/// as one more line.
by_line: Vec<Vec<ast::Span>>,
/// All error spans that occur over one or more lines. That is, the start
/// and end position of the span have different line numbers. The spans are
/// sorted in ascending order.
multi_line: Vec<ast::Span>,
}
impl<'p> Spans<'p> {
/// Build a sequence of spans from a formatter.
fn from_formatter<'e, E: core::fmt::Display>(
fmter: &'p Formatter<'e, E>,
) -> Spans<'p> {
let mut line_count = fmter.pattern.lines().count();
// If the pattern ends with a `\n` literal, then our line count is
// off by one, since a span can occur immediately after the last `\n`,
// which is consider to be an additional line.
if fmter.pattern.ends_with('\n') {
line_count += 1;
}
let line_number_width =
if line_count <= 1 { 0 } else { line_count.to_string().len() };
let mut spans = Spans {
pattern: &fmter.pattern,
line_number_width,
by_line: vec![vec![]; line_count],
multi_line: vec![],
};
spans.add(fmter.span.clone());
if let Some(span) = fmter.aux_span {
spans.add(span.clone());
}
spans
}
/// Add the given span to this sequence, putting it in the right place.
fn add(&mut self, span: ast::Span) {
// This is grossly inefficient since we sort after each add, but right
// now, we only ever add two spans at most.
if span.is_one_line() {
let i = span.start.line - 1; // because lines are 1-indexed
self.by_line[i].push(span);
self.by_line[i].sort();
} else {
self.multi_line.push(span);
self.multi_line.sort();
}
}
/// Notate the pattern string with carents (`^`) pointing at each span
/// location. This only applies to spans that occur within a single line.
fn notate(&self) -> String {
let mut notated = String::new();
for (i, line) in self.pattern.lines().enumerate() {
if self.line_number_width > 0 {
notated.push_str(&self.left_pad_line_number(i + 1));
notated.push_str(": ");
} else {
notated.push_str(" ");
}
notated.push_str(line);
notated.push('\n');
if let Some(notes) = self.notate_line(i) {
notated.push_str(¬es);
notated.push('\n');
}
}
notated
}
/// Return notes for the line indexed at `i` (zero-based). If there are no
/// spans for the given line, then `None` is returned. Otherwise, an
/// appropriately space padded string with correctly positioned `^` is
/// returned, accounting for line numbers.
fn notate_line(&self, i: usize) -> Option<String> {
let spans = &self.by_line[i];
if spans.is_empty() {
return None;
}
let mut notes = String::new();
for _ in 0..self.line_number_padding() {
notes.push(' ');
}
let mut pos = 0;
for span in spans {
for _ in pos..(span.start.column - 1) {
notes.push(' ');
pos += 1;
}
let note_len = span.end.column.saturating_sub(span.start.column);
for _ in 0..core::cmp::max(1, note_len) {
notes.push('^');
pos += 1;
}
}
Some(notes)
}
/// Left pad the given line number with spaces such that it is aligned with
/// other line numbers.
fn left_pad_line_number(&self, n: usize) -> String {
let n = n.to_string();
let pad = self.line_number_width.checked_sub(n.len()).unwrap();
let mut result = repeat_char(' ', pad);
result.push_str(&n);
result
}
/// Return the line number padding beginning at the start of each line of
/// the pattern.
///
/// If the pattern is only one line, then this returns a fixed padding
/// for visual indentation.
fn line_number_padding(&self) -> usize {
if self.line_number_width == 0 {
4
} else {
2 + self.line_number_width
}
}
}
/// Builds a string consisting of `c` repeated `count` times; a `count` of
/// zero yields the empty string.
fn repeat_char(c: char, count: usize) -> String {
    (0..count).map(|_| c).collect()
}
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use crate::ast::parse::Parser;

    /// Parses `pattern`, which must be rejected, and asserts that the
    /// rendered error equals `expected_msg` with surrounding whitespace
    /// trimmed.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        let result = Parser::new().parse(pattern);
        match result {
            Ok(_) => {
                panic!("regex should not have parsed");
            }
            Err(err) => {
                assert_eq!(err.to_string(), expected_msg.trim());
            }
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        let err = Parser::new().parse("a{\n").unwrap_err();
        // This test checks that the error formatter doesn't panic.
        assert!(!err.to_string().is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    //
    // The expected text is whitespace sensitive: the pattern line carries the
    // fixed four-space indent used for single-line patterns, and the caret is
    // indented by that same amount plus the zero-based column of the error.
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        assert_panic_message(
            r"\\u{[^}]*}",
            r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#,
        );
    }
}