llvm/libcxx/src/filesystem/path_parser.h

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef PATH_PARSER_H
#define PATH_PARSER_H

#include <__config>
#include <__utility/unreachable.h>
#include <cstddef>
#include <filesystem>
#include <utility>

#include "format_string.h"

_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM

/// Returns true when \p C is a directory separator: '/' on every platform,
/// and additionally '\\' when building against the Win32 API.
inline bool isSeparator(path::value_type C) {
#if defined(_LIBCPP_WIN32API)
  return C == '/' || C == '\\';
#else
  return C == '/';
#endif
}

/// Returns true when \p C lies in the range 'a'..'z' or 'A'..'Z', i.e. when it
/// can serve as the letter of a "X:" drive specifier.
inline bool isDriveLetter(path::value_type C) {
  const bool IsLower = C >= 'a' && C <= 'z';
  const bool IsUpper = C >= 'A' && C <= 'Z';
  return IsLower || IsUpper;
}

namespace parser {

// Alias for path's string view type; parser inputs and results are expressed
// with it.
using string_view_t    = path::__string_view;
// A pair of views, used as the return type of separate_filename().
using string_view_pair = pair<string_view_t, string_view_t>;
// Pointer to a read-only path character; used as a position while lexing.
using PosPtr           = path::value_type const*;

struct PathParser {
  enum ParserState : unsigned char {
    // Zero is a special sentinel value used by default constructed iterators.
    PS_BeforeBegin   = path::iterator::_BeforeBegin,
    PS_InRootName    = path::iterator::_InRootName,
    PS_InRootDir     = path::iterator::_InRootDir,
    PS_InFilenames   = path::iterator::_InFilenames,
    PS_InTrailingSep = path::iterator::_InTrailingSep,
    PS_AtEnd         = path::iterator::_AtEnd
  };

  const string_view_t Path;
  string_view_t RawEntry;
  ParserState State_;

private:
  PathParser(string_view_t P, ParserState State) noexcept : Path(P), State_(State) {}

public:
  PathParser(string_view_t P, string_view_t E, unsigned char S)
      : Path(P), RawEntry(E), State_(static_cast<ParserState>(S)) {
    // S cannot be '0' or PS_BeforeBegin.
  }

  static PathParser CreateBegin(string_view_t P) noexcept {
    PathParser PP(P, PS_BeforeBegin);
    PP.increment();
    return PP;
  }

  static PathParser CreateEnd(string_view_t P) noexcept {
    PathParser PP(P, PS_AtEnd);
    return PP;
  }

  PosPtr peek() const noexcept {
    auto TkEnd = getNextTokenStartPos();
    auto End   = getAfterBack();
    return TkEnd == End ? nullptr : TkEnd;
  }

  void increment() noexcept {
    const PosPtr End   = getAfterBack();
    const PosPtr Start = getNextTokenStartPos();
    if (Start == End)
      return makeState(PS_AtEnd);

    switch (State_) {
    case PS_BeforeBegin: {
      PosPtr TkEnd = consumeRootName(Start, End);
      if (TkEnd)
        return makeState(PS_InRootName, Start, TkEnd);
    }
      _LIBCPP_FALLTHROUGH();
    case PS_InRootName: {
      PosPtr TkEnd = consumeAllSeparators(Start, End);
      if (TkEnd)
        return makeState(PS_InRootDir, Start, TkEnd);
      else
        return makeState(PS_InFilenames, Start, consumeName(Start, End));
    }
    case PS_InRootDir:
      return makeState(PS_InFilenames, Start, consumeName(Start, End));

    case PS_InFilenames: {
      PosPtr SepEnd = consumeAllSeparators(Start, End);
      if (SepEnd != End) {
        PosPtr TkEnd = consumeName(SepEnd, End);
        if (TkEnd)
          return makeState(PS_InFilenames, SepEnd, TkEnd);
      }
      return makeState(PS_InTrailingSep, Start, SepEnd);
    }

    case PS_InTrailingSep:
      return makeState(PS_AtEnd);

    case PS_AtEnd:
      __libcpp_unreachable();
    }
  }

  void decrement() noexcept {
    const PosPtr REnd   = getBeforeFront();
    const PosPtr RStart = getCurrentTokenStartPos() - 1;
    if (RStart == REnd) // we're decrementing the begin
      return makeState(PS_BeforeBegin);

    switch (State_) {
    case PS_AtEnd: {
      // Try to consume a trailing separator or root directory first.
      if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {
        if (SepEnd == REnd)
          return makeState(PS_InRootDir, Path.data(), RStart + 1);
        PosPtr TkStart = consumeRootName(SepEnd, REnd);
        if (TkStart == REnd)
          return makeState(PS_InRootDir, RStart, RStart + 1);
        return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1);
      } else {
        PosPtr TkStart = consumeRootName(RStart, REnd);
        if (TkStart == REnd)
          return makeState(PS_InRootName, TkStart + 1, RStart + 1);
        TkStart = consumeName(RStart, REnd);
        return makeState(PS_InFilenames, TkStart + 1, RStart + 1);
      }
    }
    case PS_InTrailingSep:
      return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1, RStart + 1);
    case PS_InFilenames: {
      PosPtr SepEnd = consumeAllSeparators(RStart, REnd);
      if (SepEnd == REnd)
        return makeState(PS_InRootDir, Path.data(), RStart + 1);
      PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd);
      if (TkStart == REnd) {
        if (SepEnd)
          return makeState(PS_InRootDir, SepEnd + 1, RStart + 1);
        return makeState(PS_InRootName, TkStart + 1, RStart + 1);
      }
      TkStart = consumeName(SepEnd, REnd);
      return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1);
    }
    case PS_InRootDir:
      return makeState(PS_InRootName, Path.data(), RStart + 1);
    case PS_InRootName:
    case PS_BeforeBegin:
      __libcpp_unreachable();
    }
  }

  /// \brief Return a view with the "preferred representation" of the current
  ///   element. For example trailing separators are represented as a '.'
  string_view_t operator*() const noexcept {
    switch (State_) {
    case PS_BeforeBegin:
    case PS_AtEnd:
      return PATHSTR("");
    case PS_InRootDir:
      if (RawEntry[0] == '\\')
        return PATHSTR("\\");
      else
        return PATHSTR("/");
    case PS_InTrailingSep:
      return PATHSTR("");
    case PS_InRootName:
    case PS_InFilenames:
      return RawEntry;
    }
    __libcpp_unreachable();
  }

  explicit operator bool() const noexcept { return State_ != PS_BeforeBegin && State_ != PS_AtEnd; }

  PathParser& operator++() noexcept {
    increment();
    return *this;
  }

  PathParser& operator--() noexcept {
    decrement();
    return *this;
  }

  bool atEnd() const noexcept { return State_ == PS_AtEnd; }

  bool inRootDir() const noexcept { return State_ == PS_InRootDir; }

  bool inRootName() const noexcept { return State_ == PS_InRootName; }

  bool inRootPath() const noexcept { return inRootName() || inRootDir(); }

private:
  void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
    State_    = NewState;
    RawEntry = string_view_t(Start, End - Start);
  }
  void makeState(ParserState NewState) noexcept {
    State_    = NewState;
    RawEntry = {};
  }

  PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }

  PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }

  /// \brief Return a pointer to the first character after the currently
  ///   lexed element.
  PosPtr getNextTokenStartPos() const noexcept {
    switch (State_) {
    case PS_BeforeBegin:
      return Path.data();
    case PS_InRootName:
    case PS_InRootDir:
    case PS_InFilenames:
      return &RawEntry.back() + 1;
    case PS_InTrailingSep:
    case PS_AtEnd:
      return getAfterBack();
    }
    __libcpp_unreachable();
  }

  /// \brief Return a pointer to the first character in the currently lexed
  ///   element.
  PosPtr getCurrentTokenStartPos() const noexcept {
    switch (State_) {
    case PS_BeforeBegin:
    case PS_InRootName:
      return &Path.front();
    case PS_InRootDir:
    case PS_InFilenames:
    case PS_InTrailingSep:
      return &RawEntry.front();
    case PS_AtEnd:
      return &Path.back() + 1;
    }
    __libcpp_unreachable();
  }

  // Consume all consecutive separators.
  PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {
    if (P == nullptr || P == End || !isSeparator(*P))
      return nullptr;
    const int Inc = P < End ? 1 : -1;
    P += Inc;
    while (P != End && isSeparator(*P))
      P += Inc;
    return P;
  }

  // Consume exactly N separators, or return nullptr.
  PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {
    PosPtr Ret = consumeAllSeparators(P, End);
    if (Ret == nullptr)
      return nullptr;
    if (P < End) {
      if (Ret == P + N)
        return Ret;
    } else {
      if (Ret == P - N)
        return Ret;
    }
    return nullptr;
  }

  PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {
    PosPtr Start = P;
    if (P == nullptr || P == End || isSeparator(*P))
      return nullptr;
    const int Inc = P < End ? 1 : -1;
    P += Inc;
    while (P != End && !isSeparator(*P))
      P += Inc;
    if (P == End && Inc < 0) {
      // Iterating backwards and consumed all the rest of the input.
      // Check if the start of the string would have been considered
      // a root name.
      PosPtr RootEnd = consumeRootName(End + 1, Start);
      if (RootEnd)
        return RootEnd - 1;
    }
    return P;
  }

  PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {
    if (P == End)
      return nullptr;
    if (P < End) {
      if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':')
        return nullptr;
      return P + 2;
    } else {
      if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':')
        return nullptr;
      return P - 2;
    }
  }

  PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {
    if (P == End)
      return nullptr;
    if (P < End)
      return consumeName(consumeNSeparators(P, End, 2), End);
    else
      return consumeNSeparators(consumeName(P, End), End, 2);
  }

  PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {
#if defined(_LIBCPP_WIN32API)
    if (PosPtr Ret = consumeDriveLetter(P, End))
      return Ret;
    if (PosPtr Ret = consumeNetworkRoot(P, End))
      return Ret;
#endif
    return nullptr;
  }
};

/// \brief Split a filename into a (stem, extension) pair.
///
/// The filenames "", "." and ".." have no extension, and neither does a name
/// without a '.' or one whose only '.' is its first character. Otherwise the
/// extension starts at the last '.' and includes it.
inline string_view_pair separate_filename(string_view_t const& s) {
  const bool IsSpecial = s.empty() || s == PATHSTR(".") || s == PATHSTR("..");
  if (IsSpecial)
    return string_view_pair{s, PATHSTR("")};

  const auto DotPos = s.find_last_of('.');
  const bool HasExtension = DotPos != string_view_t::npos && DotPos != 0;
  if (HasExtension)
    return string_view_pair{s.substr(0, DotPos), s.substr(DotPos)};
  return string_view_pair{s, string_view_t{}};
}

/// \brief Build a view over the closed range [S, E]: the character that
///   \p E points at is included in the result.
inline string_view_t createView(PosPtr S, PosPtr E) noexcept {
  const size_t Length = static_cast<size_t>(E - S) + 1;
  return string_view_t(S, Length);
}

} // namespace parser

_LIBCPP_END_NAMESPACE_FILESYSTEM

#endif // PATH_PARSER_H