chromium/chromeos/printing/uri_impl.cc

// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "chromeos/printing/uri_impl.h"

#include <algorithm>
#include <array>
#include <set>

#include "base/check_op.h"
#include "base/i18n/streaming_utf8_validator.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "chromeos/printing/uri.h"

namespace chromeos {

namespace {

// Sentinel passed to SavePort() when the port text cannot be parsed. It lies
// below the range accepted by SavePort(), so the call fails with
// kInvalidPortNumber.
constexpr int kPortInvalid = -2;
// Sentinel meaning "no explicit port". SavePort() replaces it with the value
// returned by Uri::GetDefaultPort() for the current scheme.
constexpr int kPortUnspecified = -1;
// Largest port number accepted by ParsePort().
constexpr int kPortMaxNumber = 65535;

// Parses a single character from *|current| and interprets it as a hex
// digit ('0'-'9' or 'A'-'F' or 'a'-'f'). If the character is incorrect or
// *|current| is not less than |end|, the function returns false.
// Otherwise, the value in *|out| is shifted left by 4 bits and the parsed
// value is saved on its rightmost 4 bits. The iterator *|current| is
// increased by one, and the function returns true.
// |current| and |out| must be not nullptr.
bool ParseHexDigit(const Iter& end, Iter* current, unsigned char* out) {
  Iter& it = *current;
  if (it >= end)
    return false;
  *out <<= 4;
  if (base::IsAsciiDigit(*it)) {
    *out += (*it - '0');
  } else if (*it >= 'A' && *it <= 'F') {
    *out += (*it - 'A' + 10);
  } else if (*it >= 'a' && *it <= 'f') {
    *out += (*it - 'a' + 10);
  } else {
    return false;
  }
  ++it;
  return true;
}

// The function parses from *|current|-|end| the first character and saves it
// to |out|. If |encoded| equals true, the % sign is treated as the beginning
// of %-escaped character - in this case the whole escaped character is read
// and decoded. The function fails and returns false when unexpected end of
// string is reached or invalid %-escaped character is spotted. The iterator
// *|current| is shifted accordingly.
// |current| and |out| must be not nullptr and *|current| must be less than
// |end|.
template <bool encoded>
bool ParseCharacter(const Iter& end, Iter* current, char* out) {
  Iter& it = *current;
  DCHECK(it < end);
  *out = *it;
  ++it;
  if (encoded && *out == '%') {
    unsigned char c = 0;
    if (!ParseHexDigit(end, &it, &c))
      return false;
    if (!ParseHexDigit(end, &it, &c))
      return false;
    *out = static_cast<char>(c);
  }
  return true;
}

// Scans |begin|-|end| for the first position holding any of the characters
// listed in |chars| and returns an iterator to it. If no such position
// exists, |end| is returned.
Iter FindFirstOf(Iter begin, Iter end, const std::string& chars) {
  const auto is_wanted = [&chars](char c) {
    return chars.find(c) != std::string::npos;
  };
  return std::find_if(begin, end, is_wanted);
}

}  // namespace

template <bool encoded, bool case_insensitive>
bool Uri::Pim::ParseString(const Iter& begin,
                           const Iter& end,
                           std::string* out,
                           bool plus_to_space) {
  parser_error_.parsed_chars = 0;
  out->reserve(end - begin);
  for (Iter it = begin; it < end;) {
    char c;
    // Read and decode a single character or a %-escaped character.
    if (plus_to_space && *it == '+') {
      c = ' ';
      ++it;
    } else if (!ParseCharacter<encoded>(end, &it, &c)) {
      parser_error_.status = ParserStatus::kInvalidPercentEncoding;
      return false;
    }
    // Analyze the character.
    if (base::IsAsciiPrintable(c)) {  // c >= 0x20(' ') && c <= 0x7E('~')
      // Copy the character with normalization.
      out->push_back(case_insensitive ? base::ToLowerASCII(c) : c);
      parser_error_.parsed_chars = it - begin;
    } else {
      // Try to parse UTF-8 character.
      base::StreamingUtf8Validator utf_parser;
      base::StreamingUtf8Validator::State state =
          utf_parser.AddBytes(base::byte_span_from_ref(c));
      if (state != base::StreamingUtf8Validator::State::VALID_MIDPOINT) {
        parser_error_.status = ParserStatus::kDisallowedASCIICharacter;
        return false;
      }
      std::string utf8_character(1, c);
      parser_error_.parsed_chars = it - begin;
      do {
        if (it == end) {
          parser_error_.status = ParserStatus::kInvalidUTF8Character;
          return false;
        }
        if (!ParseCharacter<encoded>(end, &it, &c)) {
          parser_error_.status = ParserStatus::kInvalidPercentEncoding;
          return false;
        }
        state = utf_parser.AddBytes(base::byte_span_from_ref(c));
        if (state == base::StreamingUtf8Validator::State::INVALID) {
          parser_error_.status = ParserStatus::kInvalidUTF8Character;
          return false;
        }
        utf8_character.push_back(c);
        parser_error_.parsed_chars = it - begin;
      } while (state != base::StreamingUtf8Validator::State::VALID_ENDPOINT);
      // Saves the UTF-8 character to the output.
      out->append(std::move(utf8_character));
    }
  }
  ++(parser_error_.parsed_strings);
  return true;
}

// Validates |val| with ParseString() (decoding %-escapes when |encoded| is
// true) and, on success, stores the result as the Userinfo component.
// On failure userinfo_ is left untouched, parser_error_ describes the problem
// and false is returned.
template <bool encoded>
bool Uri::Pim::SaveUserinfo(const std::string& val) {
  parser_error_.status = ParserStatus::kNoErrors;
  parser_error_.parsed_strings = 0;
  std::string parsed;
  const bool ok = ParseString<encoded>(val.begin(), val.end(), &parsed);
  if (ok) {
    userinfo_ = std::move(parsed);
  }
  return ok;
}

// Validates |val| with ParseString() (decoding %-escapes when |encoded| is
// true, and lowercasing printable ASCII characters) and, on success, stores
// the result as the Host component.
// On failure host_ is left untouched, parser_error_ describes the problem and
// false is returned.
template <bool encoded>
bool Uri::Pim::SaveHost(const std::string& val) {
  parser_error_.status = ParserStatus::kNoErrors;
  parser_error_.parsed_strings = 0;
  std::string parsed;
  const bool ok = ParseString<encoded, true>(val.begin(), val.end(), &parsed);
  if (ok) {
    host_ = std::move(parsed);
  }
  return ok;
}

bool Uri::Pim::SavePort(int value) {
  parser_error_.status = ParserStatus::kNoErrors;
  parser_error_.parsed_strings = 0;
  parser_error_.parsed_chars = 0;
  if (value < -1 || value > 65535) {
    parser_error_.status = ParserStatus::kInvalidPortNumber;
    return false;
  }
  if (value == kPortUnspecified)
    value = Uri::GetDefaultPort(scheme_);
  port_ = value;
  return true;
}

// Validates every segment from |val| with ParseString() (decoding %-escapes
// when |encoded| is true), normalizes the resulting list and stores it as the
// Path component. Normalization rules:
//  * "." segments are dropped;
//  * a ".." segment removes the preceding segment, unless there is none or it
//    is ".." itself - in these cases ".." is kept;
//  * an empty segment is an error (kEmptySegmentInPath).
// On failure path_ is left untouched, parser_error_.parsed_strings holds the
// index of the offending segment and false is returned.
template <bool encoded>
bool Uri::Pim::SavePath(const std::vector<std::string>& val) {
  parser_error_.status = ParserStatus::kNoErrors;
  parser_error_.parsed_strings = 0;
  parser_error_.parsed_chars = 0;

  std::vector<std::string> normalized;
  normalized.reserve(val.size());
  for (const std::string& raw : val) {
    std::string segment;
    if (!ParseString<encoded>(raw.begin(), raw.end(), &segment)) {
      return false;
    }
    if (segment.empty()) {
      // ParseString() has already counted this segment as parsed; undo that
      // so parsed_strings points at it.
      --parser_error_.parsed_strings;
      parser_error_.parsed_chars = 0;
      parser_error_.status = ParserStatus::kEmptySegmentInPath;
      return false;
    }
    if (segment == ".") {
      continue;
    }
    const bool can_step_up = !normalized.empty() && normalized.back() != "..";
    if (segment == ".." && can_step_up) {
      normalized.pop_back();
      continue;
    }
    normalized.push_back(std::move(segment));
  }
  path_ = std::move(normalized);
  return true;
}

// Validates every (name, value) pair from |val| with ParseString() and, on
// success, stores the resulting list as the Query component. When |encoded|
// is true, %-escapes are decoded and '+' signs are converted to spaces.
// A parameter with an empty name is an error (kEmptyParameterNameInQuery);
// empty values are accepted.
// On failure query_ is left untouched, parser_error_.parsed_strings holds the
// index of the offending string (names and values are counted separately) and
// false is returned.
template <bool encoded>
bool Uri::Pim::SaveQuery(
    const std::vector<std::pair<std::string, std::string>>& val) {
  parser_error_.status = ParserStatus::kNoErrors;
  parser_error_.parsed_strings = 0;
  parser_error_.parsed_chars = 0;

  std::vector<std::pair<std::string, std::string>> parsed;
  parsed.reserve(val.size());
  for (const auto& [raw_name, raw_value] : val) {
    // Parameter name.
    std::string name;
    if (!ParseString<encoded>(raw_name.begin(), raw_name.end(), &name,
                              encoded)) {
      return false;
    }
    if (name.empty()) {
      // ParseString() has already counted the name as parsed; undo that so
      // parsed_strings points at it.
      --parser_error_.parsed_strings;
      parser_error_.parsed_chars = 0;
      parser_error_.status = ParserStatus::kEmptyParameterNameInQuery;
      return false;
    }
    // Parameter value.
    std::string value;
    if (!ParseString<encoded>(raw_value.begin(), raw_value.end(), &value,
                              encoded)) {
      return false;
    }
    parsed.push_back({std::move(name), std::move(value)});
  }
  query_ = std::move(parsed);
  return true;
}

// Validates |val| with ParseString() (decoding %-escapes when |encoded| is
// true) and, on success, stores the result as the Fragment component.
// On failure fragment_ is left untouched, parser_error_ describes the problem
// and false is returned.
template <bool encoded>
bool Uri::Pim::SaveFragment(const std::string& val) {
  parser_error_.status = ParserStatus::kNoErrors;
  parser_error_.parsed_strings = 0;
  std::string parsed;
  const bool ok = ParseString<encoded>(val.begin(), val.end(), &parsed);
  if (ok) {
    fragment_ = std::move(parsed);
  }
  return ok;
}

bool Uri::Pim::ParseScheme(const Iter& begin, const Iter& end) {
  parser_error_.status = ParserStatus::kNoErrors;
  parser_error_.parsed_strings = 0;
  parser_error_.parsed_chars = 0;
  // Special case for an empty string on the input.
  if (begin == end) {
    scheme_.clear();
    return true;
  }
  // Temporary output string.
  std::string out;
  out.reserve(end - begin);
  // Checks the first character - must be an ASCII letter.
  auto it = begin;
  if (base::IsAsciiAlpha(*it)) {
    out.push_back(base::ToLowerASCII(*it));
  } else {
    parser_error_.status = ParserStatus::kInvalidScheme;
    return false;
  }
  // Checks the rest of characters.
  for (++it; it < end; ++it) {
    if (base::IsAsciiAlpha(*it) || base::IsAsciiDigit(*it) || *it == '+' ||
        *it == '-' || *it == '.') {
      out.push_back(base::ToLowerASCII(*it));
    } else {
      parser_error_.status = ParserStatus::kInvalidScheme;
      parser_error_.parsed_chars = it - begin;
      return false;
    }
  }
  // Success - save the Scheme.
  scheme_ = std::move(out);
  // If the current Port is unspecified and the new Scheme has default port
  // number, set the default port number.
  if (port_ == kPortUnspecified)
    port_ = Uri::GetDefaultPort(scheme_);
  return true;
}

// Parses the Authority component ("[userinfo@]host[:port]") from
// |begin|-|end| and saves Userinfo, Host and Port.
// Userinfo is everything before the first '@' sign (if there is one); Host
// ends at the first ':' sign that follows; the rest is the Port.
// Returns false when one of the parts cannot be parsed. In that case
// parser_error_.status describes the problem and parser_error_.parsed_chars
// is the offset of the problem relative to |begin|.
bool Uri::Pim::ParseAuthority(const Iter& begin, const Iter& end) {
  // Parse and save Userinfo.
  Iter it = std::find(begin, end, '@');
  if (it != end) {
    // Userinfo starts exactly at |begin|, so the offset reported by
    // SaveUserinfo() is already relative to |begin| and must not be shifted.
    if (!SaveUserinfo<true>(std::string(begin, it)))
      return false;
    ++it;  // to omit '@' character
  } else {
    it = begin;
  }
  // Parse and save Host.
  Iter it2 = std::find(it, end, ':');
  if (!SaveHost<true>(std::string(it, it2))) {
    // Host starts at |it|; translate the offset so it is relative to |begin|.
    parser_error_.parsed_chars += it - begin;
    return false;
  }
  // Parse and save Port.
  if (it2 != end) {
    ++it2;  // omit the ':' character
    if (!ParsePort(it2, end)) {
      // Port starts at |it2|; translate the offset as above.
      parser_error_.parsed_chars += it2 - begin;
      return false;
    }
  }
  return true;
}

bool Uri::Pim::ParsePort(const Iter& begin, const Iter& end) {
  if (begin == end)
    return SavePort(kPortUnspecified);
  int number = 0;
  for (Iter it = begin; it < end; ++it) {
    if (!base::IsAsciiDigit(*it))
      return SavePort(kPortInvalid);
    number *= 10;
    number += *it - '0';
    if (number > kPortMaxNumber)
      return SavePort(kPortInvalid);
  }
  return SavePort(number);
}

bool Uri::Pim::ParsePath(const Iter& begin, const Iter& end) {
  // Path must be empty or start with '/'.
  if (begin < end && *begin != '/') {
    parser_error_.status = ParserStatus::kRelativePathsNotAllowed;
    parser_error_.parsed_chars = 0;
    parser_error_.parsed_strings = 0;
    return false;
  }
  // This holds Path's segments.
  std::vector<std::string> path;
  // This stores offset from begin of every segment.
  std::vector<size_t> strings_positions;
  // Parsing...
  for (Iter it1 = begin; it1 < end;) {
    if (++it1 == end)  // omit '/' character
      break;
    Iter it2 = std::find(it1, end, '/');
    path.push_back(std::string(it1, it2));
    strings_positions.push_back(it1 - begin);
    it1 = it2;
  }
  // Try to set the new Path and return true if succeed.
  if (SavePath<true>(path))
    return true;
  // An error occurred, adjust parser error fields set by SetPath(...).
  parser_error_.parsed_chars += strings_positions[parser_error_.parsed_strings];
  parser_error_.parsed_strings = 0;
  return false;
}

bool Uri::Pim::ParseQuery(const Iter& begin, const Iter& end) {
  // This holds pairs name=value.
  std::vector<std::pair<std::string, std::string>> query;
  // This stores offset from begin of every name and value.
  std::vector<size_t> strings_positions;
  // Parsing...
  for (Iter it = begin; it < end;) {
    Iter it_am = std::find(it, end, '&');
    Iter it_eq = std::find(it, it_am, '=');
    // Extract name.
    std::string name(it, it_eq);
    // Extract value.
    if (it_eq < it_am)  // to omit '=' character
      ++it_eq;
    std::string value(it_eq, it_am);
    // Save the pair (name,value).
    query.push_back(std::make_pair(std::move(name), std::move(value)));
    // Store the offset of the name.
    strings_positions.push_back(it - begin);
    // Store the offset of the value.
    strings_positions.push_back(it_eq - begin);
    // Move |it| to the beginning of the next pair.
    if (it_am < end)
      ++it_am;  // to omit '&' character
    it = it_am;
  }
  // Try to set the new Query and return true if succeed.
  if (SaveQuery<true>(query))
    return true;
  // An error occurred, adjust the |parser_error| set by SetQuery(...).
  parser_error_.parsed_chars += strings_positions[parser_error_.parsed_strings];
  parser_error_.parsed_strings = 0;
  return false;
}

// Parses the Fragment component from |begin|-|end| (decoding %-escapes) and
// saves it. On failure fragment_ is left untouched, parser_error_ describes
// the problem (offsets are relative to |begin|) and false is returned.
bool Uri::Pim::ParseFragment(const Iter& begin, const Iter& end) {
  parser_error_.parsed_strings = 0;
  std::string decoded;
  const bool ok = ParseString<true>(begin, end, &decoded);
  if (ok) {
    fragment_ = std::move(decoded);
  }
  return ok;
}

bool Uri::Pim::ParseUri(const Iter& begin, const Iter end) {
  parser_error_.status = ParserStatus::kNoErrors;
  parser_error_.parsed_strings = 0;
  parser_error_.parsed_chars = 0;
  Iter it1 = begin;
  // The Scheme component starts from character different than slash ("/"),
  // question mark ("?"), and number sign ("#"). Non-empty Scheme must be
  // followed by the colon (":") character.
  if (it1 < end && *it1 != '/' && *it1 != '?' && *it1 != '#') {
    auto it2 = std::find(it1, end, ':');
    if (it2 == end) {
      parser_error_.status = ParserStatus::kInvalidScheme;
      return false;
    }
    if (!ParseScheme(it1, it2))
      return false;
    it1 = ++it2;
  }
  // The authority component is preceded by a double slash ("//") and is
  // terminated by the next slash ("/"), question mark ("?"), or number
  // sign ("#") character, or by the end of the URI.
  if (it1 < end && *it1 == '/') {
    ++it1;
    if (it1 < end && *it1 == '/') {
      ++it1;
      auto it_auth_end = FindFirstOf(it1, end, "/?#");
      if (!ParseAuthority(it1, it_auth_end)) {
        parser_error_.parsed_chars += it1 - begin;
        return false;
      }
      it1 = it_auth_end;
    } else {
      --it1;
    }
  }
  // The Path is terminated by the first question mark ("?") or number
  // sign ("#") character, or by the end of the URI.
  if (it1 < end) {
    auto it2 = FindFirstOf(it1, end, "?#");
    if (!ParsePath(it1, it2)) {
      parser_error_.parsed_chars += it1 - begin;
      return false;
    }
    it1 = it2;
  }
  // The Query component is indicated by the first question mark ("?")
  // character and terminated by a number sign ("#") character or by the end
  // of the URI.
  if (it1 < end && *it1 == '?') {
    ++it1;
    auto it2 = std::find(it1, end, '#');
    if (!ParseQuery(it1, it2)) {
      parser_error_.parsed_chars += it1 - begin;
      return false;
    }
    it1 = it2;
  }
  // A Fragment component is indicated by the presence of a number
  // sign ("#") character and terminated by the end of the URI.
  if (it1 < end) {
    DCHECK_EQ(*it1, '#');
    ++it1;  // to omit '#' character
    if (!ParseFragment(it1, end)) {
      parser_error_.parsed_chars += it1 - begin;
      return false;
    }
  }
  // Success!
  return true;
}

// Explicit instantiations of the member templates defined in this file, one
// for every combination of their template arguments.
// NOTE(review): presumably needed because the templates are only declared in
// the header while callers live in other translation units - confirm against
// uri_impl.h.

// ParseString<encoded, case_insensitive>.
template bool Uri::Pim::ParseString<false, false>(const Iter& begin,
                                                  const Iter& end,
                                                  std::string* out,
                                                  bool plus_to_space);
template bool Uri::Pim::ParseString<false, true>(const Iter& begin,
                                                 const Iter& end,
                                                 std::string* out,
                                                 bool plus_to_space);
template bool Uri::Pim::ParseString<true, false>(const Iter& begin,
                                                 const Iter& end,
                                                 std::string* out,
                                                 bool plus_to_space);
template bool Uri::Pim::ParseString<true, true>(const Iter& begin,
                                                const Iter& end,
                                                std::string* out,
                                                bool plus_to_space);

// Save*<encoded> for each component stored from a string or a list of strings.
template bool Uri::Pim::SaveUserinfo<false>(const std::string& val);
template bool Uri::Pim::SaveUserinfo<true>(const std::string& val);

template bool Uri::Pim::SaveHost<false>(const std::string& val);
template bool Uri::Pim::SaveHost<true>(const std::string& val);

template bool Uri::Pim::SavePath<false>(const std::vector<std::string>& val);
template bool Uri::Pim::SavePath<true>(const std::vector<std::string>& val);

template bool Uri::Pim::SaveQuery<false>(
    const std::vector<std::pair<std::string, std::string>>& val);
template bool Uri::Pim::SaveQuery<true>(
    const std::vector<std::pair<std::string, std::string>>& val);

template bool Uri::Pim::SaveFragment<false>(const std::string& val);
template bool Uri::Pim::SaveFragment<true>(const std::string& val);

}  // namespace chromeos