llvm/flang/lib/Parser/source.cpp

//===-- lib/Parser/source.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Parser/source.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/char-buffer.h"
#include "flang/Parser/characters.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstring>
#include <memory>
#include <string>
#include <vector>

namespace Fortran::parser {

SourceFile::~SourceFile() { Close(); }

void SourceFile::RecordLineStarts() {
  if (std::size_t chars{bytes()}; chars > 0) {
    origins_.emplace(1, SourcePositionOrigin{path_, 1});
    const char *source{content().data()};
    CHECK(source[chars - 1] == '\n' && "missing ultimate newline");
    std::size_t at{0};
    do { // "at" is always at the beginning of a source line
      lineStart_.push_back(at);
      at = reinterpret_cast<const char *>(
               std::memchr(source + at, '\n', chars - at)) -
          source + 1;
    } while (at < chars);
    CHECK(at == chars);
    lineStart_.shrink_to_fit();
  }
}

// Check for a Unicode byte order mark (BOM).
// Module files all have one; so can source files.
void SourceFile::IdentifyPayload() {
  llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
  constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
  if (content.starts_with(UTF8_BOM)) {
    bom_end_ = UTF8_BOM.size();
    encoding_ = Encoding::UTF_8;
  }
}

std::string DirectoryName(std::string path) {
  llvm::SmallString<128> pathBuf{path};
  llvm::sys::path::remove_filename(pathBuf);
  return pathBuf.str().str();
}

std::optional<std::string> LocateSourceFile(
    std::string name, const std::list<std::string> &searchPath) {
  if (name == "-" || llvm::sys::path::is_absolute(name)) {
    return name;
  }
  for (const std::string &dir : searchPath) {
    llvm::SmallString<128> path{dir};
    llvm::sys::path::append(path, name);
    bool isDir{false};
    auto er = llvm::sys::fs::is_directory(path, isDir);
    if (!er && !isDir) {
      return path.str().str();
    }
  }
  return std::nullopt;
}

std::vector<std::string> LocateSourceFileAll(
    std::string name, const std::vector<std::string> &searchPath) {
  if (name == "-" || llvm::sys::path::is_absolute(name)) {
    return {name};
  }
  std::vector<std::string> result;
  for (const std::string &dir : searchPath) {
    llvm::SmallString<128> path{dir};
    llvm::sys::path::append(path, name);
    bool isDir{false};
    auto er = llvm::sys::fs::is_directory(path, isDir);
    if (!er && !isDir) {
      result.emplace_back(path.str().str());
    }
  }
  return result;
}

std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
  std::size_t wrote{0};
  char *buffer{buf.data()};
  char *p{buf.data()};
  std::size_t bytes = buf.size();
  while (bytes > 0) {
    void *vp{static_cast<void *>(p)};
    void *crvp{std::memchr(vp, '\r', bytes)};
    char *crcp{static_cast<char *>(crvp)};
    if (!crcp) {
      std::memmove(buffer + wrote, p, bytes);
      wrote += bytes;
      break;
    }
    std::size_t chunk = crcp - p;
    auto advance{chunk + 1};
    if (chunk + 1 >= bytes || crcp[1] == '\n') {
      // CR followed by LF or EOF: omit
    } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') {
      // CR preceded by LF or BOF: omit
    } else {
      // CR in line: retain
      ++chunk;
    }
    std::memmove(buffer + wrote, p, chunk);
    wrote += chunk;
    p += advance;
    bytes -= advance;
  }
  return wrote;
}

bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
  Close();
  path_ = path;
  std::string errorPath{"'"s + path_ + "'"};
  auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
  if (!bufOr) {
    auto err = bufOr.getError();
    error << "Could not open " << errorPath << ": " << err.message();
    return false;
  }
  buf_ = std::move(bufOr.get());
  ReadFile();
  return true;
}

bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
  Close();
  path_ = "standard input";
  auto buf_or = llvm::MemoryBuffer::getSTDIN();
  if (!buf_or) {
    auto err = buf_or.getError();
    error << err.message();
    return false;
  }
  auto inbuf = std::move(buf_or.get());
  buf_ =
      llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
  llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
  ReadFile();
  return true;
}

void SourceFile::ReadFile() {
  buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
  if (content().size() == 0 || content().back() != '\n') {
    // Don't bother to copy if we have spare memory
    if (content().size() >= buf_->getBufferSize()) {
      auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
          content().size() + 1)};
      llvm::copy(content(), tmp_buf->getBufferStart());
      buf_ = std::move(tmp_buf);
    }
    buf_end_++;
    buf_->getBuffer()[buf_end_ - 1] = '\n';
  }
  IdentifyPayload();
  RecordLineStarts();
}

void SourceFile::Close() {
  path_.clear();
  buf_.reset();
  distinctPaths_.clear();
  origins_.clear();
}

SourcePosition SourceFile::GetSourcePosition(std::size_t at) const {
  CHECK(at < bytes());
  auto it{llvm::upper_bound(lineStart_, at)};
  auto trueLineNumber{std::distance(lineStart_.begin(), it - 1) + 1};
  auto ub{origins_.upper_bound(trueLineNumber)};
  auto column{static_cast<int>(at - lineStart_[trueLineNumber - 1] + 1)};
  if (ub == origins_.begin()) {
    return {*this, path_, static_cast<int>(trueLineNumber), column,
        static_cast<int>(trueLineNumber)};
  } else {
    --ub;
    const SourcePositionOrigin &origin{ub->second};
    auto lineNumber{
        trueLineNumber - ub->first + static_cast<std::size_t>(origin.line)};
    return {*this, origin.path, static_cast<int>(lineNumber), column,
        static_cast<int>(trueLineNumber)};
  }
}

const std::string &SourceFile::SavePath(std::string &&path) {
  return *distinctPaths_.emplace(std::move(path)).first;
}

void SourceFile::LineDirective(
    int trueLineNumber, const std::string &path, int lineNumber) {
  origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber});
}

llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const {
  o << "SourceFile '" << path_ << "'\n";
  for (const auto &[at, spo] : origins_) {
    o << "  origin_[" << at << "] -> '" << spo.path << "' " << spo.line << '\n';
  }
  return o;
}
} // namespace Fortran::parser