llvm/flang/include/flang/Parser/provenance.h

//===-- include/flang/Parser/provenance.h -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_PARSER_PROVENANCE_H_
#define FORTRAN_PARSER_PROVENANCE_H_

#include "char-block.h"
#include "char-buffer.h"
#include "characters.h"
#include "source.h"
#include "flang/Common/idioms.h"
#include "flang/Common/interval.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <list>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <variant>
#include <vector>

namespace Fortran::parser {

// Each character in the contiguous source stream built by the
// prescanner corresponds to a particular character in a source file,
// include file, macro expansion, or compiler-inserted text.
// The location of this original character to which a parsable character
// corresponds is its provenance.
//
// Provenances are offsets into an (unmaterialized) marshaling of the
// entire contents of all the original source files, include files, macro
// expansions, &c. for each visit to each source.  These origins of the
// original source characters constitute a forest whose roots are
// the original source files named on the compiler's command line.
// Given a Provenance, we can find the tree node that contains it in time
// O(log(# of origins)), and describe the position precisely by walking
// up the tree.  (It would be possible, via a time/space trade-off, to
// cap the time by the use of an intermediate table that would be indexed
// by the upper bits of an offset, but that does not appear to be
// necessary.)

class AllSources;
class AllCookedSources;

class Provenance {
public:
  Provenance() {}
  Provenance(std::size_t offset) : offset_{offset} { CHECK(offset > 0); }
  Provenance(const Provenance &that) = default;
  Provenance(Provenance &&that) = default;
  Provenance &operator=(const Provenance &that) = default;
  Provenance &operator=(Provenance &&that) = default;

  std::size_t offset() const { return offset_; }

  Provenance operator+(ptrdiff_t n) const {
    CHECK(n > -static_cast<ptrdiff_t>(offset_));
    return {offset_ + static_cast<std::size_t>(n)};
  }
  Provenance operator+(std::size_t n) const { return {offset_ + n}; }
  std::size_t operator-(Provenance that) const {
    CHECK(that <= *this);
    return offset_ - that.offset_;
  }
  bool operator<(Provenance that) const { return offset_ < that.offset_; }
  bool operator<=(Provenance that) const { return !(that < *this); }
  bool operator==(Provenance that) const { return offset_ == that.offset_; }
  bool operator!=(Provenance that) const { return !(*this == that); }

private:
  std::size_t offset_{0};
};

using ProvenanceRange = common::Interval<Provenance>;

// Maps contiguous ranges of byte offsets in original source files to
// contiguous ranges in the cooked character stream; essentially a
// partial inversion of OffsetToProvenanceMappings (below).
// Used for implementing the first step of mapping an identifier
// selected in a code editor to one of its declarative statements.
class ProvenanceRangeToOffsetMappings {
public:
  ProvenanceRangeToOffsetMappings();
  ~ProvenanceRangeToOffsetMappings();
  bool empty() const { return map_.empty(); }
  void Put(ProvenanceRange, std::size_t offset);
  std::optional<std::size_t> Map(ProvenanceRange) const;
  llvm::raw_ostream &Dump(llvm::raw_ostream &) const;

private:
  // A comparison function object for use in std::multimap<Compare=>.
  // Intersecting intervals will effectively compare equal, not being
  // either < nor >= each other.
  struct WhollyPrecedes {
    bool operator()(ProvenanceRange, ProvenanceRange) const;
  };

  std::multimap<ProvenanceRange, std::size_t, WhollyPrecedes> map_;
};

// Maps 0-based local offsets in some contiguous range (e.g., a token
// sequence) to their provenances.  Lookup time is on the order of
// O(log(#of intervals with contiguous provenances)).  As mentioned
// above, this time could be capped via a time/space trade-off.
class OffsetToProvenanceMappings {
public:
  OffsetToProvenanceMappings() {}
  void clear();
  void swap(OffsetToProvenanceMappings &);
  void shrink_to_fit();
  std::size_t SizeInBytes() const;
  void Put(ProvenanceRange);
  void Put(const OffsetToProvenanceMappings &);
  ProvenanceRange Map(std::size_t at) const;
  void RemoveLastBytes(std::size_t);
  ProvenanceRangeToOffsetMappings Invert(const AllSources &) const;
  llvm::raw_ostream &Dump(llvm::raw_ostream &) const;

private:
  struct ContiguousProvenanceMapping {
    std::size_t start;
    ProvenanceRange range;
  };

  // Elements appear in ascending order of distinct .start values;
  // their .range values are disjoint and not necessarily adjacent.
  std::vector<ContiguousProvenanceMapping> provenanceMap_;
};

// A singleton AllSources instance for the whole compilation
// is shared by reference.
class AllSources {
public:
  AllSources();
  ~AllSources();

  std::size_t size() const { return range_.size(); }
  const char &operator[](Provenance) const;
  Encoding encoding() const { return encoding_; }
  AllSources &set_encoding(Encoding e) {
    encoding_ = e;
    return *this;
  }

  void ClearSearchPath();
  void AppendSearchPathDirectory(std::string); // new last directory
  const SourceFile *OpenPath(std::string path, llvm::raw_ostream &error);
  const SourceFile *Open(std::string path, llvm::raw_ostream &error,
      std::optional<std::string> &&prependPath = std::nullopt);
  const SourceFile *ReadStandardInput(llvm::raw_ostream &error);

  ProvenanceRange AddIncludedFile(
      const SourceFile &, ProvenanceRange, bool isModule = false);
  ProvenanceRange AddMacroCall(
      ProvenanceRange def, ProvenanceRange use, const std::string &expansion);
  ProvenanceRange AddCompilerInsertion(std::string);

  // If provenance is in an expanded macro, return the starting provenance of
  // the replaced macro. Otherwise, return the input provenance.
  Provenance GetReplacedProvenance(Provenance) const;

  bool IsValid(Provenance at) const { return range_.Contains(at); }
  bool IsValid(ProvenanceRange range) const {
    return range.size() > 0 && range_.Contains(range);
  }
  void setShowColors(bool showColors) { showColors_ = showColors; }
  bool getShowColors() const { return showColors_; }
  std::optional<ProvenanceRange> GetInclusionInfo(
      const std::optional<ProvenanceRange> &) const;
  void EmitMessage(llvm::raw_ostream &, const std::optional<ProvenanceRange> &,
      const std::string &message, const std::string &prefix,
      llvm::raw_ostream::Colors color, bool echoSourceLine = false) const;
  const SourceFile *GetSourceFile(
      Provenance, std::size_t *offset = nullptr, bool topLevel = false) const;
  const char *GetSource(ProvenanceRange) const;
  std::optional<SourcePosition> GetSourcePosition(Provenance) const;
  std::optional<ProvenanceRange> GetFirstFileProvenance() const;
  std::string GetPath(Provenance, bool topLevel = false) const; // __FILE__
  int GetLineNumber(Provenance) const; // __LINE__
  Provenance CompilerInsertionProvenance(char ch);
  ProvenanceRange IntersectionWithSourceFiles(ProvenanceRange) const;
  llvm::raw_ostream &Dump(llvm::raw_ostream &) const;

private:
  struct Inclusion {
    const SourceFile &source;
    bool isModule{false};
  };
  struct Macro {
    ProvenanceRange definition;
    std::string expansion;
  };
  struct CompilerInsertion {
    std::string text;
  };

  struct Origin {
    Origin(ProvenanceRange, const SourceFile &);
    Origin(ProvenanceRange, const SourceFile &, ProvenanceRange,
        bool isModule = false);
    Origin(ProvenanceRange, ProvenanceRange def, ProvenanceRange use,
        const std::string &expansion);
    Origin(ProvenanceRange, const std::string &);

    const char &operator[](std::size_t) const;

    std::variant<Inclusion, Macro, CompilerInsertion> u;
    ProvenanceRange covers, replaces;
  };

  const Origin &MapToOrigin(Provenance) const;

  // Elements are in ascending & contiguous order of .covers.
  std::vector<Origin> origin_;
  ProvenanceRange range_;
  std::map<char, Provenance> compilerInsertionProvenance_;
  std::vector<std::unique_ptr<SourceFile>> ownedSourceFiles_;
  std::list<std::string> searchPath_;
  Encoding encoding_{Encoding::UTF_8};
  bool showColors_{false};
};

// Represents the result of preprocessing and prescanning a single source
// file (and all its inclusions) or module file.  Parsers operate within
// single instances of CookedSource.
class CookedSource {
public:
  explicit CookedSource(AllSources &allSources) : allSources_{allSources} {};

  int number() const { return number_; }
  void set_number(int n) { number_ = n; }

  CharBlock AsCharBlock() const { return CharBlock{data_}; }
  std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const;
  std::optional<CharBlock> GetCharBlock(ProvenanceRange) const;

  // The result of a Put() is the offset that the new data
  // will have in the eventually marshaled contiguous buffer.
  std::size_t Put(const char *data, std::size_t bytes) {
    return buffer_.Put(data, bytes);
  }
  std::size_t Put(const std::string &s) { return buffer_.Put(s); }
  std::size_t Put(char ch) { return buffer_.Put(&ch, 1); }
  std::size_t Put(char ch, Provenance p) {
    provenanceMap_.Put(ProvenanceRange{p, 1});
    return buffer_.Put(&ch, 1);
  }

  void PutProvenance(Provenance p) { provenanceMap_.Put(ProvenanceRange{p}); }
  void PutProvenance(ProvenanceRange pr) { provenanceMap_.Put(pr); }
  void PutProvenanceMappings(const OffsetToProvenanceMappings &pm) {
    provenanceMap_.Put(pm);
  }

  void MarkPossibleFixedFormContinuation() {
    possibleFixedFormContinuations_.push_back(BufferedBytes());
  }

  std::size_t BufferedBytes() const;
  void Marshal(AllCookedSources &); // marshals text into one contiguous block
  void CompileProvenanceRangeToOffsetMappings(AllSources &);
  llvm::raw_ostream &Dump(llvm::raw_ostream &) const;

private:
  AllSources &allSources_;
  int number_{0}; // for sorting purposes
  CharBuffer buffer_; // before Marshal()
  std::string data_; // all of it, prescanned and preprocessed
  OffsetToProvenanceMappings provenanceMap_;
  ProvenanceRangeToOffsetMappings invertedMap_;
  std::list<std::size_t> possibleFixedFormContinuations_;
};

class AllCookedSources {
public:
  explicit AllCookedSources(AllSources &);
  ~AllCookedSources();

  AllSources &allSources() { return allSources_; }
  const AllSources &allSources() const { return allSources_; }

  CookedSource &NewCookedSource();

  const CookedSource *Find(CharBlock) const;
  const CookedSource *Find(const char *p) const { return Find(CharBlock{p}); }

  bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); }

  std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const;
  std::optional<CharBlock> GetCharBlockFromLineAndColumns(
      int line, int startColumn, int endColumn) const;
  std::optional<std::pair<SourcePosition, SourcePosition>>
      GetSourcePositionRange(CharBlock) const;
  std::optional<CharBlock> GetCharBlock(ProvenanceRange) const;
  void Dump(llvm::raw_ostream &) const;

  // For sorting symbol names without being dependent on pointer values
  bool Precedes(CharBlock, CharBlock) const;

  // Once a CookedSource is complete, add it to index_ and assign its number_
  void Register(CookedSource &);

private:
  AllSources &allSources_;
  std::list<CookedSource> cooked_; // owns all CookedSource instances
  std::map<CharBlock, const CookedSource &, CharBlockPointerComparator> index_;
};

} // namespace Fortran::parser
#endif // FORTRAN_PARSER_PROVENANCE_H_