llvm/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h

//===-- PythonDataObjects.h--------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//
// !! FIXME FIXME FIXME !!
//
// Python APIs nearly all can return an exception.   They do this
// by returning NULL, or -1, or some such value and setting
// the exception state with PyErr_Set*().   Exceptions must be
// handled before further python API functions are called.   Failure
// to do so will result in asserts on debug builds of python.
// It will also sometimes, but not usually result in crashes of
// release builds.
//
// Nearly all the code in this header does not handle python exceptions
// correctly.  It should all be converted to return Expected<> or
// Error types to capture the exception.
//
// Everything in this file except functions that return Error or
// Expected<> is considered deprecated and should not be
// used in new code.  If you need to use it, fix it first.
//
//
// TODOs for this file
//
// * Make all methods safe for exceptions.
//
// * Eliminate method signatures that must translate exceptions into
//   empty objects or NULLs.   Almost everything here should return
//   Expected<>.   It should be acceptable for certain operations that
//   can never fail to assert instead, such as the creation of
//   PythonString from a string literal.
//
// * Eliminate Reset(), and make all non-default constructors private.
//   Python objects should be created with Retain<> or Take<>, and they
//   should be assigned with operator=
//
// * Eliminate default constructors, make python objects always
//   nonnull, and use optionals where necessary.
//


#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H

#include "lldb/Host/Config.h"

#if LLDB_ENABLE_PYTHON

// LLDB Python header must be included first
#include "lldb-python.h"

#include "lldb/Host/File.h"
#include "lldb/Utility/StructuredData.h"

#include "llvm/ADT/ArrayRef.h"

namespace lldb_private {
namespace python {

class PythonObject;
class PythonBytes;
class PythonString;
class PythonList;
class PythonDictionary;
class PythonInteger;
class PythonException;

class GIL {
public:
  GIL() {
    m_state = PyGILState_Ensure();
    assert(!PyErr_Occurred());
  }
  ~GIL() { PyGILState_Release(m_state); }

protected:
  PyGILState_STATE m_state;
};

enum class PyObjectType {
  Unknown,
  None,
  Boolean,
  Integer,
  Dictionary,
  List,
  String,
  Bytes,
  ByteArray,
  Module,
  Callable,
  Tuple,
  File
};

enum class PyRefType {
  Borrowed, // We are not given ownership of the incoming PyObject.
            // We cannot safely hold it without calling Py_INCREF.
  Owned     // We have ownership of the incoming PyObject.  We should
            // not call Py_INCREF.
};


// Take a reference that you already own, and turn it into
// a PythonObject.
//
// Most python API methods will return a +1 reference
// if they succeed or NULL if and only if
// they set an exception.   Use this to collect such return
// values, after checking for NULL.
//
// If T is not just PythonObject, then obj must be already be
// checked to be of the correct type.
template <typename T> T Take(PyObject *obj) {
  assert(obj);
  assert(!PyErr_Occurred());
  T thing(PyRefType::Owned, obj);
  assert(thing.IsValid());
  return thing;
}

// Retain a reference you have borrowed, and turn it into
// a PythonObject.
//
// A minority of python APIs return a borrowed reference
// instead of a +1.   They will also return NULL if and only
// if they set an exception.   Use this to collect such return
// values, after checking for NULL.
//
// If T is not just PythonObject, then obj must be already be
// checked to be of the correct type.
template <typename T> T Retain(PyObject *obj) {
  assert(obj);
  assert(!PyErr_Occurred());
  T thing(PyRefType::Borrowed, obj);
  assert(thing.IsValid());
  return thing;
}

// This class can be used like a utility function to convert from
// a llvm-friendly Twine into a null-terminated const char *,
// which is the form python C APIs want their strings in.
//
// Example:
// const llvm::Twine &some_twine;
// PyFoo_Bar(x, y, z, NullTerminated(some_twine));
//
// Why a class instead of a function?  If the twine isn't already null
// terminated, it will need a temporary buffer to copy the string
// into.   We need that buffer to stick around for the lifetime of the
// statement.
class NullTerminated {
  const char *str;
  llvm::SmallString<32> storage;

public:
  NullTerminated(const llvm::Twine &twine) {
    llvm::StringRef ref = twine.toNullTerminatedStringRef(storage);
    str = ref.begin();
  }
  operator const char *() { return str; }
};

inline llvm::Error nullDeref() {
  return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                 "A NULL PyObject* was dereferenced");
}

inline llvm::Error exception(const char *s = nullptr) {
  return llvm::make_error<PythonException>(s);
}

inline llvm::Error keyError() {
  return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                 "key not in dict");
}

inline const char *py2_const_cast(const char *s) { return s; }

enum class PyInitialValue { Invalid, Empty };

// DOC: https://docs.python.org/3/c-api/arg.html#building-values
template <typename T, typename Enable = void> struct PythonFormat;

template <typename T, char F> struct PassthroughFormat {
  static constexpr char format = F;
  static constexpr T get(T t) { return t; }
};

template <> struct PythonFormat<char *> : PassthroughFormat<char *, 's'> {};
template <> struct PythonFormat<const char *> : 
    PassthroughFormat<const char *, 's'> {};
template <> struct PythonFormat<char> : PassthroughFormat<char, 'b'> {};
template <>
struct PythonFormat<unsigned char> : PassthroughFormat<unsigned char, 'B'> {};
template <> struct PythonFormat<short> : PassthroughFormat<short, 'h'> {};
template <>
struct PythonFormat<unsigned short> : PassthroughFormat<unsigned short, 'H'> {};
template <> struct PythonFormat<int> : PassthroughFormat<int, 'i'> {};
template <> struct PythonFormat<bool> : PassthroughFormat<bool, 'p'> {};
template <>
struct PythonFormat<unsigned int> : PassthroughFormat<unsigned int, 'I'> {};
template <> struct PythonFormat<long> : PassthroughFormat<long, 'l'> {};
template <>
struct PythonFormat<unsigned long> : PassthroughFormat<unsigned long, 'k'> {};
template <>
struct PythonFormat<long long> : PassthroughFormat<long long, 'L'> {};
template <>
struct PythonFormat<unsigned long long>
    : PassthroughFormat<unsigned long long, 'K'> {};
template <>
struct PythonFormat<PyObject *> : PassthroughFormat<PyObject *, 'O'> {};

template <typename T>
struct PythonFormat<
    T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> {
  static constexpr char format = 'O';
  static auto get(const T &value) { return value.get(); }
};

class PythonObject {
public:
  PythonObject() = default;

  PythonObject(PyRefType type, PyObject *py_obj) {
    m_py_obj = py_obj;
    // If this is a borrowed reference, we need to convert it to
    // an owned reference by incrementing it.  If it is an owned
    // reference (for example the caller allocated it with PyDict_New()
    // then we must *not* increment it.
    if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed)
      Py_XINCREF(m_py_obj);
  }

  PythonObject(const PythonObject &rhs)
      : PythonObject(PyRefType::Borrowed, rhs.m_py_obj) {}

  PythonObject(PythonObject &&rhs) {
    m_py_obj = rhs.m_py_obj;
    rhs.m_py_obj = nullptr;
  }

  ~PythonObject() { Reset(); }

  void Reset();

  void Dump() const {
    if (m_py_obj)
      _PyObject_Dump(m_py_obj);
    else
      puts("NULL");
  }

  void Dump(Stream &strm) const;

  PyObject *get() const { return m_py_obj; }

  PyObject *release() {
    PyObject *result = m_py_obj;
    m_py_obj = nullptr;
    return result;
  }

  PythonObject &operator=(PythonObject other) {
    Reset();
    m_py_obj = std::exchange(other.m_py_obj, nullptr);
    return *this;
  }

  PyObjectType GetObjectType() const;

  PythonString Repr() const;

  PythonString Str() const;

  static PythonObject ResolveNameWithDictionary(llvm::StringRef name,
                                                const PythonDictionary &dict);

  template <typename T>
  static T ResolveNameWithDictionary(llvm::StringRef name,
                                     const PythonDictionary &dict) {
    return ResolveNameWithDictionary(name, dict).AsType<T>();
  }

  PythonObject ResolveName(llvm::StringRef name) const;

  template <typename T> T ResolveName(llvm::StringRef name) const {
    return ResolveName(name).AsType<T>();
  }

  bool HasAttribute(llvm::StringRef attribute) const;

  PythonObject GetAttributeValue(llvm::StringRef attribute) const;

  bool IsNone() const { return m_py_obj == Py_None; }

  bool IsValid() const { return m_py_obj != nullptr; }

  bool IsAllocated() const { return IsValid() && !IsNone(); }

  explicit operator bool() const { return IsValid() && !IsNone(); }

  template <typename T> T AsType() const {
    if (!T::Check(m_py_obj))
      return T();
    return T(PyRefType::Borrowed, m_py_obj);
  }

  StructuredData::ObjectSP CreateStructuredObject() const;

  template <typename... T>
  llvm::Expected<PythonObject> CallMethod(const char *name,
                                          const T &... t) const {
    const char format[] = {'(', PythonFormat<T>::format..., ')', 0};
    PyObject *obj =
        PyObject_CallMethod(m_py_obj, py2_const_cast(name),
                            py2_const_cast(format), PythonFormat<T>::get(t)...);
    if (!obj)
      return exception();
    return python::Take<PythonObject>(obj);
  }

  template <typename... T>
  llvm::Expected<PythonObject> Call(const T &... t) const {
    const char format[] = {'(', PythonFormat<T>::format..., ')', 0};
    PyObject *obj = PyObject_CallFunction(m_py_obj, py2_const_cast(format),
                                          PythonFormat<T>::get(t)...);
    if (!obj)
      return exception();
    return python::Take<PythonObject>(obj);
  }

  llvm::Expected<PythonObject> GetAttribute(const llvm::Twine &name) const {
    if (!m_py_obj)
      return nullDeref();
    PyObject *obj = PyObject_GetAttrString(m_py_obj, NullTerminated(name));
    if (!obj)
      return exception();
    return python::Take<PythonObject>(obj);
  }

  llvm::Expected<PythonObject> GetType() const {
    if (!m_py_obj)
      return nullDeref();
    PyObject *obj = PyObject_Type(m_py_obj);
    if (!obj)
      return exception();
    return python::Take<PythonObject>(obj);
  }

  llvm::Expected<bool> IsTrue() {
    if (!m_py_obj)
      return nullDeref();
    int r = PyObject_IsTrue(m_py_obj);
    if (r < 0)
      return exception();
    return !!r;
  }

  llvm::Expected<long long> AsLongLong() const;

  llvm::Expected<unsigned long long> AsUnsignedLongLong() const;

  // wraps on overflow, instead of raising an error.
  llvm::Expected<unsigned long long> AsModuloUnsignedLongLong() const;

  llvm::Expected<bool> IsInstance(const PythonObject &cls) {
    if (!m_py_obj || !cls.IsValid())
      return nullDeref();
    int r = PyObject_IsInstance(m_py_obj, cls.get());
    if (r < 0)
      return exception();
    return !!r;
  }

protected:
  PyObject *m_py_obj = nullptr;
};


// This is why C++ needs monads.
template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) {
  if (!obj)
    return obj.takeError();
  if (!T::Check(obj.get().get()))
    return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                   "type error");
  return T(PyRefType::Borrowed, std::move(obj.get().get()));
}

template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj);

template <>
llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj);

template <>
llvm::Expected<unsigned long long>
As<unsigned long long>(llvm::Expected<PythonObject> &&obj);

template <>
llvm::Expected<std::string> As<std::string>(llvm::Expected<PythonObject> &&obj);


template <class T> class TypedPythonObject : public PythonObject {
public:
  TypedPythonObject(PyRefType type, PyObject *py_obj) {
    if (!py_obj)
      return;
    if (T::Check(py_obj))
      PythonObject::operator=(PythonObject(type, py_obj));
    else if (type == PyRefType::Owned)
      Py_DECREF(py_obj);
  }

  TypedPythonObject() = default;
};

class PythonBytes : public TypedPythonObject<PythonBytes> {
public:
  using TypedPythonObject::TypedPythonObject;
  explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes);
  PythonBytes(const uint8_t *bytes, size_t length);

  static bool Check(PyObject *py_obj);

  llvm::ArrayRef<uint8_t> GetBytes() const;

  size_t GetSize() const;

  void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);

  StructuredData::StringSP CreateStructuredString() const;
};

class PythonByteArray : public TypedPythonObject<PythonByteArray> {
public:
  using TypedPythonObject::TypedPythonObject;
  explicit PythonByteArray(llvm::ArrayRef<uint8_t> bytes);
  PythonByteArray(const uint8_t *bytes, size_t length);
  PythonByteArray(const PythonBytes &object);

  static bool Check(PyObject *py_obj);

  llvm::ArrayRef<uint8_t> GetBytes() const;

  size_t GetSize() const;

  void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);

  StructuredData::StringSP CreateStructuredString() const;
};

class PythonString : public TypedPythonObject<PythonString> {
public:
  using TypedPythonObject::TypedPythonObject;
  static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string);

  PythonString() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonString(llvm::StringRef string); // safe, null on error

  static bool Check(PyObject *py_obj);

  llvm::StringRef GetString() const; // safe, empty string on error

  llvm::Expected<llvm::StringRef> AsUTF8() const;

  size_t GetSize() const;

  void SetString(llvm::StringRef string); // safe, null on error

  StructuredData::StringSP CreateStructuredString() const;
};

class PythonInteger : public TypedPythonObject<PythonInteger> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonInteger() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonInteger(int64_t value);

  static bool Check(PyObject *py_obj);

  void SetInteger(int64_t value);

  StructuredData::IntegerSP CreateStructuredInteger() const;

  StructuredData::UnsignedIntegerSP CreateStructuredUnsignedInteger() const;

  StructuredData::SignedIntegerSP CreateStructuredSignedInteger() const;
};

class PythonBoolean : public TypedPythonObject<PythonBoolean> {
public:
  using TypedPythonObject::TypedPythonObject;

  explicit PythonBoolean(bool value);

  static bool Check(PyObject *py_obj);

  bool GetValue() const;

  void SetValue(bool value);

  StructuredData::BooleanSP CreateStructuredBoolean() const;
};

class PythonList : public TypedPythonObject<PythonList> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonList() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonList(PyInitialValue value);
  explicit PythonList(int list_size);

  static bool Check(PyObject *py_obj);

  uint32_t GetSize() const;

  PythonObject GetItemAtIndex(uint32_t index) const;

  void SetItemAtIndex(uint32_t index, const PythonObject &object);

  void AppendItem(const PythonObject &object);

  StructuredData::ArraySP CreateStructuredArray() const;
};

class PythonTuple : public TypedPythonObject<PythonTuple> {
public:
  using TypedPythonObject::TypedPythonObject;

  explicit PythonTuple(PyInitialValue value);
  explicit PythonTuple(int tuple_size);
  PythonTuple(std::initializer_list<PythonObject> objects);
  PythonTuple(std::initializer_list<PyObject *> objects);

  static bool Check(PyObject *py_obj);

  uint32_t GetSize() const;

  PythonObject GetItemAtIndex(uint32_t index) const;

  void SetItemAtIndex(uint32_t index, const PythonObject &object);

  StructuredData::ArraySP CreateStructuredArray() const;
};

class PythonDictionary : public TypedPythonObject<PythonDictionary> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonDictionary() : TypedPythonObject() {} // MSVC requires this for some reason

  explicit PythonDictionary(PyInitialValue value);

  static bool Check(PyObject *py_obj);

  bool HasKey(const llvm::Twine &key) const;

  uint32_t GetSize() const;

  PythonList GetKeys() const;

  PythonObject GetItemForKey(const PythonObject &key) const; // DEPRECATED
  void SetItemForKey(const PythonObject &key,
                     const PythonObject &value); // DEPRECATED

  llvm::Expected<PythonObject> GetItem(const PythonObject &key) const;
  llvm::Expected<PythonObject> GetItem(const llvm::Twine &key) const;
  llvm::Error SetItem(const PythonObject &key, const PythonObject &value) const;
  llvm::Error SetItem(const llvm::Twine &key, const PythonObject &value) const;

  StructuredData::DictionarySP CreateStructuredDictionary() const;
};

class PythonModule : public TypedPythonObject<PythonModule> {
public:
  using TypedPythonObject::TypedPythonObject;

  static bool Check(PyObject *py_obj);

  static PythonModule BuiltinsModule();

  static PythonModule MainModule();

  static PythonModule AddModule(llvm::StringRef module);

  // safe, returns invalid on error;
  static PythonModule ImportModule(llvm::StringRef name) {
    std::string s = std::string(name);
    auto mod = Import(s.c_str());
    if (!mod) {
      llvm::consumeError(mod.takeError());
      return PythonModule();
    }
    return std::move(mod.get());
  }

  static llvm::Expected<PythonModule> Import(const llvm::Twine &name);

  llvm::Expected<PythonObject> Get(const llvm::Twine &name);

  PythonDictionary GetDictionary() const;
};

class PythonCallable : public TypedPythonObject<PythonCallable> {
public:
  using TypedPythonObject::TypedPythonObject;

  struct ArgInfo {
    /* the largest number of positional arguments this callable
     * can accept, or UNBOUNDED, ie UINT_MAX if it's a varargs
     * function and can accept an arbitrary number */
    unsigned max_positional_args;
    static constexpr unsigned UNBOUNDED = UINT_MAX; // FIXME c++17 inline
  };

  static bool Check(PyObject *py_obj);

  llvm::Expected<ArgInfo> GetArgInfo() const;

  PythonObject operator()();

  PythonObject operator()(std::initializer_list<PyObject *> args);

  PythonObject operator()(std::initializer_list<PythonObject> args);

  template <typename Arg, typename... Args>
  PythonObject operator()(const Arg &arg, Args... args) {
    return operator()({arg, args...});
  }
};

class PythonFile : public TypedPythonObject<PythonFile> {
public:
  using TypedPythonObject::TypedPythonObject;

  PythonFile() : TypedPythonObject() {} // MSVC requires this for some reason

  static bool Check(PyObject *py_obj);

  static llvm::Expected<PythonFile> FromFile(File &file,
                                             const char *mode = nullptr);

  llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false);
  llvm::Expected<lldb::FileSP>
  ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false);
};

class PythonException : public llvm::ErrorInfo<PythonException> {
private:
  PyObject *m_exception_type, *m_exception, *m_traceback;
  PyObject *m_repr_bytes;

public:
  static char ID;
  const char *toCString() const;
  PythonException(const char *caller = nullptr);
  void Restore();
  ~PythonException() override;
  void log(llvm::raw_ostream &OS) const override;
  std::error_code convertToErrorCode() const override;
  bool Matches(PyObject *exc) const;
  std::string ReadBacktrace() const;
};

// This extracts the underlying T out of an Expected<T> and returns it.
// If the Expected is an Error instead of a T, that error will be converted
// into a python exception, and this will return a default-constructed T.
//
// This is appropriate for use right at the boundary of python calling into
// C++, such as in a SWIG typemap.   In such a context you should simply
// check if the returned T is valid, and if it is, return a NULL back
// to python.   This will result in the Error being raised as an exception
// from python code's point of view.
//
// For example:
// ```
// Expected<Foo *> efoop = some_cpp_function();
// Foo *foop = unwrapOrSetPythonException(efoop);
// if (!foop)
//    return NULL;
// do_something(*foop);
//
// If the Error returned was itself created because a python exception was
// raised when C++ code called into python, then the original exception
// will be restored.   Otherwise a simple string exception will be raised.
template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) {
  if (expected)
    return expected.get();
  llvm::handleAllErrors(
      expected.takeError(), [](PythonException &E) { E.Restore(); },
      [](const llvm::ErrorInfoBase &E) {
        PyErr_SetString(PyExc_Exception, E.message().c_str());
      });
  return T();
}

// This is only here to help incrementally migrate old, exception-unsafe
// code.
template <typename T> T unwrapIgnoringErrors(llvm::Expected<T> expected) {
  if (expected)
    return std::move(expected.get());
  llvm::consumeError(expected.takeError());
  return T();
}

llvm::Expected<PythonObject> runStringOneLine(const llvm::Twine &string,
                                              const PythonDictionary &globals,
                                              const PythonDictionary &locals);

llvm::Expected<PythonObject> runStringMultiLine(const llvm::Twine &string,
                                                const PythonDictionary &globals,
                                                const PythonDictionary &locals);

// Sometimes the best way to interact with a python interpreter is
// to run some python code.   You construct a PythonScript with
// script string.   The script assigns some function to `_function_`
// and you get a C++ callable object that calls the python function.
//
// Example:
//
// const char script[] = R"(
// def main(x, y):
//    ....
// )";
//
// Expected<PythonObject> cpp_foo_wrapper(PythonObject x, PythonObject y) {
//   // no need to synchronize access to this global, we already have the GIL
//   static PythonScript foo(script)
//   return  foo(x, y);
// }
class PythonScript {
  const char *script;
  PythonCallable function;

  llvm::Error Init();

public:
  PythonScript(const char *script) : script(script), function() {}

  template <typename... Args>
  llvm::Expected<PythonObject> operator()(Args &&... args) {
    if (llvm::Error error = Init())
      return std::move(error);
    return function.Call(std::forward<Args>(args)...);
  }
};

class StructuredPythonObject : public StructuredData::Generic {
public:
  StructuredPythonObject() : StructuredData::Generic() {}

  // Take ownership of the object we received.
  StructuredPythonObject(PythonObject obj)
      : StructuredData::Generic(obj.release()) {}

  ~StructuredPythonObject() override {
    // Hand ownership back to a (temporary) PythonObject instance and let it
    // take care of releasing it.
    PythonObject(PyRefType::Owned, static_cast<PyObject *>(GetValue()));
  }

  bool IsValid() const override { return GetValue() && GetValue() != Py_None; }

  void Serialize(llvm::json::OStream &s) const override;

private:
  StructuredPythonObject(const StructuredPythonObject &) = delete;
  const StructuredPythonObject &
  operator=(const StructuredPythonObject &) = delete;
};

} // namespace python
} // namespace lldb_private

#endif

#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H