// folly/folly/Subprocess.h

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Subprocess library, modeled after Python's subprocess module
 * (http://docs.python.org/2/library/subprocess.html)
 *
 * This library defines one class (Subprocess) which represents a child
 * process.  Subprocess has two constructors: one that takes a vector<string>
 * and executes the given executable without using the shell, and one
 * that takes a string and executes the given command using the shell.
 * Subprocess allows you to redirect the child's standard input, standard
 * output, and standard error to/from file descriptors in the parent,
 * or to create communication pipes between the child and the parent.
 *
 * The simplest example is a thread-safe [1] version of the system() library
 * function:
 *    Subprocess(cmd).wait();
 * which executes the command using the default shell and waits for it
 * to complete, returning the exit status.
 *
 * A thread-safe [1] version of popen() (type="r", to read from the child):
 *    Subprocess proc(cmd, Subprocess::Options().pipeStdout());
 *    // read from proc.stdoutFd()
 *    proc.wait();
 *
 * A thread-safe [1] version of popen() (type="w", to write to the child):
 *    Subprocess proc(cmd, Subprocess::Options().pipeStdin());
 *    // write to proc.stdinFd()
 *    proc.wait();
 *
 * If you want to redirect both stdin and stdout to pipes, you can, but note
 * that you're subject to a variety of deadlocks.  You'll want to use
 * nonblocking I/O, like the callback version of communicate().
 *
 * The string or IOBuf-based variants of communicate() are the simplest way
 * to communicate with a child via its standard input, standard output, and
 * standard error.  They buffer everything in memory, so they are not great
 * for large amounts of data (or long-running processes), but they are much
 * simpler than the callback version.
 *
 * == A note on thread-safety ==
 *
 * [1] "thread-safe" refers ONLY to the fact that Subprocess is very careful
 * to fork in a way that does not cause grief in multithreaded programs.
 *
 * Caveat: If your system does not have the atomic pipe2 system call, it is
 * not safe to concurrently call Subprocess from different threads.
 * Therefore, it is best to have a single thread be responsible for spawning
 * subprocesses.
 *
 * A particular instance of Subprocess is emphatically **not** thread-safe.
 * If you need to simultaneously communicate via the pipes, and interact
 * with the Subprocess state, your best bet is to:
 *  - takeOwnershipOfPipes() to separate the pipe I/O from the subprocess.
 *  - Only interact with the Subprocess from one thread at a time.
 *
 * The current implementation of communicate() cannot be safely interrupted.
 * To do so correctly, one would need to use EventFD, or open a dedicated
 * pipe to be messaged from a different thread -- in particular, kill() will
 * not do, since a descendant may keep the pipes open indefinitely.
 *
 * So, once you call communicate(), you must wait for it to return, and not
 * touch the pipes from other threads.  closeParentFd() is emphatically
 * unsafe to call concurrently, and even sendSignal() is not a good idea.
 * You can perhaps give the Subprocess's PID to a different thread before
 * starting communicate(), and use that PID to send a signal without
 * accessing the Subprocess object.  In that case, you will need a mutex
 * that ensures you don't wait() before you have sent said signal.  In a
 * nutshell, don't do this.
 *
 * In fact, signals are inherently concurrency-unsafe on Unix: if you signal
 * a PID, while another thread is in waitpid(), the signal may fire either
 * before or after the process is reaped.  This means that your signal can,
 * in pathological circumstances, be delivered to the wrong process (ouch!).
 * To avoid this, you should only use non-blocking waits (i.e. poll()), and
 * make sure to serialize your signals (i.e. kill()) with the waits --
 * either wait & signal from the same thread, or use a mutex.
 */

#pragma once

#ifdef _WIN32
#error Subprocess is not supported on Windows.
#endif

#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <chrono>
#include <exception>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/container/flat_map.hpp>
#include <boost/operators.hpp>

#include <folly/Exception.h>
#include <folly/File.h>
#include <folly/FileUtil.h>
#include <folly/Function.h>
#include <folly/MapUtil.h>
#include <folly/Optional.h>
#include <folly/Portability.h>
#include <folly/Range.h>
#include <folly/gen/String.h>
#include <folly/io/IOBufQueue.h>
#include <folly/portability/SysResource.h>

namespace folly {

// Forward declaration, so that Subprocess can be named ahead of its
// definition further down in this header.
class Subprocess;

/**
 * Wrapper type for a process return code.
 */
class ProcessReturnCode {
};

/**
 * Base exception thrown by the Subprocess methods.
 *
 * Publicly derives from std::runtime_error and inherits its constructors,
 * so an instance is built from a what() message (std::string or
 * const char*) and can be caught as std::runtime_error.
 */
class FOLLY_EXPORT SubprocessError : public std::runtime_error {
 public:
  // std::runtime_error has no default constructor; without inheriting its
  // constructors this type could never be instantiated, and therefore
  // never thrown.
  using std::runtime_error::runtime_error;
};

/**
 * Error raised by the *Checked methods of Subprocess.
 *
 * It is a SubprocessError, so handlers written for that base type catch it
 * as well.
 */
class FOLLY_EXPORT CalledProcessError
    : public SubprocessError {};

/**
 * Error raised when the subprocess cannot be started.
 *
 * It is a SubprocessError, so handlers written for that base type catch it
 * as well.
 */
class FOLLY_EXPORT SubprocessSpawnError
    : public SubprocessError {};

/**
 * Subprocess: represents a child process.
 *
 * See the overview comment at the top of this header for usage examples
 * and the thread-safety caveats.
 */
class Subprocess {
};

} // namespace folly