/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include <atomic> #include <chrono> #include <memory> #include <glog/logging.h> #include <folly/ConstexprMath.h> #include <folly/Optional.h> #include <folly/Traits.h> #include <folly/concurrency/CacheLocality.h> #include <folly/lang/Align.h> #include <folly/synchronization/Hazptr.h> #include <folly/synchronization/SaturatingSemaphore.h> #include <folly/synchronization/WaitOptions.h> #include <folly/synchronization/detail/Spin.h> namespace folly { /// UnboundedQueue supports a variety of options for unbounded /// dynamically expanding an shrinking queues, including variations of: /// - Single vs. multiple producers /// - Single vs. multiple consumers /// - Blocking vs. spin-waiting /// - Non-waiting, timed, and waiting consumer operations. /// Producer operations never wait or fail (unless out-of-memory). /// /// Template parameters: /// - T: element type /// - SingleProducer: true if there can be only one producer at a /// time. /// - SingleConsumer: true if there can be only one consumer at a /// time. /// - MayBlock: true if consumers may block, false if they only /// spin. A performance tuning parameter. /// - LgSegmentSize (default 8): Log base 2 of number of elements per /// segment. A performance tuning parameter. See below. /// - LgAlign (default 7): Log base 2 of alignment directive; can be /// used to balance scalability (avoidance of false sharing) with /// memory efficiency. /// /// When to use UnboundedQueue: /// - If a small bound may lead to deadlock or performance degradation /// under bursty patterns. /// - If there is no risk of the queue growing too much. /// /// When not to use UnboundedQueue: /// - If there is risk of the queue growing too much and a large bound /// is acceptable, then use DynamicBoundedQueue. /// - If the queue must not allocate on enqueue or it must have a /// small bound, then use fixed-size MPMCQueue or (if non-blocking /// SPSC) ProducerConsumerQueue. /// /// Template Aliases: /// USPSCQueue<T, MayBlock, LgSegmentSize, LgAlign> /// UMPSCQueue<T, MayBlock, LgSegmentSize, LgAlign> /// USPMCQueue<T, MayBlock, LgSegmentSize, LgAlign> /// UMPMCQueue<T, MayBlock, LgSegmentSize, LgAlign> /// /// Functions: /// Producer operations never wait or fail (unless OOM) /// void enqueue(const T&); /// void enqueue(T&&); /// Adds an element to the end of the queue. /// /// Consumer operations: /// void dequeue(T&); /// T dequeue(); /// Extracts an element from the front of the queue. Waits /// until an element is available if needed. /// bool try_dequeue(T&); /// folly::Optional<T> try_dequeue(); /// Tries to extract an element from the front of the queue /// if available. /// bool try_dequeue_until(T&, time_point& deadline); /// folly::Optional<T> try_dequeue_until(time_point& deadline); /// Tries to extract an element from the front of the queue /// if available until the specified deadline. /// bool try_dequeue_for(T&, duration&); /// folly::Optional<T> try_dequeue_for(duration&); /// Tries to extract an element from the front of the queue if /// available until the expiration of the specified duration. /// const T* try_peek(); /// Returns pointer to the element at the front of the queue /// if available, or nullptr if the queue is empty. Only for /// SPSC and MPSC. /// /// Secondary functions: /// size_t size(); /// Returns an estimate of the size of the queue. /// bool empty(); /// Returns true only if the queue was empty during the call. /// Note: size() and empty() are guaranteed to be accurate only if /// the queue is not changed concurrently. /// /// Usage examples: /// @code /// /* UMPSC, doesn't block, 1024 int elements per segment */ /// UMPSCQueue<int, false, 10> q; /// q.enqueue(1); /// q.enqueue(2); /// q.enqueue(3); /// ASSERT_FALSE(q.empty()); /// ASSERT_EQ(q.size(), 3); /// int v; /// q.dequeue(v); /// ASSERT_EQ(v, 1); /// ASSERT_TRUE(try_dequeue(v)); /// ASSERT_EQ(v, 2); /// ASSERT_TRUE(try_dequeue_until(v, now() + seconds(1))); /// ASSERT_EQ(v, 3); /// ASSERT_TRUE(q.empty()); /// ASSERT_EQ(q.size(), 0); /// ASSERT_FALSE(try_dequeue(v)); /// ASSERT_FALSE(try_dequeue_for(v, microseconds(100))); /// @endcode /// /// Design: /// - The queue is composed of one or more segments. Each segment has /// a fixed size of 2^LgSegmentSize entries. Each segment is used /// exactly once. /// - Each entry is composed of a futex and a single element. /// - Each segment's array of entries is strided to avoid false sharing. /// I.e., to reduce any cacheline contention that might be induced by /// concurrent mutations to the queue that might happen to affect /// otherwise-adjacent locations that might happen to share cacheline. /// - The queue contains two 64-bit ticket variables. The producer /// ticket counts the number of producer tickets issued so far, and /// the same for the consumer ticket. Each ticket number corresponds /// to a specific entry in a specific segment. /// - The queue maintains two pointers, head and tail. Head points to /// the segment that corresponds to the current consumer /// ticket. Similarly, tail pointer points to the segment that /// corresponds to the producer ticket. /// - Segments are organized as a singly linked list. /// - The producer with the first ticket in the current producer /// segment has primary responsibility for allocating and linking /// the next segment. Other producers and connsumers may help do so /// when needed if that thread is delayed. /// - The producer with the last ticket in the current producer /// segment is primarily responsible for advancing the tail pointer /// to the next segment. Other producers and consumers may help do /// so when needed if that thread is delayed. /// - Similarly, the consumer with the last ticket in the current /// consumer segment is primarily responsible for advancing the head /// pointer to the next segment. Other consumers may help do so when /// needed if that thread is delayed. /// - The tail pointer must not lag behind the head pointer. /// Otherwise, the algorithm cannot be certain about the removal of /// segment and would have to incur higher costs to ensure safe /// reclamation. Consumers must ensure that head never overtakes /// tail. /// /// Memory Usage: /// - An empty queue contains one segment. A nonempty queue contains /// one or two more segment than fits its contents. /// - Removed segments are not reclaimed until there are no threads, /// producers or consumers, with references to them or their /// predecessors. That is, a lagging thread may delay the reclamation /// of a chain of removed segments. /// - The template parameter LgAlign can be used to reduce memory usage /// at the cost of increased chance of false sharing. /// /// Performance considerations: /// - All operations take constant time, excluding the costs of /// allocation, reclamation, interference from other threads, and /// waiting for actions by other threads. /// - In general, using the single producer and or single consumer /// variants yield better performance than the MP and MC /// alternatives. /// - SPSC without blocking is the fastest configuration. It doesn't /// include any read-modify-write atomic operations, full fences, or /// system calls in the critical path. /// - MP adds a fetch_add to the critical path of each producer operation. /// - MC adds a fetch_add or compare_exchange to the critical path of /// each consumer operation. /// - The possibility of consumers blocking, even if they never do, /// adds a compare_exchange to the critical path of each producer /// operation. /// - MPMC, SPMC, MPSC require the use of a deferred reclamation /// mechanism to guarantee that segments removed from the linked /// list, i.e., unreachable from the head pointer, are reclaimed /// only after they are no longer needed by any lagging producers or /// consumers. /// - The overheads of segment allocation and reclamation are intended /// to be mostly out of the critical path of the queue's throughput. /// - If the template parameter LgSegmentSize is changed, it should be /// set adequately high to keep the amortized cost of allocation and /// reclamation low. /// - It is recommended to measure performance with different variants /// when applicable, e.g., UMPMC vs UMPSC. Depending on the use /// case, sometimes the variant with the higher sequential overhead /// may yield better results due to, for example, more favorable /// producer-consumer balance or favorable timing for avoiding /// costly blocking. /// /// Guarantees: /// - The queues are linearizable: /// - For two enqueue operations q(A) and q(B), if q(A) < q(B) in /// the happens-before relation, then A precedes B in the queue. /// - For two dequeue operations d(A) and d(B), if d(A) < d(B) in /// the happens-before relation, then A preceded B in the queue. template < typename T, bool SingleProducer, bool SingleConsumer, bool MayBlock, size_t LgSegmentSize = 8, size_t LgAlign = constexpr_log2(hardware_destructive_interference_size), template <typename> class Atom = std::atomic> class UnboundedQueue { … }; // UnboundedQueue /* Aliases */ USPSCQueue; UMPSCQueue; USPMCQueue; UMPMCQueue; } // namespace folly