// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2021-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <[email protected]> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/trace.h" /* * Large Arrays of Fixed-Size Records * ================================== * * This memory array uses an xfile (which itself is a shmem file) to store * large numbers of fixed-size records in memory that can be paged out. This * puts less stress on the memory reclaim algorithms during an online repair * because we don't have to pin so much memory. However, array access is less * direct than would be in a regular memory array. Access to the array is * performed via indexed load and store methods, and an append method is * provided for convenience. Array elements can be unset, which sets them to * all zeroes. Unset entries are skipped during iteration, though direct loads * will return a zeroed buffer. Callers are responsible for concurrency * control. */ /* * Pointer to scratch space. Because we can't access the xfile data directly, * we allocate a small amount of memory on the end of the xfarray structure to * buffer array items when we need space to store values temporarily. */ static inline void *xfarray_scratch(struct xfarray *array) { … } /* Compute array index given an xfile offset. */ static xfarray_idx_t xfarray_idx( struct xfarray *array, loff_t pos) { … } /* Compute xfile offset of array element. */ static inline loff_t xfarray_pos(struct xfarray *array, xfarray_idx_t idx) { … } /* * Initialize a big memory array. Array records cannot be larger than a * page, and the array cannot span more bytes than the page cache supports. * If @required_capacity is nonzero, the maximum array size will be set to this * quantity and the array creation will fail if the underlying storage cannot * support that many records. */ int xfarray_create( const char *description, unsigned long long required_capacity, size_t obj_size, struct xfarray **arrayp) { … } /* Destroy the array. */ void xfarray_destroy( struct xfarray *array) { … } /* Load an element from the array. */ int xfarray_load( struct xfarray *array, xfarray_idx_t idx, void *ptr) { … } /* Is this array element potentially unset? */ static inline bool xfarray_is_unset( struct xfarray *array, loff_t pos) { … } /* * Unset an array element. If @idx is the last element in the array, the * array will be truncated. Otherwise, the entry will be zeroed. */ int xfarray_unset( struct xfarray *array, xfarray_idx_t idx) { … } /* * Store an element in the array. The element must not be completely zeroed, * because those are considered unset sparse elements. */ int xfarray_store( struct xfarray *array, xfarray_idx_t idx, const void *ptr) { … } /* Is this array element NULL? */ bool xfarray_element_is_null( struct xfarray *array, const void *ptr) { … } /* * Store an element anywhere in the array that is unset. If there are no * unset slots, append the element to the array. */ int xfarray_store_anywhere( struct xfarray *array, const void *ptr) { … } /* Return length of array. */ uint64_t xfarray_length( struct xfarray *array) { … } /* * Decide which array item we're going to read as part of an _iter_get. * @cur is the array index, and @pos is the file offset of that array index in * the backing xfile. Returns ENODATA if we reach the end of the records. * * Reading from a hole in a sparse xfile causes page instantiation, so for * iterating a (possibly sparse) array we need to figure out if the cursor is * pointing at a totally uninitialized hole and move the cursor up if * necessary. */ static inline int xfarray_find_data( struct xfarray *array, xfarray_idx_t *cur, loff_t *pos) { … } /* * Starting at *idx, fetch the next non-null array entry and advance the index * to set up the next _load_next call. Returns ENODATA if we reach the end of * the array. Callers must set @*idx to XFARRAY_CURSOR_INIT before the first * call to this function. */ int xfarray_load_next( struct xfarray *array, xfarray_idx_t *idx, void *rec) { … } /* Sorting functions */ #ifdef DEBUG #define xfarray_sort_bump_loads(si) … #define xfarray_sort_bump_stores(si) … #define xfarray_sort_bump_compares(si) … #define xfarray_sort_bump_heapsorts(si) … #else #define xfarray_sort_bump_loads … #define xfarray_sort_bump_stores … #define xfarray_sort_bump_compares … #define xfarray_sort_bump_heapsorts … #endif /* DEBUG */ /* Load an array element for sorting. */ static inline int xfarray_sort_load( struct xfarray_sortinfo *si, xfarray_idx_t idx, void *ptr) { … } /* Store an array element for sorting. */ static inline int xfarray_sort_store( struct xfarray_sortinfo *si, xfarray_idx_t idx, void *ptr) { … } /* Compare an array element for sorting. */ static inline int xfarray_sort_cmp( struct xfarray_sortinfo *si, const void *a, const void *b) { … } /* Return a pointer to the low index stack for quicksort partitioning. */ static inline xfarray_idx_t *xfarray_sortinfo_lo(struct xfarray_sortinfo *si) { … } /* Return a pointer to the high index stack for quicksort partitioning. */ static inline xfarray_idx_t *xfarray_sortinfo_hi(struct xfarray_sortinfo *si) { … } /* Size of each element in the quicksort pivot array. */ static inline size_t xfarray_pivot_rec_sz( struct xfarray *array) { … } /* Allocate memory to handle the sort. */ static inline int xfarray_sortinfo_alloc( struct xfarray *array, xfarray_cmp_fn cmp_fn, unsigned int flags, struct xfarray_sortinfo **infop) { … } /* Should this sort be terminated by a fatal signal? */ static inline bool xfarray_sort_terminated( struct xfarray_sortinfo *si, int *error) { … } /* Do we want an in-memory sort? */ static inline bool xfarray_want_isort( struct xfarray_sortinfo *si, xfarray_idx_t start, xfarray_idx_t end) { … } /* Return the scratch space within the sortinfo structure. */ static inline void *xfarray_sortinfo_isort_scratch(struct xfarray_sortinfo *si) { … } /* * Sort a small number of array records using scratchpad memory. The records * need not be contiguous in the xfile's memory pages. */ STATIC int xfarray_isort( struct xfarray_sortinfo *si, xfarray_idx_t lo, xfarray_idx_t hi) { … } /* * Sort the records from lo to hi (inclusive) if they are all backed by the * same memory folio. Returns 1 if it sorted, 0 if it did not, or a negative * errno. */ STATIC int xfarray_foliosort( struct xfarray_sortinfo *si, xfarray_idx_t lo, xfarray_idx_t hi) { … } /* Return a pointer to the xfarray pivot record within the sortinfo struct. */ static inline void *xfarray_sortinfo_pivot(struct xfarray_sortinfo *si) { … } /* Return a pointer to the start of the pivot array. */ static inline void * xfarray_sortinfo_pivot_array( struct xfarray_sortinfo *si) { … } /* The xfarray record is stored at the start of each pivot array element. */ static inline void * xfarray_pivot_array_rec( void *pa, size_t pa_recsz, unsigned int pa_idx) { … } /* The xfarray index is stored at the end of each pivot array element. */ static inline xfarray_idx_t * xfarray_pivot_array_idx( void *pa, size_t pa_recsz, unsigned int pa_idx) { … } /* * Find a pivot value for quicksort partitioning, swap it with a[lo], and save * the cached pivot record for the next step. * * Load evenly-spaced records within the given range into memory, sort them, * and choose the pivot from the median record. Using multiple points will * improve the quality of the pivot selection, and hopefully avoid the worst * quicksort behavior, since our array values are nearly always evenly sorted. */ STATIC int xfarray_qsort_pivot( struct xfarray_sortinfo *si, xfarray_idx_t lo, xfarray_idx_t hi) { … } /* * Set up the pointers for the next iteration. We push onto the stack all of * the unsorted values between a[lo + 1] and a[end[i]], and we tweak the * current stack frame to point to the unsorted values between a[beg[i]] and * a[lo] so that those values will be sorted when we pop the stack. */ static inline int xfarray_qsort_push( struct xfarray_sortinfo *si, xfarray_idx_t *si_lo, xfarray_idx_t *si_hi, xfarray_idx_t lo, xfarray_idx_t hi) { … } static inline void xfarray_sort_scan_done( struct xfarray_sortinfo *si) { … } /* * Cache the folio backing the start of the given array element. If the array * element is contained entirely within the folio, return a pointer to the * cached folio. Otherwise, load the element into the scratchpad and return a * pointer to the scratchpad. */ static inline int xfarray_sort_scan( struct xfarray_sortinfo *si, xfarray_idx_t idx, void **ptrp) { … } /* * Sort the array elements via quicksort. This implementation incorporates * four optimizations discussed in Sedgewick: * * 1. Use an explicit stack of array indices to store the next array partition * to sort. This helps us to avoid recursion in the call stack, which is * particularly expensive in the kernel. * * 2. For arrays with records in arbitrary or user-controlled order, choose the * pivot element using a median-of-nine decision tree. This reduces the * probability of selecting a bad pivot value which causes worst case * behavior (i.e. partition sizes of 1). * * 3. The smaller of the two sub-partitions is pushed onto the stack to start * the next level of recursion, and the larger sub-partition replaces the * current stack frame. This guarantees that we won't need more than * log2(nr) stack space. * * 4. For small sets, load the records into the scratchpad and run heapsort on * them because that is very fast. In the author's experience, this yields * a ~10% reduction in runtime. * * If a small set is contained entirely within a single xfile memory page, * map the page directly and run heap sort directly on the xfile page * instead of using the load/store interface. This halves the runtime. * * 5. This optimization is specific to the implementation. When converging lo * and hi after selecting a pivot, we will try to retain the xfile memory * page between load calls, which reduces run time by 50%. */ /* * Due to the use of signed indices, we can only support up to 2^63 records. * Files can only grow to 2^63 bytes, so this is not much of a limitation. */ #define QSORT_MAX_RECS … int xfarray_sort( struct xfarray *array, xfarray_cmp_fn cmp_fn, unsigned int flags) { … } /* How many bytes is this array consuming? */ unsigned long long xfarray_bytes( struct xfarray *array) { … } /* Empty the entire array. */ void xfarray_truncate( struct xfarray *array) { … }