/******************************************************************************
 *
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/block/xen-blkfront.c
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) …

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/bitmap.h>

#include <xen/events.h>
#include <xen/page.h>
#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <xen/balloon.h>
#include <xen/grant_table.h>
#include "common.h"

/*
 * Maximum number of unused free pages to keep in the internal buffer.
 * Setting this too low reduces the memory used by each backend, but can
 * incur a performance penalty.
 *
 * A sane value is xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; it can
 * be set lower, which may degrade performance on some I/O-intensive
 * workloads.
 */
static int max_buffer_pages = …;
module_param_named(max_buffer_pages, max_buffer_pages, int, 0644);
MODULE_PARM_DESC(…) …;

/*
 * Maximum number of grants to map persistently in blkback. For maximum
 * performance this should be the total number of grants that can be used
 * to fill the ring, but since this might become too high, especially with
 * the use of indirect descriptors, we set it to a value that provides good
 * performance without using too much memory.
 *
 * When the list of persistent grants is full we clean it up using an LRU
 * algorithm.
 */
static int max_pgrants = …;
module_param_named(max_persistent_grants, max_pgrants, int, 0644);
MODULE_PARM_DESC(…) …;
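/*
 * Illustrative sketch (not part of this driver): how the max_pgrants knob
 * above typically interacts with an LRU purge. Once the set of persistent
 * grants is full, each purge pass reclaims a fixed percentage of the
 * maximum (cf. LRU_PERCENT_CLEAN below). The helper name purge_quota and
 * its parameter names are hypothetical.
 */
#if 0
static unsigned int purge_quota(unsigned int total_grants,
				unsigned int max_grants,
				unsigned int percent_clean)
{
	/* No purging is needed until the list is actually full. */
	if (total_grants < max_grants)
		return 0;

	/* Reclaim a fixed percentage of the maximum on each pass. */
	return (max_grants / 100) * percent_clean;
}
#endif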
/*
 * How long a persistent grant is allowed to remain allocated without being
 * in use. The time is in seconds; 0 means indefinitely long.
 */
static unsigned int pgrant_timeout = …;
module_param_named(persistent_grant_unused_seconds, pgrant_timeout,
		   uint, 0644);
MODULE_PARM_DESC(…) …;

/*
 * Maximum number of rings/queues blkback supports; allow as many queues as
 * there are CPUs if the user has not specified a value.
 */
unsigned int xenblk_max_queues;
module_param_named(max_queues, xenblk_max_queues, uint, 0644);
MODULE_PARM_DESC(…) …;

/*
 * Maximum order of pages to be used for the shared ring between front and
 * backend; 4KB page granularity is used.
 */
unsigned int xen_blkif_max_ring_order = …;
module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
MODULE_PARM_DESC(…) …;

/*
 * The LRU mechanism that cleans the lists of persistent grants needs to
 * be executed periodically. The time interval between consecutive executions
 * of the purge mechanism is set in ms.
 */
#define LRU_INTERVAL …

/*
 * When the persistent grants list is full we will remove unused grants
 * from the list. This is the percentage of grants to be removed on each
 * LRU execution.
 */
#define LRU_PERCENT_CLEAN …

/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats;
module_param(log_stats, int, 0644);

#define BLKBACK_INVALID_HANDLE …

static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt)
{
	…
}

#define vaddr(page) …

static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags);
static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
				struct blkif_request *req,
				struct pending_req *pending_req);
static void make_response(struct xen_blkif_ring *ring, u64 id,
			  unsigned short op, int st);

#define foreach_grant_safe(pos, n, rbtree, node) …

/*
 * We don't need locking around the persistent grant helpers because blkback
 * uses a single thread for each backend, so we can be sure that these
 * functions will never be called recursively.
 *
 * The only exception to that is put_persistent_grant, which can be called
 * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
 * bit operations to modify the flags of a persistent grant and to count
 * the number of used grants.
 */
static int add_persistent_gnt(struct xen_blkif_ring *ring,
			      struct persistent_gnt *persistent_gnt)
{
	…
}

static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
						 grant_ref_t gref)
{
	…
}

static void put_persistent_gnt(struct xen_blkif_ring *ring,
			       struct persistent_gnt *persistent_gnt)
{
	…
}

static void free_persistent_gnts(struct xen_blkif_ring *ring)
{
	…
}

void xen_blkbk_unmap_purged_grants(struct work_struct *work)
{
	…
}

static void purge_persistent_gnt(struct xen_blkif_ring *ring)
{
	…
}

/*
 * Retrieve a free pending_req structure from the 'pending_reqs' pool.
 */
static struct pending_req *alloc_req(struct xen_blkif_ring *ring)
{
	…
}

/*
 * Return the 'pending_req' structure to the free pool. We also wake up
 * the thread if it was waiting for a free page.
 */
static void free_req(struct xen_blkif_ring *ring, struct pending_req *req)
{
	…
}

/*
 * Routines for managing virtual block devices (vbds).
 */
static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
			     enum req_op operation)
{
	…
}

static void xen_vbd_resize(struct xen_blkif *blkif)
{
	…
}
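/*
 * Illustrative sketch (not the driver's code): a persistent_gnt_timeout()
 * style test, driven by the pgrant_timeout knob above, is commonly written
 * with jiffies arithmetic, where a timeout of 0 disables expiry entirely.
 * The parameter last_used stands in for whatever per-grant timestamp the
 * real structure keeps; it is assumed here for illustration.
 */
#if 0
static inline bool pgrant_expired(unsigned long last_used,
				  unsigned int timeout_seconds)
{
	/* A timeout of 0 means grants are kept indefinitely. */
	return timeout_seconds &&
	       time_after(jiffies,
			  last_used + (unsigned long)timeout_seconds * HZ);
}
#endif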
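/*
 * Illustrative sketch (not the driver's code) of the alloc_req()/free_req()
 * pattern above: a spinlock-protected free list plus a wait queue, where
 * freeing only wakes a sleeper on the empty -> non-empty transition. The
 * struct req_pool and the field names (lock, free_list, wq) are
 * hypothetical, as is a free_list member on struct pending_req.
 */
#if 0
struct req_pool {
	spinlock_t lock;
	struct list_head free_list;
	wait_queue_head_t wq;
};

static struct pending_req *pool_alloc(struct req_pool *pool)
{
	struct pending_req *req = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	if (!list_empty(&pool->free_list)) {
		req = list_first_entry(&pool->free_list,
				       struct pending_req, free_list);
		list_del(&req->free_list);
	}
	spin_unlock_irqrestore(&pool->lock, flags);
	return req;
}

static void pool_free(struct req_pool *pool, struct pending_req *req)
{
	unsigned long flags;
	bool was_empty;

	spin_lock_irqsave(&pool->lock, flags);
	was_empty = list_empty(&pool->free_list);
	list_add(&req->free_list, &pool->free_list);
	spin_unlock_irqrestore(&pool->lock, flags);

	/* Only wake a waiter when the list was previously empty. */
	if (was_empty)
		wake_up(&pool->wq);
}
#endif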
/*
 * Notification from the guest OS.
 */
static void blkif_notify_work(struct xen_blkif_ring *ring)
{
	…
}

irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
{
	…
}

/*
 * SCHEDULER FUNCTIONS
 */
static void print_stats(struct xen_blkif_ring *ring)
{
	…
}

int xen_blkif_schedule(void *arg)
{
	…
}

/*
 * Remove persistent grants and empty the pool of free pages.
 */
void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
{
	…
}

static unsigned int xen_blkbk_unmap_prepare(
	struct xen_blkif_ring *ring,
	struct grant_page **pages,
	unsigned int num,
	struct gnttab_unmap_grant_ref *unmap_ops,
	struct page **unmap_pages)
{
	…
}

static void xen_blkbk_unmap_and_respond_callback(int result,
						 struct gntab_unmap_queue_data *data)
{
	…
}

static void xen_blkbk_unmap_and_respond(struct pending_req *req)
{
	…
}

/*
 * Unmap the grant references.
 *
 * This could accumulate ops up to the batch size to reduce the number
 * of hypercalls, but since this is only used in error paths there's
 * no real need.
 */
static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
			    struct grant_page *pages[],
			    int num)
{
	…
}

static int xen_blkbk_map(struct xen_blkif_ring *ring,
			 struct grant_page *pages[],
			 int num, bool ro)
{
	…
}

static int xen_blkbk_map_seg(struct pending_req *pending_req)
{
	…
}

static int xen_blkbk_parse_indirect(struct blkif_request *req,
				    struct pending_req *pending_req,
				    struct seg_buf seg[],
				    struct phys_req *preq)
{
	…
}

static int dispatch_discard_io(struct xen_blkif_ring *ring,
			       struct blkif_request *req)
{
	…
}

static int dispatch_other_io(struct xen_blkif_ring *ring,
			     struct blkif_request *req,
			     struct pending_req *pending_req)
{
	…
}

static void xen_blk_drain_io(struct xen_blkif_ring *ring)
{
	…
}

static void __end_block_io_op(struct pending_req *pending_req,
			      blk_status_t error)
{
	…
}

/*
 * bio callback.
 */
static void end_block_io_op(struct bio *bio)
{
	…
}

static void blkif_get_x86_32_req(struct blkif_request *dst,
				 const struct blkif_x86_32_request *src)
{
	…
}

static void blkif_get_x86_64_req(struct blkif_request *dst,
				 const struct blkif_x86_64_request *src)
{
	…
}

/*
 * Copy the 'struct blkif_request' from the ring buffer (which has the
 * sectors we want, the number of them, grant references, etc.) and
 * translate it to the block API to hand it over to the proper block disk.
 */
static int
__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
	…
}

static int
do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
	…
}

/*
 * Transmute the 'struct blkif_request' into a proper 'struct bio' and
 * call 'submit_bio' to pass it to the underlying storage.
 */
static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
				struct blkif_request *req,
				struct pending_req *pending_req)
{
	…
}

/*
 * Put a response on the ring indicating how the operation fared.
 */
static void make_response(struct xen_blkif_ring *ring, u64 id,
			  unsigned short op, int st)
{
	…
}

static int __init xen_blkif_init(void)
{
	…
}

module_init(…) …;

static void __exit xen_blkif_fini(void)
{
	…
}

module_exit(xen_blkif_fini);

MODULE_DESCRIPTION(…) …;
MODULE_LICENSE(…) …;
MODULE_ALIAS(…) …;
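/*
 * Illustrative sketch (not the driver's code): the general shape of queuing
 * a response on a Xen shared ring, as make_response() above does, using the
 * public ring macros from xen/interface/io/ring.h. Passing the ring handle
 * and irq as parameters is an assumption made for the sketch; the real
 * driver additionally handles native/x86_32/x86_64 frontend ABIs through a
 * union of ring types.
 */
#if 0
static void make_response_sketch(struct blkif_back_ring *br, int irq,
				 u64 id, unsigned short op, int st)
{
	struct blkif_response *resp;
	int notify;

	/* Fill in the next free response slot on the shared ring. */
	resp = RING_GET_RESPONSE(br, br->rsp_prod_pvt);
	resp->id = id;
	resp->operation = op;
	resp->status = st;
	br->rsp_prod_pvt++;

	/* Publish the new producer index and kick the frontend if needed. */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(br, notify);
	if (notify)
		notify_remote_via_irq(irq);
}
#endif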