dm-io

dm-io provides synchronous and asynchronous I/O services. There are 4 types of I/O services available, and each type has a sync and an async version.

The user must set up a dm_io_region structure to describe the desired location of the I/O. Each dm_io_region indicates a block-device along with the starting sector and size of the region.

    struct dm_io_region {
        struct block_device *bdev;
        sector_t sector;
        sector_t count;
    };

dm-io can read from one dm_io_region or write to one or more dm_io_regions — reads are limited to a single region, while writes may target several (see the sync_io/async_io implementations). Writes to multiple regions are specified by an array of dm_io_region structures.

dm_io provides 4 I/O service types, selected via enum dm_io_mem_type:

  • The first I/O service type takes a list of memory pages as the data buffer for the I/O, along with an offset into the first page.

  • The second I/O service type takes an array of bio vectors as the data buffer for the I/O. This service can be handy if the caller has a pre-assembled bio, but wants to direct different portions of the bio to different devices.

  • The third I/O service type takes a pointer to a vmalloc'd memory buffer as the data buffer for the I/O. This service can be handy if the caller needs to do I/O to a large region but doesn't want to allocate a large number of individual memory pages.

  • The fourth I/O service type takes a pointer (virt addr) to a kernel memory buffer as the data buffer for the I/O.

Callers of the asynchronous I/O services must include the name of a completion callback routine and a pointer to some context data for the I/O.

    typedef void (*io_notify_fn)(unsigned long error, void *context);

The "error" parameter in this callback, as well as the "*error" parameter in all of the synchronous versions, is a bitset (instead of a simple error value). In the case of a write-I/O to multiple regions, this bitset allows dm-io to indicate success or failure on each individual region.

(a)sync io interface

    collapsed (a)synchronous interface.

    If the IO is asynchronous (i.e. it has notify.fn), you must either
    unplug the queue with blk_unplug() some time later or set REQ_SYNC
    in io_req->bi_opf.
    If you fail to do one of these, the IO will be submitted to the disk
    after q->unplug_delay, which defaults to 3ms in blk-settings.c.
    (NOTE(review): blk_unplug() and q->unplug_delay belong to the old
    explicit-plugging block layer; verify this paragraph against the
    current kernel before relying on it.)

    /*
     * IO interface using private per-client pools.
     * Each bit in the optional 'sync_error_bits' bitset indicates whether an
     * error occurred doing io to the corresponding region.
     */
    int dm_io(struct dm_io_request *io_req, unsigned num_regions,
              struct dm_io_region *region, unsigned long *sync_error_bits);

        if (!io_req->notify.fn)
            sync_io(io_req->client, num_regions, where,
                    io_req->bi_op, io_req->bi_op_flags, &dp,
                    sync_error_bits);

        if (io_req->notify.fn)
            async_io(io_req->client, num_regions, where, io_req->bi_op,
                     io_req->bi_op_flags, &dp, io_req->notify.fn,
                     io_req->notify.context);

struct dm_io_request

    struct dm_io_request {
        int bi_op;                      /* REQ_OP */
        int bi_op_flags;                /* req_flag_bits */
        struct dm_io_memory mem;        /* Memory to use for io */
        struct dm_io_notify notify;     /* Synchronous if notify.fn is NULL */
        struct dm_io_client *client;    /* Client memory handler */
    };

    typedef void (*io_notify_fn)(unsigned long error, void *context);

    struct dm_io_notify {
        io_notify_fn    fn;         /* Callback for asynchronous requests */
        void            *context;   /* Passed to callback */
    };

    enum dm_io_mem_type {
        DM_IO_PAGE_LIST,    /* Page list */
        DM_IO_BIO,          /* Bio vector */
        DM_IO_VMA,          /* Virtual memory area */
        DM_IO_KMEM,         /* Kernel memory */
    };

    /* Describes the data buffer for an io request; which union member is
     * valid is selected by 'type' (one of the 4 I/O service types). */
    struct dm_io_memory {
        enum dm_io_mem_type type;

        unsigned offset;    /* offset into the first page for DM_IO_PAGE_LIST;
                             * meaning for the other types not shown here —
                             * TODO confirm against dm-io.c */

        union {
            struct page_list *pl;   /* DM_IO_PAGE_LIST: list of memory pages */
            struct bio *bio;        /* DM_IO_BIO: pre-assembled bio vectors */
            void *vma;              /* DM_IO_VMA: vmalloc'd buffer */
            void *addr;             /* DM_IO_KMEM: kernel memory buffer */
        } ptr;
    };

struct dm_io_region

    /* Describes one target region of an I/O: a block device plus the
     * starting sector and length of the region on that device. */
    struct dm_io_region {
        struct block_device *bdev;      /* target block device */
        sector_t            sector;     /* starting sector on bdev */
        sector_t            count;      /* If this is zero the region is ignored. */
    };

struct dm_io_client

    /* Private per-client memory pools (see dm_io_client_create()). */
    struct dm_io_client {
        mempool_t           pool;   /* presumably allocates per-request state
                                     * ('struct io') — confirm in dm-io.c */
        struct bio_set      bios;   /* bio allocation pool for this client */
    };
    /*
     * For async io calls, users can alternatively use the dm_io() function
     * and dm_io_client_create() to create private mempools for the client.
     *
     * Create/destroy may block.
     */
    struct dm_io_client *dm_io_client_create(void);
    void dm_io_client_destroy(struct dm_io_client *client);

struct io — bio->bi_private = (void *)((unsigned long)io | region) — each dispatched bio carries its (io, region) pair packed into bi_private

    #define DM_IO_MAX_REGIONS   BITS_PER_LONG

    typedef void (*io_notify_fn)(unsigned long error, void *context);

    /*
     * Aligning 'struct io' reduces the number of bits required to store
     * its address. 
     * Refer to store_io_and_region_in_bio() and retrieve_io_and_region_from_bio().
     */
    struct io {
        unsigned long       error_bits;
        atomic_t            count;          keep track of the number of bios that 
                                            have been dispatched for a particular io.
        struct dm_io_client *client;
        io_notify_fn        callback;
        void                *context;
        void                *vma_invalidate_address;
        unsigned long       vma_invalidate_size;
    } __attribute__((aligned(DM_IO_MAX_REGIONS)));
                    /*
                     * We need to keep track of which region a bio is doing io for.
                     * To avoid a memory allocation to store just 5 or 6 bits, we
                     * ensure the 'struct io' pointer is aligned so enough low bits are
                     * always zero and then combine it with the region number directly in
                     * bi_private.
                     */

    /*
     * Pack the owning 'struct io' pointer and the region number into
     * bio->bi_private. This works because 'struct io' is aligned to
     * DM_IO_MAX_REGIONS, so the low bits of its address are always zero
     * and can hold the region index without a separate allocation.
     */
    static void store_io_and_region_in_bio(struct bio *bio, struct io *io, unsigned region)
    {
        /* An unaligned pointer would let region bits corrupt the io
         * address (and vice versa), so treat it as fatal. */
        if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
            DMCRIT("Unaligned struct io pointer %p", io);
            BUG();
        }

        bio->bi_private = (void *)((unsigned long)io | region);
    }

    /*
     * Inverse of store_io_and_region_in_bio(): split bi_private back into
     * the io pointer (high bits) and the region number (low bits).
     * Note -(unsigned long)DM_IO_MAX_REGIONS == ~(DM_IO_MAX_REGIONS - 1)
     * for a power-of-two DM_IO_MAX_REGIONS, i.e. the mask that clears the
     * region bits.
     */
    static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io, unsigned *region)
    {
        unsigned long val = (unsigned long)bio->bi_private;

        *io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
        *region = val & (DM_IO_MAX_REGIONS - 1);
    }