QEMU源码全解析 —— 块设备虚拟化(13)

接前一篇文章: QEMU源码全解析 —— 块设备虚拟化(12)

本文内容参考:

《趣谈 Linux操作系统 》 —— 刘超, 极客时间

QEMU /KVM源码解析与应用》 —— 李强,机械工业出版社

特此致谢!

QEMU初始化阶段的块设备虚拟化

上一回或者说上几回详细讲解了QEMU中类Java反射机制的从模板生成类和类的实例化的完整流程。本回开始,继续往下讲解块设备虚拟化的机制和流程。

再来回顾一下TYPE_VIRTIO_BLK层的类声明,在hw/block/virtio-blk.c中,代码如下:

static const TypeInfo virtio_blk_info = {
    .name = TYPE_VIRTIO_BLK,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOBlock),
    .instance_init = virtio_blk_instance_init,
    .class_init = virtio_blk_class_init,
};
 
static void virtio_register_types(void)
{
    type_register_static(&virtio_blk_info);
}
 
type_init(virtio_register_types)

virtio_blk_info中的class_init指向的函数为virtio_blk_class_init。该函数也在hw/block/virtio-blk.c中,代码如下:

static void virtio_blk_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_blk_properties);
    dc->vmsd = &vmstate_virtio_blk;
    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    vdc->realize = virtio_blk_device_realize;
    vdc->unrealize = virtio_blk_device_unrealize;
    vdc->get_config = virtio_blk_update_config;
    vdc->set_config = virtio_blk_set_config;
    vdc->get_features = virtio_blk_get_features;
    vdc->set_status = virtio_blk_set_status;
    vdc->reset = virtio_blk_reset;
    vdc->save = virtio_blk_save_device;
    vdc->load = virtio_blk_load_device;
    vdc->start_ioeventfd = virtio_blk_data_plane_start;
    vdc->stop_ioeventfd = virtio_blk_data_plane_stop;
}

在virtio_blk_class_init函数中,定义了VirtioDeviceClass的realize函数为virtio_blk_device_realize()。virtio_blk_device_realize函数在同文件(hw/block/virtio-blk.c)中,代码如下:

static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBlock *s = VIRTIO_BLK(dev);
    VirtIOBlkConf *conf = &s->conf;
    Error *err = NULL;
    unsigned i;

    if (!conf->conf.blk) {
        error_setg(errp, "drive property not set");
        return;
    }
    if (!blk_is_inserted(conf->conf.blk)) {
        error_setg(errp, "Device needs media, but drive is empty");
        return;
    }
    if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) {
        conf->num_queues = 1;
    }
    if (!conf->num_queues) {
        error_setg(errp, "num-queues property must be larger than 0");
        return;
    }
    if (conf->queue_size <= 2) {
        error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
                   "must be > 2", conf->queue_size);
        return;
    }
    if (!is_power_of_2(conf->queue_size) ||
        conf->queue_size > VIRTQUEUE_MAX_SIZE) {
        error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
                   "must be a power of 2 (max %d)",
                   conf->queue_size, VIRTQUEUE_MAX_SIZE);
        return;
    }

    if (!blkconf_apply_backend_options(&conf->conf,
                                       !blk_supports_write_perm(conf->conf.blk),
                                       true, errp)) {
        return;
    }
    s->original_wce = blk_enable_write_cache(conf->conf.blk);
    if (!blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, errp)) {
        return;
    }

    if (!blkconf_blocksizes(&conf->conf, errp)) {
        return;
    }

    BlockDriverState *bs = blk_bs(conf->conf.blk);
    if (bs->bl.zoned != BLK_Z_NONE) {
        virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED);
        if (bs->bl.zoned == BLK_Z_HM) {
            virtio_clear_feature(&s->host_features, VIRTIO_BLK_F_DISCARD);
        }
    }

    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) &&
        (!conf->max_discard_sectors ||
         conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) {
        error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")"
                   ", must be between 1 and %d",
                   conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS);
        return;
    }

    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) &&
        (!conf->max_write_zeroes_sectors ||
         conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) {
        error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32
                   "), must be between 1 and %d",
                   conf->max_write_zeroes_sectors,
                   (int)BDRV_REQUEST_MAX_SECTORS);
        return;
    }

    s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params,
                                            s->host_features);
    virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);

    s->blk = conf->conf.blk;
    s->rq = NULL;
    s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;

    for (i = 0; i < conf->num_queues; i++) {
        virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
    }
    qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2);
    virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        for (i = 0; i < conf->num_queues; i++) {
            virtio_del_queue(vdev, i);
        }
        virtio_cleanup(vdev);
        return;
    }

    /*
     * This must be after virtio_init() so virtio_blk_dma_restart_cb() gets
     * called after ->start_ioeventfd() has already set blk's AioContext.
     */
    s->change =
        qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s);

    blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
    blk_set_dev_ops(s->blk, &virtio_block_ops, s);

    blk_iostatus_enable(s->blk);

    add_boot_device_lchs(dev, "/disk@0,0",
                         conf->conf.lcyls,
                         conf->conf.lheads,
                         conf->conf.lsecs);
}

在virtio_blk_device_realize函数中,先是通过virtio_init函数初始化VirtIODevice结构。代码片段如下:

    virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);

virtio_init函数在hw/virtio/virtio.c中,代码如下:

void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;
    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->start_on_kick = false;
    vdev->started = false;
    vdev->vhost_started = false;
    vdev->device_id = device_id;
    vdev->status = 0;
    qatomic_set(&vdev->isr, 0);
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    vdev->broken = false;
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
        vdev->vq[i].host_notifier_enabled = false;
    }

    vdev->name = virtio_id_to_name(device_id);
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
            virtio_vmstate_change, vdev);
    vdev->device_endian = virtio_default_endian();
    vdev->use_guest_notifier_mask = true;
}

从virtio_init函数代码中可以看到,VirtioIODevice结构(VirtIODevice *vdev)中有一个VirtQueue数组,它正是virtio中的前端和后端后端互相传输数据的队列,最多有VIRTIO_QUEUE_MAX个。代码片段如下:

    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    vdev->broken = false;
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
        vdev->vq[i].host_notifier_enabled = false;
    }

这一点也能够从VirtioIODevice结构的定义中看出来。VirtioIODevice结构的定义在include/hw/virtio/virtio.h中,如下:

/**
 * struct VirtIODevice - common VirtIO structure
 * @name: name of the device
 * @status: VirtIO Device Status field
 *
 */
struct VirtIODevice
{
    DeviceState parent_obj;
    const char *name;
    uint8_t status;
    uint8_t isr;
    uint16_t queue_sel;
    /**
     * These fields represent a set of VirtIO features at various
     * levels of the stack. @host_features indicates the complete
     * feature set the VirtIO device can offer to the driver.
     * @guest_features indicates which features the VirtIO driver has
     * selected by writing to the feature register. Finally
     * @backend_features represents everything supported by the
     * backend (e.g. vhost) and could potentially be a subset of the
     * total feature set offered by QEMU.
     */
    uint64_t host_features;
    uint64_t guest_features;
    uint64_t backend_features;

    size_t config_len;
    void *config;
    uint16_t config_vector;
    uint32_t generation;
    int nvectors;
    VirtQueue *vq;
    MemoryListener listener;
    uint16_t device_id;
    /* @vm_running: current VM running state via virtio_vmstate_change() */
    bool vm_running;
    bool broken; /* device in invalid state, needs reset */
    bool use_disabled_flag; /* allow use of 'disable' flag when needed */
    bool disabled; /* device in temporarily disabled state */
    /**
     * @use_started: true if the @started flag should be used to check the
     * current state of the VirtIO device. Otherwise status bits
     * should be checked for a current status of the device.
     * @use_started is only set via QMP and defaults to true for all
     * modern machines (since 4.1).
     */
    bool use_started;
    bool started;
    bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */
    bool disable_legacy_check;
    bool vhost_started;
    VMChangeStateEntry *vmstate;
    char *bus_name;
    uint8_t device_endian;
    /**
     * @user_guest_notifier_mask: gate usage of ->guest_notifier_mask() callback.
     * This is used to suppress the masking of guest updates for
     * vhost-user devices which are asynchronous by design.
     */
    bool use_guest_notifier_mask;
    AddressSpace *dma_as;
    QLIST_HEAD(, VirtQueue) *vector_queues;
    QTAILQ_ENTRY(VirtIODevice) next;
    /**
     * @config_notifier: the event notifier that handles config events
     */
    EventNotifier config_notifier;
    bool device_iotlb_enabled;
};

其中重点关注以下一行定义:

    VirtQueue *vq;

VIRTIO_QUEUE_MAX是一个宏,其定义在include/hw/virtio\virtio.h中,如下:

#define VIRTIO_QUEUE_MAX 1024

也就是说,virtio中的前端和后端后端互相传输数据的队列最多有1024个。

更多内容请看下回。