QEMU源码全解析 —— 块设备虚拟化(14)

接前一篇文章: QEMU源码全解析 —— 块设备虚拟化(13)

本文内容参考:

《趣谈Linux操作系统》 —— 刘超, 极客时间

《QEMU/KVM源码解析与应用》 —— 李强,机械工业出版社

特此致谢!

QEMU初始化阶段的块设备虚拟化

上一回开始解析VirtioDeviceClass的realize函数virtio_blk_device_realize(),再来回顾一下。virtio_blk_device_realize函数在同文件(hw/block/virtio-blk.c)中,代码如下:

/*
 * Realize callback for the virtio-blk device.
 *
 * Validates the configured properties, initializes the VirtIODevice,
 * creates one virtqueue per configured queue, creates the data plane and
 * finally wires the device up to its block backend.
 *
 * @dev:  device being realized; viewed both as VirtIODevice and VirtIOBlock.
 * @errp: set on any failure, in which case the function returns early.
 */
static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBlock *s = VIRTIO_BLK(dev);
    VirtIOBlkConf *conf = &s->conf;
    Error *err = NULL;
    unsigned i;
 
    /* A block backend must be attached and it must contain media. */
    if (!conf->conf.blk) {
        error_setg(errp, "drive property not set");
        return;
    }
    if (!blk_is_inserted(conf->conf.blk)) {
        error_setg(errp, "Device needs media, but drive is empty");
        return;
    }
    /* An "auto" queue count that is still unresolved here becomes 1. */
    if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) {
        conf->num_queues = 1;
    }
    if (!conf->num_queues) {
        error_setg(errp, "num-queues property must be larger than 0");
        return;
    }
    /* queue-size must be > 2, a power of 2 and <= VIRTQUEUE_MAX_SIZE. */
    if (conf->queue_size <= 2) {
        error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
                   "must be > 2", conf->queue_size);
        return;
    }
    if (!is_power_of_2(conf->queue_size) ||
        conf->queue_size > VIRTQUEUE_MAX_SIZE) {
        error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
                   "must be a power of 2 (max %d)",
                   conf->queue_size, VIRTQUEUE_MAX_SIZE);
        return;
    }
 
    /*
     * The second argument is true when the backend does not support write
     * permission (presumably a read-only flag -- confirm against
     * blkconf_apply_backend_options()).
     */
    if (!blkconf_apply_backend_options(&conf->conf,
                                       !blk_supports_write_perm(conf->conf.blk),
                                       true, errp)) {
        return;
    }
    /* Record the value reported by blk_enable_write_cache() at realize time. */
    s->original_wce = blk_enable_write_cache(conf->conf.blk);
    if (!blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, errp)) {
        return;
    }
 
    if (!blkconf_blocksizes(&conf->conf, errp)) {
        return;
    }
 
    /*
     * Zoned backends: offer VIRTIO_BLK_F_ZONED whenever the zoned model is
     * not BLK_Z_NONE, and withdraw VIRTIO_BLK_F_DISCARD for BLK_Z_HM.
     */
    BlockDriverState *bs = blk_bs(conf->conf.blk);
    if (bs->bl.zoned != BLK_Z_NONE) {
        virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED);
        if (bs->bl.zoned == BLK_Z_HM) {
            virtio_clear_feature(&s->host_features, VIRTIO_BLK_F_DISCARD);
        }
    }
 
    /*
     * The discard / write-zeroes limits are only checked when the matching
     * feature is offered; each must lie in [1, BDRV_REQUEST_MAX_SECTORS].
     */
    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) &&
        (!conf->max_discard_sectors ||
         conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) {
        error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")"
                   ", must be between 1 and %d",
                   conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS);
        return;
    }
 
    if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) &&
        (!conf->max_write_zeroes_sectors ||
         conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) {
        error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32
                   "), must be between 1 and %d",
                   conf->max_write_zeroes_sectors,
                   (int)BDRV_REQUEST_MAX_SECTORS);
        return;
    }
 
    /* The config space size depends on the offered host features. */
    s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params,
                                            s->host_features);
    virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);
 
    s->blk = conf->conf.blk;
    s->rq = NULL;
    /* Number of BDRV_SECTOR_SIZE sectors per logical block, minus one. */
    s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
 
    /* One virtqueue per configured queue, all sharing the same handler. */
    for (i = 0; i < conf->num_queues; i++) {
        virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
    }
    /*
     * NOTE(review): the pool size raised here is not lowered again on the
     * error path just below -- confirm whether that is intended.
     */
    qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2);
    virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
    if (err != NULL) {
        /* Undo the queue creation and virtio_init() before bailing out. */
        error_propagate(errp, err);
        for (i = 0; i < conf->num_queues; i++) {
            virtio_del_queue(vdev, i);
        }
        virtio_cleanup(vdev);
        return;
    }
 
    /*
     * This must be after virtio_init() so virtio_blk_dma_restart_cb() gets
     * called after ->start_ioeventfd() has already set blk's AioContext.
     */
    s->change =
        qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s);

    /* Attach this device to the backend: RAM registrar, dev ops, iostatus. */
    blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
    blk_set_dev_ops(s->blk, &virtio_block_ops, s);

    blk_iostatus_enable(s->blk);

    /* Register the logical CHS geometry under the "/disk@0,0" suffix. */
    add_boot_device_lchs(dev, "/disk@0,0",
                         conf->conf.lcyls,
                         conf->conf.lheads,
                         conf->conf.lsecs);
}

在virtio_blk_device_realize函数中,先是通过virtio_init函数初始化VirtIODevice结构。代码片段如下:

    virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);

接下来,根据之前所配置的队列数目conf->num_queues,对于每个队列都调用virtio_add_queue函数来初始化队列。代码片段如下:

    for (i = 0; i < conf->num_queues; i++) {
        virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
    }

先来看一下这个conf->num_queues是在何时、哪里赋值的。

在virtio_blk_device_realize函数中,只有一处对conf->num_queues赋值的代码,如下:

    if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) {
        conf->num_queues = 1;
    }

conf是从s->conf而来的,而s是通过DeviceState *dev得到的:

    VirtIOBlock *s = VIRTIO_BLK(dev);

VIRTIO_BLK()定义相关的代码在include/hw/virtio/virtio-blk.h中,如下:

/* Type name string of the virtio-blk device. */
#define TYPE_VIRTIO_BLK "virtio-blk-device"
/*
 * Declares the VirtIOBlock type together with the VIRTIO_BLK() accessor,
 * which is used to obtain a VirtIOBlock * from a DeviceState *.
 */
OBJECT_DECLARE_SIMPLE_TYPE(VirtIOBlock, VIRTIO_BLK)

VIRTIO_BLK()的意思和前文书所讲的DEVICE_CLASS()、VIRTIO_DEVICE_CLASS()基本一样,笔者就不在此展开了。直接来看VirtIOBlock结构的定义。

struct VirtIOBlkConf的定义也在include/hw/virtio/virtio-blk.h中,如下:

/* User-configurable properties of a virtio-blk device. */
struct VirtIOBlkConf
{
    /* Generic block configuration; conf.blk is the attached block backend. */
    BlockConf conf;
    /* presumably the IOThread used by the data plane -- TODO confirm */
    IOThread *iothread;
    char *serial;
    uint32_t request_merging;
    /*
     * Number of virtqueues to create; VIRTIO_BLK_AUTO_NUM_QUEUES asks the
     * realize code to pick a value.
     */
    uint16_t num_queues;
    /* Size passed to virtio_add_queue(); must be a power of 2 and > 2. */
    uint16_t queue_size;
    bool seg_max_adjust;
    bool report_discard_granularity;
    /* Checked against [1, BDRV_REQUEST_MAX_SECTORS] when DISCARD is offered. */
    uint32_t max_discard_sectors;
    /* Same range check, applied when WRITE_ZEROES is offered. */
    uint32_t max_write_zeroes_sectors;
    bool x_enable_wce_if_config_wce;
};

以笔者目前对于QEMU代码的认知,s->conf.num_queues的值应该是在hw/virtio/virtio-blk-pci.c的virtio_blk_pci_realize函数中赋的,代码如下:

/*
 * Realize callback of the virtio-blk PCI proxy.
 *
 * Resolves the "auto" queue count and the unspecified vector count, then
 * realizes the embedded virtio-blk device on the proxy's bus.
 *
 * @vpci_dev: the PCI proxy being realized.
 * @errp:     passed through to qdev_realize().
 */
static void virtio_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
{
    VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(vpci_dev);
    DeviceState *vdev = DEVICE(&dev->vdev);
    VirtIOBlkConf *conf = &dev->vdev.conf;

    /*
     * Resolve "auto" here, before the inner device is realized; the inner
     * realize function only falls back to 1 if the value is still "auto".
     */
    if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) {
        conf->num_queues = virtio_pci_optimal_num_queues(0);
    }

    /*
     * Default vector count is num_queues + 1 (presumably one per queue plus
     * one for configuration changes -- TODO confirm).
     */
    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
        vpci_dev->nvectors = conf->num_queues + 1;
    }

    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
}

后边随着笔者对于代码认知的深入,会更进一步讲解这一部分。

回到上边的主线:

对于virtio_add_queue函数的解析,放在下一回中。