接前一篇文章: QEMU源码全解析 —— 块设备虚拟化(12)
本文内容参考:
《 QEMU /KVM源码解析与应用》 —— 李强,机械工业出版社
特此致谢!
QEMU初始化阶段的块设备虚拟化
上一回或者说上几回详细讲解了QEMU中类Java反射机制的从模板生成类和类的实例化的完整流程。本回开始,继续往下讲解块设备虚拟化的机制和流程。
再来回顾一下TYPE_VIRTIO_BLK层的类声明,在hw/block/virtio-blk.c中,代码如下:
static const TypeInfo virtio_blk_info = {
.name = TYPE_VIRTIO_BLK,
.parent = TYPE_VIRTIO_DEVICE,
.instance_size = sizeof(VirtIOBlock),
.instance_init = virtio_blk_instance_init,
.class_init = virtio_blk_class_init,
};
static void virtio_register_types(void)
{
type_register_static(&virtio_blk_info);
}
type_init(virtio_register_types)
virtio_blk_info中的class_init指向的函数为virtio_blk_class_init。该函数也在hw/block/virtio-blk.c中,代码如下:
static void virtio_blk_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
device_class_set_props(dc, virtio_blk_properties);
dc->vmsd = &vmstate_virtio_blk;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
vdc->realize = virtio_blk_device_realize;
vdc->unrealize = virtio_blk_device_unrealize;
vdc->get_config = virtio_blk_update_config;
vdc->set_config = virtio_blk_set_config;
vdc->get_features = virtio_blk_get_features;
vdc->set_status = virtio_blk_set_status;
vdc->reset = virtio_blk_reset;
vdc->save = virtio_blk_save_device;
vdc->load = virtio_blk_load_device;
vdc->start_ioeventfd = virtio_blk_data_plane_start;
vdc->stop_ioeventfd = virtio_blk_data_plane_stop;
}
在virtio_blk_class_init函数中,定义了VirtioDeviceClass的realize函数为virtio_blk_device_realize()。virtio_blk_device_realize函数在同文件(hw/block/virtio-blk.c)中,代码如下:
static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOBlock *s = VIRTIO_BLK(dev);
VirtIOBlkConf *conf = &s->conf;
Error *err = NULL;
unsigned i;
if (!conf->conf.blk) {
error_setg(errp, "drive property not set");
return;
}
if (!blk_is_inserted(conf->conf.blk)) {
error_setg(errp, "Device needs media, but drive is empty");
return;
}
if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) {
conf->num_queues = 1;
}
if (!conf->num_queues) {
error_setg(errp, "num-queues property must be larger than 0");
return;
}
if (conf->queue_size <= 2) {
error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
"must be > 2", conf->queue_size);
return;
}
if (!is_power_of_2(conf->queue_size) ||
conf->queue_size > VIRTQUEUE_MAX_SIZE) {
error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
"must be a power of 2 (max %d)",
conf->queue_size, VIRTQUEUE_MAX_SIZE);
return;
}
if (!blkconf_apply_backend_options(&conf->conf,
!blk_supports_write_perm(conf->conf.blk),
true, errp)) {
return;
}
s->original_wce = blk_enable_write_cache(conf->conf.blk);
if (!blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, errp)) {
return;
}
if (!blkconf_blocksizes(&conf->conf, errp)) {
return;
}
BlockDriverState *bs = blk_bs(conf->conf.blk);
if (bs->bl.zoned != BLK_Z_NONE) {
virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED);
if (bs->bl.zoned == BLK_Z_HM) {
virtio_clear_feature(&s->host_features, VIRTIO_BLK_F_DISCARD);
}
}
if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) &&
(!conf->max_discard_sectors ||
conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) {
error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")"
", must be between 1 and %d",
conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS);
return;
}
if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) &&
(!conf->max_write_zeroes_sectors ||
conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) {
error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32
"), must be between 1 and %d",
conf->max_write_zeroes_sectors,
(int)BDRV_REQUEST_MAX_SECTORS);
return;
}
s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params,
s->host_features);
virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);
s->blk = conf->conf.blk;
s->rq = NULL;
s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
for (i = 0; i < conf->num_queues; i++) {
virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
}
qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2);
virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
if (err != NULL) {
error_propagate(errp, err);
for (i = 0; i < conf->num_queues; i++) {
virtio_del_queue(vdev, i);
}
virtio_cleanup(vdev);
return;
}
/*
* This must be after virtio_init() so virtio_blk_dma_restart_cb() gets
* called after ->start_ioeventfd() has already set blk's AioContext.
*/
s->change =
qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s);
blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
blk_set_dev_ops(s->blk, &virtio_block_ops, s);
blk_iostatus_enable(s->blk);
add_boot_device_lchs(dev, "/disk@0,0",
conf->conf.lcyls,
conf->conf.lheads,
conf->conf.lsecs);
}
在virtio_blk_device_realize函数中,先是通过virtio_init函数初始化VirtIODevice结构。代码片段如下:
virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);
virtio_init函数在hw/virtio/virtio.c中,代码如下:
void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
{
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
int i;
int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
if (nvectors) {
vdev->vector_queues =
g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
}
vdev->start_on_kick = false;
vdev->started = false;
vdev->vhost_started = false;
vdev->device_id = device_id;
vdev->status = 0;
qatomic_set(&vdev->isr, 0);
vdev->queue_sel = 0;
vdev->config_vector = VIRTIO_NO_VECTOR;
vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
vdev->vm_running = runstate_is_running();
vdev->broken = false;
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
vdev->vq[i].vector = VIRTIO_NO_VECTOR;
vdev->vq[i].vdev = vdev;
vdev->vq[i].queue_index = i;
vdev->vq[i].host_notifier_enabled = false;
}
vdev->name = virtio_id_to_name(device_id);
vdev->config_len = config_size;
if (vdev->config_len) {
vdev->config = g_malloc0(config_size);
} else {
vdev->config = NULL;
}
vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
virtio_vmstate_change, vdev);
vdev->device_endian = virtio_default_endian();
vdev->use_guest_notifier_mask = true;
}
从virtio_init函数代码中可以看到,VirtioIODevice结构(VirtIODevice *vdev)中有一个VirtQueue数组,它正是virtio中的前端和后端后端互相传输数据的队列,最多有VIRTIO_QUEUE_MAX个。代码片段如下:
vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
vdev->vm_running = runstate_is_running();
vdev->broken = false;
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
vdev->vq[i].vector = VIRTIO_NO_VECTOR;
vdev->vq[i].vdev = vdev;
vdev->vq[i].queue_index = i;
vdev->vq[i].host_notifier_enabled = false;
}
这一点也能够从VirtioIODevice结构的定义中看出来。VirtioIODevice结构的定义在include/hw/virtio/virtio.h中,如下:
/**
* struct VirtIODevice - common VirtIO structure
* @name: name of the device
* @status: VirtIO Device Status field
*
*/
struct VirtIODevice
{
DeviceState parent_obj;
const char *name;
uint8_t status;
uint8_t isr;
uint16_t queue_sel;
/**
* These fields represent a set of VirtIO features at various
* levels of the stack. @host_features indicates the complete
* feature set the VirtIO device can offer to the driver.
* @guest_features indicates which features the VirtIO driver has
* selected by writing to the feature register. Finally
* @backend_features represents everything supported by the
* backend (e.g. vhost) and could potentially be a subset of the
* total feature set offered by QEMU.
*/
uint64_t host_features;
uint64_t guest_features;
uint64_t backend_features;
size_t config_len;
void *config;
uint16_t config_vector;
uint32_t generation;
int nvectors;
VirtQueue *vq;
MemoryListener listener;
uint16_t device_id;
/* @vm_running: current VM running state via virtio_vmstate_change() */
bool vm_running;
bool broken; /* device in invalid state, needs reset */
bool use_disabled_flag; /* allow use of 'disable' flag when needed */
bool disabled; /* device in temporarily disabled state */
/**
* @use_started: true if the @started flag should be used to check the
* current state of the VirtIO device. Otherwise status bits
* should be checked for a current status of the device.
* @use_started is only set via QMP and defaults to true for all
* modern machines (since 4.1).
*/
bool use_started;
bool started;
bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */
bool disable_legacy_check;
bool vhost_started;
VMChangeStateEntry *vmstate;
char *bus_name;
uint8_t device_endian;
/**
* @user_guest_notifier_mask: gate usage of ->guest_notifier_mask() callback.
* This is used to suppress the masking of guest updates for
* vhost-user devices which are asynchronous by design.
*/
bool use_guest_notifier_mask;
AddressSpace *dma_as;
QLIST_HEAD(, VirtQueue) *vector_queues;
QTAILQ_ENTRY(VirtIODevice) next;
/**
* @config_notifier: the event notifier that handles config events
*/
EventNotifier config_notifier;
bool device_iotlb_enabled;
};
其中重点关注以下一行定义:
VirtQueue *vq;
VIRTIO_QUEUE_MAX是一个宏,其定义在include/hw/virtio\virtio.h中,如下:
#define VIRTIO_QUEUE_MAX 1024
也就是说,virtio中的前端和后端后端互相传输数据的队列最多有1024个。
更多内容请看下回。