接前一篇文章: QEMU源码全解析 —— 块设备虚拟化(18)
本文内容参考:
《 QEMU /KVM源码解析与应用》 —— 李强,机械工业出版社
《KVM实战 —— 原理、进阶与性能调优》—— 任永杰 程舟,机械工业出版社
特此致谢!
QEMU启动过程中的块设备虚拟化
上一回沿着QEMU启动过程中所调用的函数流程:
main函数
---> qemu_init函数
---> qemu_create_early_backends()
---> configure_blockdev函数
---> drive_init_func函数
---> driver_new函数
讲到了drive_new函数,本回对于该函数进行详细解析。为了便于理解和回顾,再次贴出drive_new函数源码,在blockdev.c中,如下:
DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
Error **errp)
{
const char *value;
BlockBackend *blk;
DriveInfo *dinfo = NULL;
QDict *bs_opts;
QemuOpts *legacy_opts;
DriveMediaType media = MEDIA_DISK;
BlockInterfaceType type;
int max_devs, bus_id, unit_id, index;
const char *werror, *rerror;
bool read_only = false;
bool copy_on_read;
const char *filename;
int i;
GLOBAL_STATE_CODE();
/* Change legacy command line options into QMP ones */
static const struct {
const char *from;
const char *to;
} opt_renames[] = {
{ "iops", "throttling.iops-total" },
{ "iops_rd", "throttling.iops-read" },
{ "iops_wr", "throttling.iops-write" },
{ "bps", "throttling.bps-total" },
{ "bps_rd", "throttling.bps-read" },
{ "bps_wr", "throttling.bps-write" },
{ "iops_max", "throttling.iops-total-max" },
{ "iops_rd_max", "throttling.iops-read-max" },
{ "iops_wr_max", "throttling.iops-write-max" },
{ "bps_max", "throttling.bps-total-max" },
{ "bps_rd_max", "throttling.bps-read-max" },
{ "bps_wr_max", "throttling.bps-write-max" },
{ "iops_size", "throttling.iops-size" },
{ "group", "throttling.group" },
{ "readonly", BDRV_OPT_READ_ONLY },
};
for (i = 0; i < ARRAY_SIZE(opt_renames); i++) {
if (!qemu_opt_rename(all_opts, opt_renames[i].from,
opt_renames[i].to, errp)) {
return NULL;
}
}
value = qemu_opt_get(all_opts, "cache");
if (value) {
int flags = 0;
bool writethrough;
if (bdrv_parse_cache_mode(value, &flags, &writethrough) != 0) {
error_setg(errp, "invalid cache option");
return NULL;
}
/* Specific options take precedence */
if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_WB)) {
qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_WB,
!writethrough, &error_abort);
}
if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) {
qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT,
!!(flags & BDRV_O_NOCACHE), &error_abort);
}
if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_NO_FLUSH)) {
qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_NO_FLUSH,
!!(flags & BDRV_O_NO_FLUSH), &error_abort);
}
qemu_opt_unset(all_opts, "cache");
}
/* Get a QDict for processing the options */
bs_opts = qdict_new();
qemu_opts_to_qdict(all_opts, bs_opts);
legacy_opts = qemu_opts_create(&qemu_legacy_drive_opts, NULL, 0,
&error_abort);
if (!qemu_opts_absorb_qdict(legacy_opts, bs_opts, errp)) {
goto fail;
}
/* Media type */
value = qemu_opt_get(legacy_opts, "media");
if (value) {
if (!strcmp(value, "disk")) {
media = MEDIA_DISK;
} else if (!strcmp(value, "cdrom")) {
media = MEDIA_CDROM;
read_only = true;
} else {
error_setg(errp, "'%s' invalid media", value);
goto fail;
}
}
/* copy-on-read is disabled with a warning for read-only devices */
read_only |= qemu_opt_get_bool(legacy_opts, BDRV_OPT_READ_ONLY, false);
copy_on_read = qemu_opt_get_bool(legacy_opts, "copy-on-read", false);
if (read_only && copy_on_read) {
warn_report("disabling copy-on-read on read-only drive");
copy_on_read = false;
}
qdict_put_str(bs_opts, BDRV_OPT_READ_ONLY, read_only ? "on" : "off");
qdict_put_str(bs_opts, "copy-on-read", copy_on_read ? "on" : "off");
/* Controller type */
value = qemu_opt_get(legacy_opts, "if");
if (value) {
for (type = 0;
type < IF_COUNT && strcmp(value, if_name[type]);
type++) {
}
if (type == IF_COUNT) {
error_setg(errp, "unsupported bus type '%s'", value);
goto fail;
}
} else {
type = block_default_type;
}
/* Device address specified by bus/unit or index.
* If none was specified, try to find the first free one. */
bus_id = qemu_opt_get_number(legacy_opts, "bus", 0);
unit_id = qemu_opt_get_number(legacy_opts, "unit", -1);
index = qemu_opt_get_number(legacy_opts, "index", -1);
max_devs = if_max_devs[type];
if (index != -1) {
if (bus_id != 0 || unit_id != -1) {
error_setg(errp, "index cannot be used with bus and unit");
goto fail;
}
bus_id = drive_index_to_bus_id(type, index);
unit_id = drive_index_to_unit_id(type, index);
}
if (unit_id == -1) {
unit_id = 0;
while (drive_get(type, bus_id, unit_id) != NULL) {
unit_id++;
if (max_devs && unit_id >= max_devs) {
unit_id -= max_devs;
bus_id++;
}
}
}
if (max_devs && unit_id >= max_devs) {
error_setg(errp, "unit %d too big (max is %d)", unit_id, max_devs - 1);
goto fail;
}
if (drive_get(type, bus_id, unit_id) != NULL) {
error_setg(errp, "drive with bus=%d, unit=%d (index=%d) exists",
bus_id, unit_id, index);
goto fail;
}
/* no id supplied -> create one */
if (qemu_opts_id(all_opts) == NULL) {
char *new_id;
const char *mediastr = "";
if (type == IF_IDE || type == IF_SCSI) {
mediastr = (media == MEDIA_CDROM) ? "-cd" : "-hd";
}
if (max_devs) {
new_id = g_strdup_printf("%s%i%s%i", if_name[type], bus_id,
mediastr, unit_id);
} else {
new_id = g_strdup_printf("%s%s%i", if_name[type],
mediastr, unit_id);
}
qdict_put_str(bs_opts, "id", new_id);
g_free(new_id);
}
/* Add virtio block device */
if (type == IF_VIRTIO) {
QemuOpts *devopts;
devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
&error_abort);
qemu_opt_set(devopts, "driver", "virtio-blk", &error_abort);
qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
&error_abort);
}
filename = qemu_opt_get(legacy_opts, "file");
/* Check werror/rerror compatibility with if=... */
werror = qemu_opt_get(legacy_opts, "werror");
if (werror != NULL) {
if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO &&
type != IF_NONE) {
error_setg(errp, "werror is not supported by this bus type");
goto fail;
}
qdict_put_str(bs_opts, "werror", werror);
}
rerror = qemu_opt_get(legacy_opts, "rerror");
if (rerror != NULL) {
if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI &&
type != IF_NONE) {
error_setg(errp, "rerror is not supported by this bus type");
goto fail;
}
qdict_put_str(bs_opts, "rerror", rerror);
}
/* Actual block device init: Functionality shared with blockdev-add */
blk = blockdev_init(filename, bs_opts, errp);
bs_opts = NULL;
if (!blk) {
goto fail;
}
/* Create legacy DriveInfo */
dinfo = g_malloc0(sizeof(*dinfo));
dinfo->opts = all_opts;
dinfo->type = type;
dinfo->bus = bus_id;
dinfo->unit = unit_id;
blk_set_legacy_dinfo(blk, dinfo);
switch(type) {
case IF_IDE:
case IF_SCSI:
case IF_XEN:
case IF_NONE:
dinfo->media_cd = media == MEDIA_CDROM;
break;
default:
break;
}
fail:
qemu_opts_del(legacy_opts);
qobject_unref(bs_opts);
return dinfo;
}
上一回已讲过,drive_new函数用于创建一个设备。在其中,会解析QEMU的启动参数。对于virtio来说,会解析device参数,将driver设置为"virtio-blk"。代码片段如下:
/* Add virtio block device */
if (type == IF_VIRTIO) {
QemuOpts *devopts;
devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
&error_abort);
qemu_opt_set(devopts, "driver", "virtio-blk", &error_abort);
qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
&error_abort);
}
对应启动命令行中的:
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,bootindex=1
接下来,drive_new函数会解析file参数,就是指向宿主机上的那个文件。代码片段如下:
filename = qemu_opt_get(legacy_opts, "file");
对应启动命令行中的:
-drive file=/var/lib/nova/instances/1f8e6f7e-5a70-4780-89c1-464dc0e7f308/disk,if=none,id=drive-virtio-disk0,format=qcow2,cache=none
接下来,drive_new函数会调用blockdev_init函数,根据参数进行初始化。代码片段如下:
/* Actual block device init: Functionality shared with blockdev-add */
blk = blockdev_init(filename, bs_opts, errp);
bs_opts = NULL;
if (!blk) {
goto fail;
}
最后,drive_new函数会创建一个DriverInfo结构对象,来管理这个新建的设备。代码片段如下:
DriveInfo *dinfo = NULL;
……
/* Create legacy DriveInfo */
dinfo = g_malloc0(sizeof(*dinfo));
dinfo->opts = all_opts;
dinfo->type = type;
dinfo->bus = bus_id;
dinfo->unit = unit_id;
blk_set_legacy_dinfo(blk, dinfo);
switch(type) {
case IF_IDE:
case IF_SCSI:
case IF_XEN:
case IF_NONE:
dinfo->media_cd = media == MEDIA_CDROM;
break;
default:
break;
}
下一回重点对于drive_new函数中调用的blockdev_init函数进行解析。