接前一篇文章: QEMU源码全解析 —— 块设备虚拟化(17)
本文内容参考:
《 QEMU /KVM源码解析与应用》 —— 李强,机械工业出版社
《KVM实战 —— 原理、进阶与性能调优》—— 任永杰 程舟,机械工业出版社
特此致谢!
QEMU启动过程中的块设备虚拟化
上两回讲解了“存储的基本配置选项”和“详细配置存储驱动器的-drive参数”,用了两回做铺垫,目的是为了更好地讲清楚QEMU启动过程中的块设备虚拟化。
对于硬盘的虚拟化,QEMU的启动参数中有关的(一般可能)是以下两行:
-drive file=/var/lib/nova/instances/1f8e6f7e-5a70-4780-89c1-464dc0e7f308/disk,if=none,id=drive-virtio-disk0,format=qcow2,cache=none
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,bootindex=1
-drive file=/var/lib/nova/instances/1f8e6f7e-5a70-4780-89c1-464dc0e7f308/disk,if=none,id=drive-virtio-disk0,format=qcow2,cache=none
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,bootindex=1
上边代码的第1行指定了宿主机硬盘上的一个文件,文件的格式是qcow2。对于宿主机上的一个文件,可以被QEMU模拟成为客户机上的一块硬盘。
而上边代码的第2行说明了,使用的驱动是virtio-blk驱动。
在QEMU启动的main函数中(新版本(8.1.3/8.1.4)已经移到了qemu_create_early_backends()中,该函数为qemu_init函数所调用),初始化块设备,是通过configure_blockdev函数开始的。
configure_blockdev函数也在softmmu/vl.c中,代码如下:
static void configure_blockdev(BlockdevOptionsQueue *bdo_queue,
MachineClass *machine_class, int snapshot)
{
/*
* If the currently selected machine wishes to override the
* units-per-bus property of its default HBA interface type, do so
* now.
*/
if (machine_class->units_per_default_bus) {
override_max_devs(machine_class->block_default_type,
machine_class->units_per_default_bus);
}
/* open the virtual block devices */
while (!QSIMPLEQ_EMPTY(bdo_queue)) {
BlockdevOptionsQueueEntry *bdo = QSIMPLEQ_FIRST(bdo_queue);
QSIMPLEQ_REMOVE_HEAD(bdo_queue, entry);
loc_push_restore(&bdo->loc);
qmp_blockdev_add(bdo->bdo, &error_fatal);
loc_pop(&bdo->loc);
qapi_free_BlockdevOptions(bdo->bdo);
g_free(bdo);
}
if (snapshot) {
qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot,
NULL, NULL);
}
if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func,
&machine_class->block_default_type, &error_fatal)) {
/* We printed help */
exit(0);
}
default_drive(default_cdrom, snapshot, machine_class->block_default_type, 2,
CDROM_OPTS);
default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
}
在configure_blockdev函数中,可以看到对于-drive这个参数的解析,并且初始化这个设备要调用drive_init_func函数。代码片段如下:
if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func,
&machine_class->block_default_type, &error_fatal)) {
/* We printed help */
exit(0);
}
drive_init_func函数在softmmu/vl.c中,代码如下:
static int drive_init_func(void *opaque, QemuOpts *opts, Error **errp)
{
BlockInterfaceType *block_default_type = opaque;
return drive_new(opts, *block_default_type, errp) == NULL;
}
在drive_init_func函数中,会调用drive_new函数创建一个设备。drive_new函数在blockdev.c中,代码如下:
DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
Error **errp)
{
const char *value;
BlockBackend *blk;
DriveInfo *dinfo = NULL;
QDict *bs_opts;
QemuOpts *legacy_opts;
DriveMediaType media = MEDIA_DISK;
BlockInterfaceType type;
int max_devs, bus_id, unit_id, index;
const char *werror, *rerror;
bool read_only = false;
bool copy_on_read;
const char *filename;
int i;
GLOBAL_STATE_CODE();
/* Change legacy command line options into QMP ones */
static const struct {
const char *from;
const char *to;
} opt_renames[] = {
{ "iops", "throttling.iops-total" },
{ "iops_rd", "throttling.iops-read" },
{ "iops_wr", "throttling.iops-write" },
{ "bps", "throttling.bps-total" },
{ "bps_rd", "throttling.bps-read" },
{ "bps_wr", "throttling.bps-write" },
{ "iops_max", "throttling.iops-total-max" },
{ "iops_rd_max", "throttling.iops-read-max" },
{ "iops_wr_max", "throttling.iops-write-max" },
{ "bps_max", "throttling.bps-total-max" },
{ "bps_rd_max", "throttling.bps-read-max" },
{ "bps_wr_max", "throttling.bps-write-max" },
{ "iops_size", "throttling.iops-size" },
{ "group", "throttling.group" },
{ "readonly", BDRV_OPT_READ_ONLY },
};
for (i = 0; i < ARRAY_SIZE(opt_renames); i++) {
if (!qemu_opt_rename(all_opts, opt_renames[i].from,
opt_renames[i].to, errp)) {
return NULL;
}
}
value = qemu_opt_get(all_opts, "cache");
if (value) {
int flags = 0;
bool writethrough;
if (bdrv_parse_cache_mode(value, &flags, &writethrough) != 0) {
error_setg(errp, "invalid cache option");
return NULL;
}
/* Specific options take precedence */
if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_WB)) {
qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_WB,
!writethrough, &error_abort);
}
if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) {
qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT,
!!(flags & BDRV_O_NOCACHE), &error_abort);
}
if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_NO_FLUSH)) {
qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_NO_FLUSH,
!!(flags & BDRV_O_NO_FLUSH), &error_abort);
}
qemu_opt_unset(all_opts, "cache");
}
/* Get a QDict for processing the options */
bs_opts = qdict_new();
qemu_opts_to_qdict(all_opts, bs_opts);
legacy_opts = qemu_opts_create(&qemu_legacy_drive_opts, NULL, 0,
&error_abort);
if (!qemu_opts_absorb_qdict(legacy_opts, bs_opts, errp)) {
goto fail;
}
/* Media type */
value = qemu_opt_get(legacy_opts, "media");
if (value) {
if (!strcmp(value, "disk")) {
media = MEDIA_DISK;
} else if (!strcmp(value, "cdrom")) {
media = MEDIA_CDROM;
read_only = true;
} else {
error_setg(errp, "'%s' invalid media", value);
goto fail;
}
}
/* copy-on-read is disabled with a warning for read-only devices */
read_only |= qemu_opt_get_bool(legacy_opts, BDRV_OPT_READ_ONLY, false);
copy_on_read = qemu_opt_get_bool(legacy_opts, "copy-on-read", false);
if (read_only && copy_on_read) {
warn_report("disabling copy-on-read on read-only drive");
copy_on_read = false;
}
qdict_put_str(bs_opts, BDRV_OPT_READ_ONLY, read_only ? "on" : "off");
qdict_put_str(bs_opts, "copy-on-read", copy_on_read ? "on" : "off");
/* Controller type */
value = qemu_opt_get(legacy_opts, "if");
if (value) {
for (type = 0;
type < IF_COUNT && strcmp(value, if_name[type]);
type++) {
}
if (type == IF_COUNT) {
error_setg(errp, "unsupported bus type '%s'", value);
goto fail;
}
} else {
type = block_default_type;
}
/* Device address specified by bus/unit or index.
* If none was specified, try to find the first free one. */
bus_id = qemu_opt_get_number(legacy_opts, "bus", 0);
unit_id = qemu_opt_get_number(legacy_opts, "unit", -1);
index = qemu_opt_get_number(legacy_opts, "index", -1);
max_devs = if_max_devs[type];
if (index != -1) {
if (bus_id != 0 || unit_id != -1) {
error_setg(errp, "index cannot be used with bus and unit");
goto fail;
}
bus_id = drive_index_to_bus_id(type, index);
unit_id = drive_index_to_unit_id(type, index);
}
if (unit_id == -1) {
unit_id = 0;
while (drive_get(type, bus_id, unit_id) != NULL) {
unit_id++;
if (max_devs && unit_id >= max_devs) {
unit_id -= max_devs;
bus_id++;
}
}
}
if (max_devs && unit_id >= max_devs) {
error_setg(errp, "unit %d too big (max is %d)", unit_id, max_devs - 1);
goto fail;
}
if (drive_get(type, bus_id, unit_id) != NULL) {
error_setg(errp, "drive with bus=%d, unit=%d (index=%d) exists",
bus_id, unit_id, index);
goto fail;
}
/* no id supplied -> create one */
if (qemu_opts_id(all_opts) == NULL) {
char *new_id;
const char *mediastr = "";
if (type == IF_IDE || type == IF_SCSI) {
mediastr = (media == MEDIA_CDROM) ? "-cd" : "-hd";
}
if (max_devs) {
new_id = g_strdup_printf("%s%i%s%i", if_name[type], bus_id,
mediastr, unit_id);
} else {
new_id = g_strdup_printf("%s%s%i", if_name[type],
mediastr, unit_id);
}
qdict_put_str(bs_opts, "id", new_id);
g_free(new_id);
}
/* Add virtio block device */
if (type == IF_VIRTIO) {
QemuOpts *devopts;
devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
&error_abort);
qemu_opt_set(devopts, "driver", "virtio-blk", &error_abort);
qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
&error_abort);
}
filename = qemu_opt_get(legacy_opts, "file");
/* Check werror/rerror compatibility with if=... */
werror = qemu_opt_get(legacy_opts, "werror");
if (werror != NULL) {
if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO &&
type != IF_NONE) {
error_setg(errp, "werror is not supported by this bus type");
goto fail;
}
qdict_put_str(bs_opts, "werror", werror);
}
rerror = qemu_opt_get(legacy_opts, "rerror");
if (rerror != NULL) {
if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI &&
type != IF_NONE) {
error_setg(errp, "rerror is not supported by this bus type");
goto fail;
}
qdict_put_str(bs_opts, "rerror", rerror);
}
/* Actual block device init: Functionality shared with blockdev-add */
blk = blockdev_init(filename, bs_opts, errp);
bs_opts = NULL;
if (!blk) {
goto fail;
}
/* Create legacy DriveInfo */
dinfo = g_malloc0(sizeof(*dinfo));
dinfo->opts = all_opts;
dinfo->type = type;
dinfo->bus = bus_id;
dinfo->unit = unit_id;
blk_set_legacy_dinfo(blk, dinfo);
switch(type) {
case IF_IDE:
case IF_SCSI:
case IF_XEN:
case IF_NONE:
dinfo->media_cd = media == MEDIA_CDROM;
break;
default:
break;
}
fail:
qemu_opts_del(legacy_opts);
qobject_unref(bs_opts);
return dinfo;
}
对于drive_new函数的解析,下一回开始。