接前一篇文章:
2. PCI设备的模拟
QEMU模拟的设备很多都是PCI设备,本节介绍PCI设备的模拟。与所有设备类似,PCI设备类型(TYPE_PCI_DEVICE)的父类型也是TYPE_DEVICE,其定义在QEMU源码根目录下的hw/pci/pci.c中,代码如下:
- static const TypeInfo pci_device_type_info = {
- .name = TYPE_PCI_DEVICE,
- .parent = TYPE_DEVICE,
- .instance_size = sizeof(PCIDevice),
- .abstract = true,
- .class_size = sizeof(PCIDeviceClass),
- .class_init = pci_device_class_init,
- .class_base_init = pci_device_class_base_init,
- };
-
- static void pci_register_types(void)
- {
- type_register_static(&pci_bus_info);
- type_register_static(&pcie_bus_info);
- type_register_static(&cxl_bus_info);
- type_register_static(&conventional_pci_interface_info);
- type_register_static(&cxl_interface_info);
- type_register_static(&pcie_interface_info);
- type_register_static(&pci_device_type_info);
- }
-
- type_init(pci_register_types)
其中,TypeInfo的定义在include/qom/object.h中,如下:
typedef struct TypeInfo TypeInfo;
而struct TypeInfo的定义同样在include/qom/object.h中,代码如下:
- /**
- * struct TypeInfo:
- * @name: The name of the type.
- * @parent: The name of the parent type.
- * @instance_size: The size of the object (derivative of #Object). If
- * @instance_size is 0, then the size of the object will be the size of the
- * parent object.
- * @instance_align: The required alignment of the object. If @instance_align
- * is 0, then normal malloc alignment is sufficient; if non-zero, then we
- * must use qemu_memalign for allocation.
- * @instance_init: This function is called to initialize an object. The parent
- * class will have already been initialized so the type is only responsible
- * for initializing its own members.
- * @instance_post_init: This function is called to finish initialization of
- * an object, after all @instance_init functions were called.
- * @instance_finalize: This function is called during object destruction. This
- * is called before the parent @instance_finalize function has been called.
- * An object should only free the members that are unique to its type in this
- * function.
- * @abstract: If this field is true, then the class is considered abstract and
- * cannot be directly instantiated.
- * @class_size: The size of the class object (derivative of #ObjectClass)
- * for this object. If @class_size is 0, then the size of the class will be
- * assumed to be the size of the parent class. This allows a type to avoid
- * implementing an explicit class type if they are not adding additional
- * virtual functions.
- * @class_init: This function is called after all parent class initialization
- * has occurred to allow a class to set its default virtual method pointers.
- * This is also the function to use to override virtual methods from a parent
- * class.
- * @class_base_init: This function is called for all base classes after all
- * parent class initialization has occurred, but before the class itself
- * is initialized. This is the function to use to undo the effects of
- * memcpy from the parent class to the descendants.
- * @class_data: Data to pass to the @class_init,
- * @class_base_init. This can be useful when building dynamic
- * classes.
- * @interfaces: The list of interfaces associated with this type. This
- * should point to a static array that's terminated with a zero filled
- * element.
- */
- struct TypeInfo
- {
- const char *name;
- const char *parent;
-
- size_t instance_size;
- size_t instance_align;
- void (*instance_init)(Object *obj);
- void (*instance_post_init)(Object *obj);
- void (*instance_finalize)(Object *obj);
-
- bool abstract;
- size_t class_size;
-
- void (*class_init)(ObjectClass *klass, void *data);
- void (*class_base_init)(ObjectClass *klass, void *data);
- void *class_data;
-
- InterfaceInfo *interfaces;
- };
这里,对于TypeInfo即struct TypeInfo的对象pci_device_type_info来说,其class_init(函数指针)成员指向了pci_device_class_init函数。该函数也在hw/pci/pci.c中,代码如下:
- static void pci_device_class_init(ObjectClass *klass, void *data)
- {
- DeviceClass *k = DEVICE_CLASS(klass);
-
- k->realize = pci_qdev_realize;
- k->unrealize = pci_qdev_unrealize;
- k->bus_type = TYPE_PCI_BUS;
- device_class_set_props(k, pci_props);
- }
PCI类初始化函数中设置了PCIDeviceClass基类对象DeviceClass的realize和unrealize函数;bus_type表示设备挂接到的总线;props(通过device_class_set_props设置为pci_props)表示PCI设备有哪些属性,这些属性都可以在命令行指定。同样的,不存在单独的PCI设备,PCI设备也是一个抽象类。
PCI设备的具现化函数为pci_qdev_realize。该函数同样在hw/pci/pci.c中,代码如下:
- static void pci_qdev_realize(DeviceState *qdev, Error **errp)
- {
- PCIDevice *pci_dev = (PCIDevice *)qdev;
- PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
- ObjectClass *klass = OBJECT_CLASS(pc);
- Error *local_err = NULL;
- bool is_default_rom;
- uint16_t class_id;
-
- /*
- * capped by systemd (see: udev-builtin-net_id.c)
- * as it's the only known user honor it to avoid users
- * misconfigure QEMU and then wonder why acpi-index doesn't work
- */
- if (pci_dev->acpi_index > ONBOARD_INDEX_MAX) {
- error_setg(errp, "acpi-index should be less or equal to %u",
- ONBOARD_INDEX_MAX);
- return;
- }
-
- /*
- * make sure that acpi-index is unique across all present PCI devices
- */
- if (pci_dev->acpi_index) {
- GSequence *used_indexes = pci_acpi_index_list();
-
- if (g_sequence_lookup(used_indexes,
- GINT_TO_POINTER(pci_dev->acpi_index),
- g_cmp_uint32, NULL)) {
- error_setg(errp, "a PCI device with acpi-index = %" PRIu32
- " already exist", pci_dev->acpi_index);
- return;
- }
- g_sequence_insert_sorted(used_indexes,
- GINT_TO_POINTER(pci_dev->acpi_index),
- g_cmp_uint32, NULL);
- }
-
- if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) {
- error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize);
- return;
- }
-
- /* initialize cap_present for pci_is_express() and pci_config_size(),
- * Note that hybrid PCIs are not set automatically and need to manage
- * QEMU_PCI_CAP_EXPRESS manually */
- if (object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE) &&
- !object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE)) {
- pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
- }
-
- if (object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE)) {
- pci_dev->cap_present |= QEMU_PCIE_CAP_CXL;
- }
-
- pci_dev = do_pci_register_device(pci_dev,
- object_get_typename(OBJECT(qdev)),
- pci_dev->devfn, errp);
- if (pci_dev == NULL)
- return;
-
- if (pc->realize) {
- pc->realize(pci_dev, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- do_pci_unregister_device(pci_dev);
- return;
- }
- }
-
- /*
- * A PCIe Downstream Port that do not have ARI Forwarding enabled must
- * associate only Device 0 with the device attached to the bus
- * representing the Link from the Port (PCIe base spec rev 4.0 ver 0.3,
- * sec 7.3.1).
- * With ARI, PCI_SLOT() can return non-zero value as the traditional
- * 5-bit Device Number and 3-bit Function Number fields in its associated
- * Routing IDs, Requester IDs and Completer IDs are interpreted as a
- * single 8-bit Function Number. Hence, ignore ARI capable devices.
- */
- if (pci_is_express(pci_dev) &&
- !pcie_find_capability(pci_dev, PCI_EXT_CAP_ID_ARI) &&
- pcie_has_upstream_port(pci_dev) &&
- PCI_SLOT(pci_dev->devfn)) {
- warn_report("PCI: slot %d is not valid for %s,"
- " parent device only allows plugging into slot 0.",
- PCI_SLOT(pci_dev->devfn), pci_dev->name);
- }
-
- if (pci_dev->failover_pair_id) {
- if (!pci_bus_is_express(pci_get_bus(pci_dev))) {
- error_setg(errp, "failover primary device must be on "
- "PCIExpress bus");
- pci_qdev_unrealize(DEVICE(pci_dev));
- return;
- }
- class_id = pci_get_word(pci_dev->config + PCI_CLASS_DEVICE);
- if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
- error_setg(errp, "failover primary device is not an "
- "Ethernet device");
- pci_qdev_unrealize(DEVICE(pci_dev));
- return;
- }
- if ((pci_dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)
- || (PCI_FUNC(pci_dev->devfn) != 0)) {
- error_setg(errp, "failover: primary device must be in its own "
- "PCI slot");
- pci_qdev_unrealize(DEVICE(pci_dev));
- return;
- }
- qdev->allow_unplug_during_migration = true;
- }
-
- /* rom loading */
- is_default_rom = false;
- if (pci_dev->romfile == NULL && pc->romfile != NULL) {
- pci_dev->romfile = g_strdup(pc->romfile);
- is_default_rom = true;
- }
-
- pci_add_option_rom(pci_dev, is_default_rom, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- pci_qdev_unrealize(DEVICE(pci_dev));
- return;
- }
-
- pci_set_power(pci_dev, true);
-
- pci_dev->msi_trigger = pci_msi_trigger;
- }
pci_qdev_realize函数主要包括三个方面的工作:
(1)首先调用do_pci_register_device函数进行注册。
代码片段如下:
- pci_dev = do_pci_register_device(pci_dev,
- object_get_typename(OBJECT(qdev)),
- pci_dev->devfn, errp);
- if (pci_dev == NULL)
- return;
do_pci_register_device函数同样在hw/pci/pci.c中,代码如下:
- /* -1 for devfn means auto assign */
- static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
- const char *name, int devfn,
- Error **errp)
- {
- PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
- PCIConfigReadFunc *config_read = pc->config_read;
- PCIConfigWriteFunc *config_write = pc->config_write;
- Error *local_err = NULL;
- DeviceState *dev = DEVICE(pci_dev);
- PCIBus *bus = pci_get_bus(pci_dev);
- bool is_bridge = IS_PCI_BRIDGE(pci_dev);
-
- /* Only pci bridges can be attached to extra PCI root buses */
- if (pci_bus_is_root(bus) && bus->parent_dev && !is_bridge) {
- error_setg(errp,
- "PCI: Only PCI/PCIe bridges can be plugged into %s",
- bus->parent_dev->name);
- return NULL;
- }
-
- if (devfn < 0) {
- for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
- devfn += PCI_FUNC_MAX) {
- if (pci_bus_devfn_available(bus, devfn) &&
- !pci_bus_devfn_reserved(bus, devfn)) {
- goto found;
- }
- }
- error_setg(errp, "PCI: no slot/function available for %s, all in use "
- "or reserved", name);
- return NULL;
- found: ;
- } else if (pci_bus_devfn_reserved(bus, devfn)) {
- error_setg(errp, "PCI: slot %d function %d not available for %s,"
- " reserved",
- PCI_SLOT(devfn), PCI_FUNC(devfn), name);
- return NULL;
- } else if (!pci_bus_devfn_available(bus, devfn)) {
- error_setg(errp, "PCI: slot %d function %d not available for %s,"
- " in use by %s,id=%s",
- PCI_SLOT(devfn), PCI_FUNC(devfn), name,
- bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
- return NULL;
- } /*
- * Populating function 0 triggers a scan from the guest that
- * exposes other non-zero functions. Hence we need to ensure that
- * function 0 wasn't added yet.
- */
- else if (dev->hotplugged &&
- !pci_is_vf(pci_dev) &&
- pci_get_function_0(pci_dev)) {
- error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
- " new func %s cannot be exposed to guest.",
- PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
- pci_get_function_0(pci_dev)->name,
- name);
-
- return NULL;
- }
-
- pci_dev->devfn = devfn;
- pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
- pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
-
- memory_region_init(&pci_dev->bus_master_container_region, OBJECT(pci_dev),
- "bus master container", UINT64_MAX);
- address_space_init(&pci_dev->bus_master_as,
- &pci_dev->bus_master_container_region, pci_dev->name);
-
- if (phase_check(PHASE_MACHINE_READY)) {
- pci_init_bus_master(pci_dev);
- }
- pci_dev->irq_state = 0;
- pci_config_alloc(pci_dev);
-
- pci_config_set_vendor_id(pci_dev->config, pc->vendor_id);
- pci_config_set_device_id(pci_dev->config, pc->device_id);
- pci_config_set_revision(pci_dev->config, pc->revision);
- pci_config_set_class(pci_dev->config, pc->class_id);
-
- if (!is_bridge) {
- if (pc->subsystem_vendor_id || pc->subsystem_id) {
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
- pc->subsystem_vendor_id);
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
- pc->subsystem_id);
- } else {
- pci_set_default_subsystem_id(pci_dev);
- }
- } else {
- /* subsystem_vendor_id/subsystem_id are only for header type 0 */
- assert(!pc->subsystem_vendor_id);
- assert(!pc->subsystem_id);
- }
- pci_init_cmask(pci_dev);
- pci_init_wmask(pci_dev);
- pci_init_w1cmask(pci_dev);
- if (is_bridge) {
- pci_init_mask_bridge(pci_dev);
- }
- pci_init_multifunction(bus, pci_dev, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- do_pci_unregister_device(pci_dev);
- return NULL;
- }
-
- if (!config_read)
- config_read = pci_default_read_config;
- if (!config_write)
- config_write = pci_default_write_config;
- pci_dev->config_read = config_read;
- pci_dev->config_write = config_write;
- bus->devices[devfn] = pci_dev;
- pci_dev->version_id = 2; /* Current pci device vmstate version */
- return pci_dev;
- }
do_pci_register_device函数完成设备及其对应PCI总线上的一些初始化工作。
1)如果指定的devfn小于0(约定使用-1),表示由总线自己选择插槽:从bus->devfn_min开始,以PCI_FUNC_MAX为步长查找既未被占用也未被保留的devfn,得到插槽之后保存在PCIDevice的devfn(即pci_dev->devfn)中;如果在设备命令行中指定了addr,则addr会作为设备的devfn,此时需要检查该devfn是否被保留或已被占用。代码片段如下:
- if (devfn < 0) {
- for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
- devfn += PCI_FUNC_MAX) {
- if (pci_bus_devfn_available(bus, devfn) &&
- !pci_bus_devfn_reserved(bus, devfn)) {
- goto found;
- }
- }
- error_setg(errp, "PCI: no slot/function available for %s, all in use "
- "or reserved", name);
- return NULL;
- found: ;
- } else if (pci_bus_devfn_reserved(bus, devfn)) {
- error_setg(errp, "PCI: slot %d function %d not available for %s,"
- " reserved",
- PCI_SLOT(devfn), PCI_FUNC(devfn), name);
- return NULL;
- } else if (!pci_bus_devfn_available(bus, devfn)) {
- error_setg(errp, "PCI: slot %d function %d not available for %s,"
- " in use by %s,id=%s",
- PCI_SLOT(devfn), PCI_FUNC(devfn), name,
- bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
- return NULL;
- } /*
- * Populating function 0 triggers a scan from the guest that
- * exposes other non-zero functions. Hence we need to ensure that
- * function 0 wasn't added yet.
- */
- else if (dev->hotplugged &&
- !pci_is_vf(pci_dev) &&
- pci_get_function_0(pci_dev)) {
- error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
- " new func %s cannot be exposed to guest.",
- PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
- pci_get_function_0(pci_dev)->name,
- name);
-
- return NULL;
- }
-
- pci_dev->devfn = devfn;
- pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
- pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
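2)接下来设置PCIDevice结构体中的各个域,包括调用memory_region_init和address_space_init函数初始化PCIDevice中的AddressSpace成员bus_master_as及其对应的MR(bus_master_container_region);如果phase_check(PHASE_MACHINE_READY)成立,还会调用pci_init_bus_master函数。代码片段如下: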
- memory_region_init(&pci_dev->bus_master_container_region, OBJECT(pci_dev),
- "bus master container", UINT64_MAX);
- address_space_init(&pci_dev->bus_master_as,
- &pci_dev->bus_master_container_region, pci_dev->name);
-
- if (phase_check(PHASE_MACHINE_READY)) {
- pci_init_bus_master(pci_dev);
- }
3)之后,调用pci_config_alloc函数分配PCI设备的配置空间,并设置vendor_id、device_id、revision、class_id等初始值。随后初始化各个掩码:cmask用来检测相关的能力,wmask用来控制配置空间中哪些位可写,w1cmask用来实现RW1C(Write 1 to Clear,写1清零)。代码片段如下:
- pci_config_alloc(pci_dev);
-
- pci_config_set_vendor_id(pci_dev->config, pc->vendor_id);
- pci_config_set_device_id(pci_dev->config, pc->device_id);
- pci_config_set_revision(pci_dev->config, pc->revision);
- pci_config_set_class(pci_dev->config, pc->class_id);
-
- if (!is_bridge) {
- if (pc->subsystem_vendor_id || pc->subsystem_id) {
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
- pc->subsystem_vendor_id);
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
- pc->subsystem_id);
- } else {
- pci_set_default_subsystem_id(pci_dev);
- }
- } else {
- /* subsystem_vendor_id/subsystem_id are only for header type 0 */
- assert(!pc->subsystem_vendor_id);
- assert(!pc->subsystem_id);
- }
- pci_init_cmask(pci_dev);
- pci_init_wmask(pci_dev);
- pci_init_w1cmask(pci_dev);
- if (is_bridge) {
- pci_init_mask_bridge(pci_dev);
- }
- pci_init_multifunction(bus, pci_dev, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- do_pci_unregister_device(pci_dev);
- return NULL;
- }
4)然后是设置设备的config_read和config_write函数。如果相关的子类自己没有设置,那么就使用默认的pci_default_read_config和pci_default_write_config函数。代码片段如下:
- if (!config_read)
- config_read = pci_default_read_config;
- if (!config_write)
- config_write = pci_default_write_config;
- pci_dev->config_read = config_read;
- pci_dev->config_write = config_write;
5)最后,将该设备的指针保存到bus->devices数组中下标为devfn的位置,并设置其version_id。代码片段如下:
- bus->devices[devfn] = pci_dev;
- pci_dev->version_id = 2; /* Current pci device vmstate version */
至此,pci_qdev_realize函数所做的第一方面工作即所调用的第1个函数do_pci_register_device()就解析完了。
欲知后事如何,且看下回分解。