QEMU源码全解析 —— PCI设备模拟(2)

88 篇文章 19 订阅
本文深入剖析QEMU源码,详细讲解PCI设备的模拟过程。从PCI设备的父类结构,到PCI类初始化函数,再到设备的具现化函数pci_qdev_realize,阐述了PCI设备注册、配置空间分配以及初始化的关键步骤。通过对do_pci_register_device函数的分析,揭示了PCI设备在总线上的选择、配置空间的分配和初始化细节。
摘要由CSDN通过智能技术生成

接前一篇文章:

2. PCI设备的模拟

QEMU模拟的设备很多都是PCI设备,本节介绍PCI设备的模拟。与所有设备类似,PCI设备类型(TYPE_PCI_DEVICE)的父类型也是TYPE_DEVICE。PCI设备类型的定义在QEMU源码根目录下的hw/pci/pci.c中,代码如下:

  1. static const TypeInfo pci_device_type_info = {
  2. .name = TYPE_PCI_DEVICE,
  3. .parent = TYPE_DEVICE,
  4. .instance_size = sizeof(PCIDevice),
  5. .abstract = true,
  6. .class_size = sizeof(PCIDeviceClass),
  7. .class_init = pci_device_class_init,
  8. .class_base_init = pci_device_class_base_init,
  9. };
  10. static void pci_register_types(void)
  11. {
  12. type_register_static(&pci_bus_info);
  13. type_register_static(&pcie_bus_info);
  14. type_register_static(&cxl_bus_info);
  15. type_register_static(&conventional_pci_interface_info);
  16. type_register_static(&cxl_interface_info);
  17. type_register_static(&pcie_interface_info);
  18. type_register_static(&pci_device_type_info);
  19. }
  20. type_init(pci_register_types)

其中,TypeInfo的定义在include/qom/object.h中,如下:

typedef struct TypeInfo TypeInfo;

而struct TypeInfo的定义同样在include/qom/object.h中,代码如下:

  1. /**
  2. * struct TypeInfo:
  3. * @name: The name of the type.
  4. * @parent: The name of the parent type.
  5. * @instance_size: The size of the object (derivative of #Object). If
  6. * @instance_size is 0, then the size of the object will be the size of the
  7. * parent object.
  8. * @instance_align: The required alignment of the object. If @instance_align
  9. * is 0, then normal malloc alignment is sufficient; if non-zero, then we
  10. * must use qemu_memalign for allocation.
  11. * @instance_init: This function is called to initialize an object. The parent
  12. * class will have already been initialized so the type is only responsible
  13. * for initializing its own members.
  14. * @instance_post_init: This function is called to finish initialization of
  15. * an object, after all @instance_init functions were called.
  16. * @instance_finalize: This function is called during object destruction. This
  17. * is called before the parent @instance_finalize function has been called.
  18. * An object should only free the members that are unique to its type in this
  19. * function.
  20. * @abstract: If this field is true, then the class is considered abstract and
  21. * cannot be directly instantiated.
  22. * @class_size: The size of the class object (derivative of #ObjectClass)
  23. * for this object. If @class_size is 0, then the size of the class will be
  24. * assumed to be the size of the parent class. This allows a type to avoid
  25. * implementing an explicit class type if they are not adding additional
  26. * virtual functions.
  27. * @class_init: This function is called after all parent class initialization
  28. * has occurred to allow a class to set its default virtual method pointers.
  29. * This is also the function to use to override virtual methods from a parent
  30. * class.
  31. * @class_base_init: This function is called for all base classes after all
  32. * parent class initialization has occurred, but before the class itself
  33. * is initialized. This is the function to use to undo the effects of
  34. * memcpy from the parent class to the descendants.
  35. * @class_data: Data to pass to the @class_init,
  36. * @class_base_init. This can be useful when building dynamic
  37. * classes.
  38. * @interfaces: The list of interfaces associated with this type. This
  39. * should point to a static array that's terminated with a zero filled
  40. * element.
  41. */
  42. struct TypeInfo
  43. {
  44. const char *name;
  45. const char *parent;
  46. size_t instance_size;
  47. size_t instance_align;
  48. void (*instance_init)(Object *obj);
  49. void (*instance_post_init)(Object *obj);
  50. void (*instance_finalize)(Object *obj);
  51. bool abstract;
  52. size_t class_size;
  53. void (*class_init)(ObjectClass *klass, void *data);
  54. void (*class_base_init)(ObjectClass *klass, void *data);
  55. void *class_data;
  56. InterfaceInfo *interfaces;
  57. };

这里,对于TypeInfo即struct TypeInfo的对象pci_device_type_info来说,其class_init(函数指针)成员指向了pci_device_class_init函数。该函数也在hw/pci/pci.c中,代码如下:

  1. static void pci_device_class_init(ObjectClass *klass, void *data)
  2. {
  3. DeviceClass *k = DEVICE_CLASS(klass);
  4. k->realize = pci_qdev_realize;
  5. k->unrealize = pci_qdev_unrealize;
  6. k->bus_type = TYPE_PCI_BUS;
  7. device_class_set_props(k, pci_props);
  8. }

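PCI类初始化函数中设置了PCIDeviceClass基类对象DeviceClass的realize和unrealize函数;bus_type表示设备挂接到的总线类型(这里是TYPE_PCI_BUS);通过device_class_set_props设置的pci_props表示PCI设备有哪些属性,这些属性都可以在命令行指定。同样地,不存在单独的PCI设备:pci_device_type_info中的abstract成员为true,即PCI设备类型也是一个抽象类,不能被直接实例化。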

PCI设备的具现化函数为pci_qdev_realize。该函数同样在hw/pci/pci.c中,代码如下:

  1. static void pci_qdev_realize(DeviceState *qdev, Error **errp)
  2. {
  3. PCIDevice *pci_dev = (PCIDevice *)qdev;
  4. PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
  5. ObjectClass *klass = OBJECT_CLASS(pc);
  6. Error *local_err = NULL;
  7. bool is_default_rom;
  8. uint16_t class_id;
  9. /*
  10. * capped by systemd (see: udev-builtin-net_id.c)
  11. * as it's the only known user honor it to avoid users
  12. * misconfigure QEMU and then wonder why acpi-index doesn't work
  13. */
  14. if (pci_dev->acpi_index > ONBOARD_INDEX_MAX) {
  15. error_setg(errp, "acpi-index should be less or equal to %u",
  16. ONBOARD_INDEX_MAX);
  17. return;
  18. }
  19. /*
  20. * make sure that acpi-index is unique across all present PCI devices
  21. */
  22. if (pci_dev->acpi_index) {
  23. GSequence *used_indexes = pci_acpi_index_list();
  24. if (g_sequence_lookup(used_indexes,
  25. GINT_TO_POINTER(pci_dev->acpi_index),
  26. g_cmp_uint32, NULL)) {
  27. error_setg(errp, "a PCI device with acpi-index = %" PRIu32
  28. " already exist", pci_dev->acpi_index);
  29. return;
  30. }
  31. g_sequence_insert_sorted(used_indexes,
  32. GINT_TO_POINTER(pci_dev->acpi_index),
  33. g_cmp_uint32, NULL);
  34. }
  35. if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) {
  36. error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize);
  37. return;
  38. }
  39. /* initialize cap_present for pci_is_express() and pci_config_size(),
  40. * Note that hybrid PCIs are not set automatically and need to manage
  41. * QEMU_PCI_CAP_EXPRESS manually */
  42. if (object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE) &&
  43. !object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE)) {
  44. pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
  45. }
  46. if (object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE)) {
  47. pci_dev->cap_present |= QEMU_PCIE_CAP_CXL;
  48. }
  49. pci_dev = do_pci_register_device(pci_dev,
  50. object_get_typename(OBJECT(qdev)),
  51. pci_dev->devfn, errp);
  52. if (pci_dev == NULL)
  53. return;
  54. if (pc->realize) {
  55. pc->realize(pci_dev, &local_err);
  56. if (local_err) {
  57. error_propagate(errp, local_err);
  58. do_pci_unregister_device(pci_dev);
  59. return;
  60. }
  61. }
  62. /*
  63. * A PCIe Downstream Port that do not have ARI Forwarding enabled must
  64. * associate only Device 0 with the device attached to the bus
  65. * representing the Link from the Port (PCIe base spec rev 4.0 ver 0.3,
  66. * sec 7.3.1).
  67. * With ARI, PCI_SLOT() can return non-zero value as the traditional
  68. * 5-bit Device Number and 3-bit Function Number fields in its associated
  69. * Routing IDs, Requester IDs and Completer IDs are interpreted as a
  70. * single 8-bit Function Number. Hence, ignore ARI capable devices.
  71. */
  72. if (pci_is_express(pci_dev) &&
  73. !pcie_find_capability(pci_dev, PCI_EXT_CAP_ID_ARI) &&
  74. pcie_has_upstream_port(pci_dev) &&
  75. PCI_SLOT(pci_dev->devfn)) {
  76. warn_report("PCI: slot %d is not valid for %s,"
  77. " parent device only allows plugging into slot 0.",
  78. PCI_SLOT(pci_dev->devfn), pci_dev->name);
  79. }
  80. if (pci_dev->failover_pair_id) {
  81. if (!pci_bus_is_express(pci_get_bus(pci_dev))) {
  82. error_setg(errp, "failover primary device must be on "
  83. "PCIExpress bus");
  84. pci_qdev_unrealize(DEVICE(pci_dev));
  85. return;
  86. }
  87. class_id = pci_get_word(pci_dev->config + PCI_CLASS_DEVICE);
  88. if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
  89. error_setg(errp, "failover primary device is not an "
  90. "Ethernet device");
  91. pci_qdev_unrealize(DEVICE(pci_dev));
  92. return;
  93. }
  94. if ((pci_dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)
  95. || (PCI_FUNC(pci_dev->devfn) != 0)) {
  96. error_setg(errp, "failover: primary device must be in its own "
  97. "PCI slot");
  98. pci_qdev_unrealize(DEVICE(pci_dev));
  99. return;
  100. }
  101. qdev->allow_unplug_during_migration = true;
  102. }
  103. /* rom loading */
  104. is_default_rom = false;
  105. if (pci_dev->romfile == NULL && pc->romfile != NULL) {
  106. pci_dev->romfile = g_strdup(pc->romfile);
  107. is_default_rom = true;
  108. }
  109. pci_add_option_rom(pci_dev, is_default_rom, &local_err);
  110. if (local_err) {
  111. error_propagate(errp, local_err);
  112. pci_qdev_unrealize(DEVICE(pci_dev));
  113. return;
  114. }
  115. pci_set_power(pci_dev, true);
  116. pci_dev->msi_trigger = pci_msi_trigger;
  117. }

pci_qdev_realize函数主要包括三个方面的工作:

(1)首先调用do_pci_register_device函数进行注册。

代码片段如下:

  1. pci_dev = do_pci_register_device(pci_dev,
  2. object_get_typename(OBJECT(qdev)),
  3. pci_dev->devfn, errp);
  4. if (pci_dev == NULL)
  5. return;

do_pci_register_device函数同样在hw/pci/pci.c中,代码如下:

  1. /* -1 for devfn means auto assign */
  2. static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
  3. const char *name, int devfn,
  4. Error **errp)
  5. {
  6. PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
  7. PCIConfigReadFunc *config_read = pc->config_read;
  8. PCIConfigWriteFunc *config_write = pc->config_write;
  9. Error *local_err = NULL;
  10. DeviceState *dev = DEVICE(pci_dev);
  11. PCIBus *bus = pci_get_bus(pci_dev);
  12. bool is_bridge = IS_PCI_BRIDGE(pci_dev);
  13. /* Only pci bridges can be attached to extra PCI root buses */
  14. if (pci_bus_is_root(bus) && bus->parent_dev && !is_bridge) {
  15. error_setg(errp,
  16. "PCI: Only PCI/PCIe bridges can be plugged into %s",
  17. bus->parent_dev->name);
  18. return NULL;
  19. }
  20. if (devfn < 0) {
  21. for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
  22. devfn += PCI_FUNC_MAX) {
  23. if (pci_bus_devfn_available(bus, devfn) &&
  24. !pci_bus_devfn_reserved(bus, devfn)) {
  25. goto found;
  26. }
  27. }
  28. error_setg(errp, "PCI: no slot/function available for %s, all in use "
  29. "or reserved", name);
  30. return NULL;
  31. found: ;
  32. } else if (pci_bus_devfn_reserved(bus, devfn)) {
  33. error_setg(errp, "PCI: slot %d function %d not available for %s,"
  34. " reserved",
  35. PCI_SLOT(devfn), PCI_FUNC(devfn), name);
  36. return NULL;
  37. } else if (!pci_bus_devfn_available(bus, devfn)) {
  38. error_setg(errp, "PCI: slot %d function %d not available for %s,"
  39. " in use by %s,id=%s",
  40. PCI_SLOT(devfn), PCI_FUNC(devfn), name,
  41. bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
  42. return NULL;
  43. } /*
  44. * Populating function 0 triggers a scan from the guest that
  45. * exposes other non-zero functions. Hence we need to ensure that
  46. * function 0 wasn't added yet.
  47. */
  48. else if (dev->hotplugged &&
  49. !pci_is_vf(pci_dev) &&
  50. pci_get_function_0(pci_dev)) {
  51. error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
  52. " new func %s cannot be exposed to guest.",
  53. PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
  54. pci_get_function_0(pci_dev)->name,
  55. name);
  56. return NULL;
  57. }
  58. pci_dev->devfn = devfn;
  59. pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
  60. pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
  61. memory_region_init(&pci_dev->bus_master_container_region, OBJECT(pci_dev),
  62. "bus master container", UINT64_MAX);
  63. address_space_init(&pci_dev->bus_master_as,
  64. &pci_dev->bus_master_container_region, pci_dev->name);
  65. if (phase_check(PHASE_MACHINE_READY)) {
  66. pci_init_bus_master(pci_dev);
  67. }
  68. pci_dev->irq_state = 0;
  69. pci_config_alloc(pci_dev);
  70. pci_config_set_vendor_id(pci_dev->config, pc->vendor_id);
  71. pci_config_set_device_id(pci_dev->config, pc->device_id);
  72. pci_config_set_revision(pci_dev->config, pc->revision);
  73. pci_config_set_class(pci_dev->config, pc->class_id);
  74. if (!is_bridge) {
  75. if (pc->subsystem_vendor_id || pc->subsystem_id) {
  76. pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
  77. pc->subsystem_vendor_id);
  78. pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
  79. pc->subsystem_id);
  80. } else {
  81. pci_set_default_subsystem_id(pci_dev);
  82. }
  83. } else {
  84. /* subsystem_vendor_id/subsystem_id are only for header type 0 */
  85. assert(!pc->subsystem_vendor_id);
  86. assert(!pc->subsystem_id);
  87. }
  88. pci_init_cmask(pci_dev);
  89. pci_init_wmask(pci_dev);
  90. pci_init_w1cmask(pci_dev);
  91. if (is_bridge) {
  92. pci_init_mask_bridge(pci_dev);
  93. }
  94. pci_init_multifunction(bus, pci_dev, &local_err);
  95. if (local_err) {
  96. error_propagate(errp, local_err);
  97. do_pci_unregister_device(pci_dev);
  98. return NULL;
  99. }
  100. if (!config_read)
  101. config_read = pci_default_read_config;
  102. if (!config_write)
  103. config_write = pci_default_write_config;
  104. pci_dev->config_read = config_read;
  105. pci_dev->config_write = config_write;
  106. bus->devices[devfn] = pci_dev;
  107. pci_dev->version_id = 2; /* Current pci device vmstate version */
  108. return pci_dev;
  109. }

do_pci_register_device函数完成设备及其对应PCI总线上的一些初始化工作。

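1)如果指定的devfn为负值(约定用-1表示自动分配),表示由总线自己选择插槽:从bus->devfn_min开始、以PCI_FUNC_MAX为步长,查找第一个既未被占用也未被保留的devfn,找不到则报错返回;得到插槽之后保存在PCIDevice的devfn(即pci_dev->devfn)中。如果在设备命令行中指定了addr,则addr会作为设备的devfn,此时会检查该devfn是否被保留或已被占用;对于热插拔的非VF设备,还要求同一插槽的function 0尚未被添加。代码片段如下: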

  1. if (devfn < 0) {
  2. for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
  3. devfn += PCI_FUNC_MAX) {
  4. if (pci_bus_devfn_available(bus, devfn) &&
  5. !pci_bus_devfn_reserved(bus, devfn)) {
  6. goto found;
  7. }
  8. }
  9. error_setg(errp, "PCI: no slot/function available for %s, all in use "
  10. "or reserved", name);
  11. return NULL;
  12. found: ;
  13. } else if (pci_bus_devfn_reserved(bus, devfn)) {
  14. error_setg(errp, "PCI: slot %d function %d not available for %s,"
  15. " reserved",
  16. PCI_SLOT(devfn), PCI_FUNC(devfn), name);
  17. return NULL;
  18. } else if (!pci_bus_devfn_available(bus, devfn)) {
  19. error_setg(errp, "PCI: slot %d function %d not available for %s,"
  20. " in use by %s,id=%s",
  21. PCI_SLOT(devfn), PCI_FUNC(devfn), name,
  22. bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
  23. return NULL;
  24. } /*
  25. * Populating function 0 triggers a scan from the guest that
  26. * exposes other non-zero functions. Hence we need to ensure that
  27. * function 0 wasn't added yet.
  28. */
  29. else if (dev->hotplugged &&
  30. !pci_is_vf(pci_dev) &&
  31. pci_get_function_0(pci_dev)) {
  32. error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
  33. " new func %s cannot be exposed to guest.",
  34. PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
  35. pci_get_function_0(pci_dev)->name,
  36. name);
  37. return NULL;
  38. }
  39. pci_dev->devfn = devfn;
  40. pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
  41. pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);

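2)接下来设置PCIDevice结构体中的各个域,包括调用memory_region_init和address_space_init初始化PCIDevice中的AddressSpace成员bus_master_as及其对应的MR(bus_master_container_region);如果phase_check(PHASE_MACHINE_READY)成立,还会调用pci_init_bus_master函数。代码片段如下: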

  1. memory_region_init(&pci_dev->bus_master_container_region, OBJECT(pci_dev),
  2. "bus master container", UINT64_MAX);
  3. address_space_init(&pci_dev->bus_master_as,
  4. &pci_dev->bus_master_container_region, pci_dev->name);
  5. if (phase_check(PHASE_MACHINE_READY)) {
  6. pci_init_bus_master(pci_dev);
  7. }

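3)之后,调用pci_config_alloc函数分配PCI设备的配置空间,并在其中填入vendor_id、device_id、revision、class_id等初始值(非桥设备还会设置subsystem_vendor_id和subsystem_id)。随后初始化几个掩码:cmask用来检测相关的能力,wmask用来控制读写,w1cmask用来实现RW1C(写1清零)。最后调用pci_init_multifunction,失败时注销设备并返回NULL。代码片段如下: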

  1. pci_config_alloc(pci_dev);
  2. pci_config_set_vendor_id(pci_dev->config, pc->vendor_id);
  3. pci_config_set_device_id(pci_dev->config, pc->device_id);
  4. pci_config_set_revision(pci_dev->config, pc->revision);
  5. pci_config_set_class(pci_dev->config, pc->class_id);
  6. if (!is_bridge) {
  7. if (pc->subsystem_vendor_id || pc->subsystem_id) {
  8. pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
  9. pc->subsystem_vendor_id);
  10. pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
  11. pc->subsystem_id);
  12. } else {
  13. pci_set_default_subsystem_id(pci_dev);
  14. }
  15. } else {
  16. /* subsystem_vendor_id/subsystem_id are only for header type 0 */
  17. assert(!pc->subsystem_vendor_id);
  18. assert(!pc->subsystem_id);
  19. }
  20. pci_init_cmask(pci_dev);
  21. pci_init_wmask(pci_dev);
  22. pci_init_w1cmask(pci_dev);
  23. if (is_bridge) {
  24. pci_init_mask_bridge(pci_dev);
  25. }
  26. pci_init_multifunction(bus, pci_dev, &local_err);
  27. if (local_err) {
  28. error_propagate(errp, local_err);
  29. do_pci_unregister_device(pci_dev);
  30. return NULL;
  31. }

4)然后是设置设备的config_read和config_write函数。如果相关的子类自己没有设置,那么就使用默认的pci_default_read/write_config函数。代码片段如下:

  1. if (!config_read)
  2. config_read = pci_default_read_config;
  3. if (!config_write)
  4. config_write = pci_default_write_config;
  5. pci_dev->config_read = config_read;
  6. pci_dev->config_write = config_write;

5)最后,将该设备的指针保存到bus->devices数组中以devfn为下标的位置,并将version_id设置为2。代码片段如下:

  1. bus->devices[devfn] = pci_dev;
  2. pci_dev->version_id = 2; /* Current pci device vmstate version */

至此,pci_qdev_realize函数所做的第一方面工作即所调用的第1个函数do_pci_register_device()就解析完了。

欲知后事如何,且看下回分解。

举报

选择你想要举报的内容(必选)
  • 内容涉黄
  • 政治相关
  • 内容抄袭
  • 涉嫌广告
  • 内容侵权
  • 侮辱谩骂
  • 样式问题
  • 其他