QEMU源码全解析 —— virtio(17)

88 篇文章 19 订阅
本文从QEMU源码的角度,详细阐述virtio驱动的加载过程。内容涉及virtio PCI代理设备如何被虚拟机内部操作系统加载,以及如何通过probe函数与virtio设备通信。分析了virtio_pci_probe函数的步骤,包括virtio_pci_device结构体的分配、设置pci_dev的私有数据以及初始化virtio_device相关结构。
摘要由CSDN通过智能技术生成

接前一篇文章:

从 QEMU源码全解析 —— virtio(3) 到上一回 QEMU源码全解析 —— virtio(16),花了十几个回目讲解了virtio设备的初始化,本回开始讲解virtio驱动的加载。

virtio驱动的加载

前文书讲了每一个virtio设备都有一个对应的virtio PCI代理设备,本回开始来分析虚拟机内部操作系统是如何加载virtio PCI代理设备和virtio设备驱动、以及如何与virtio设备通信的。由于virtio PCI代理设备的存在,虚拟机内核在进行PCI总线扫描的时候会扫描到这个设备,并且会调用相应驱动的probe函数。

virtio_pci_driver及其probe回调函数在Linux内核源码/drivers/virtio/virtio_pci_common.c中,如下所示:

  • virtio_pci_driver
  1. static struct pci_driver virtio_pci_driver = {
  2. .name = "virtio-pci",
  3. .id_table = virtio_pci_id_table,
  4. .probe = virtio_pci_probe,
  5. .remove = virtio_pci_remove,
  6. #ifdef CONFIG_PM_SLEEP
  7. .driver.pm = &virtio_pci_pm_ops,
  8. #endif
  9. .sriov_configure = virtio_pci_sriov_configure,
  10. };
  11. module_pci_driver(virtio_pci_driver);
  • virtio_pci_driver的probe函数virtio_pci_probe
  1. static int virtio_pci_probe(struct pci_dev *pci_dev,
  2. const struct pci_device_id *id)
  3. {
  4. struct virtio_pci_device *vp_dev, *reg_dev = NULL;
  5. int rc;
  6. /* allocate our structure and fill it out */
  7. vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
  8. if (!vp_dev)
  9. return -ENOMEM;
  10. pci_set_drvdata(pci_dev, vp_dev);
  11. vp_dev->vdev.dev.parent = &pci_dev->dev;
  12. vp_dev->vdev.dev.release = virtio_pci_release_dev;
  13. vp_dev->pci_dev = pci_dev;
  14. INIT_LIST_HEAD(&vp_dev->virtqueues);
  15. spin_lock_init(&vp_dev->lock);
  16. /* enable the device */
  17. rc = pci_enable_device(pci_dev);
  18. if (rc)
  19. goto err_enable_device;
  20. if (force_legacy) {
  21. rc = virtio_pci_legacy_probe(vp_dev);
  22. /* Also try modern mode if we can't map BAR0 (no IO space). */
  23. if (rc == -ENODEV || rc == -ENOMEM)
  24. rc = virtio_pci_modern_probe(vp_dev);
  25. if (rc)
  26. goto err_probe;
  27. } else {
  28. rc = virtio_pci_modern_probe(vp_dev);
  29. if (rc == -ENODEV)
  30. rc = virtio_pci_legacy_probe(vp_dev);
  31. if (rc)
  32. goto err_probe;
  33. }
  34. pci_set_master(pci_dev);
  35. vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false;
  36. rc = register_virtio_device(&vp_dev->vdev);
  37. reg_dev = vp_dev;
  38. if (rc)
  39. goto err_register;
  40. return 0;
  41. err_register:
  42. if (vp_dev->is_legacy)
  43. virtio_pci_legacy_remove(vp_dev);
  44. else
  45. virtio_pci_modern_remove(vp_dev);
  46. err_probe:
  47. pci_disable_device(pci_dev);
  48. err_enable_device:
  49. if (reg_dev)
  50. put_device(&vp_dev->vdev.dev);
  51. else
  52. kfree(vp_dev);
  53. return rc;
  54. }

(1)virtio_pci_probe函数分配一个virtio_pci_device结构体实例并赋值给vp_dev,用来表示一个virtio PCI代理设备。代码片段如下:

  1. struct virtio_pci_device *vp_dev, *reg_dev = NULL;
  2. ……
  3. /* allocate our structure and fill it out */
  4. vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
  5. if (!vp_dev)
  6. return -ENOMEM;

struct virtio_pci_device的定义在Linux内核源码/drivers/virtio/virtio_pci_common.h中,代码如下:

  1. /* Our device structure */
  2. struct virtio_pci_device {
  3. struct virtio_device vdev;
  4. struct pci_dev *pci_dev;
  5. struct virtio_pci_legacy_device ldev;
  6. struct virtio_pci_modern_device mdev;
  7. bool is_legacy;
  8. /* Where to read and clear interrupt */
  9. u8 __iomem *isr;
  10. /* a list of queues so we can dispatch IRQs */
  11. spinlock_t lock;
  12. struct list_head virtqueues;
  13. /* array of all queues for house-keeping */
  14. struct virtio_pci_vq_info **vqs;
  15. /* MSI-X support */
  16. int msix_enabled;
  17. int intx_enabled;
  18. cpumask_var_t *msix_affinity_masks;
  19. /* Name strings for interrupts. This size should be enough,
  20. * and I'm too lazy to allocate each name separately. */
  21. char (*msix_names)[256];
  22. /* Number of available vectors */
  23. unsigned int msix_vectors;
  24. /* Vectors allocated, excluding per-vq vectors if any */
  25. unsigned int msix_used_vectors;
  26. /* Whether we have vector per vq */
  27. bool per_vq_vectors;
  28. struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev,
  29. struct virtio_pci_vq_info *info,
  30. unsigned int idx,
  31. void (*callback)(struct virtqueue *vq),
  32. const char *name,
  33. bool ctx,
  34. u16 msix_vec);
  35. void (*del_vq)(struct virtio_pci_vq_info *info);
  36. u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
  37. };

(2)将vp_dev设置为该pci_dev的私有结构。代码片段如下:

    pci_set_drvdata(pci_dev, vp_dev);

pci_set_drvdata函数在Linux内核源码/include/linux/pci.h中,代码如下:

  1. static inline void pci_set_drvdata(struct pci_dev *pdev, void *data)
  2. {
  3. dev_set_drvdata(&pdev->dev, data);
  4. }

dev_set_drvdata函数在Linux内核源码/include/linux/device.h中,代码如下:

  1. static inline void dev_set_drvdata(struct device *dev, void *data)
  2. {
  3. dev->driver_data = data;
  4. }

综上,

    pci_set_drvdata(pci_dev, vp_dev);

展开后实际上是:

    (&pci_dev->dev)->driver_data = vp_dev;

struct pci_dev的定义在Linux内核源码/include/linux/pci.h中,如下:

  1. /* The pci_dev structure describes PCI devices */
  2. struct pci_dev {
  3. struct list_head bus_list; /* Node in per-bus list */
  4. struct pci_bus *bus; /* Bus this device is on */
  5. struct pci_bus *subordinate; /* Bus this device bridges to */
  6. void *sysdata; /* Hook for sys-specific extension */
  7. struct proc_dir_entry *procent; /* Device entry in /proc/bus/pci */
  8. struct pci_slot *slot; /* Physical slot this device is in */
  9. unsigned int devfn; /* Encoded device & function index */
  10. unsigned short vendor;
  11. unsigned short device;
  12. unsigned short subsystem_vendor;
  13. unsigned short subsystem_device;
  14. unsigned int class; /* 3 bytes: (base,sub,prog-if) */
  15. u8 revision; /* PCI revision, low byte of class word */
  16. u8 hdr_type; /* PCI header type (`multi' flag masked out) */
  17. #ifdef CONFIG_PCIEAER
  18. u16 aer_cap; /* AER capability offset */
  19. struct aer_stats *aer_stats; /* AER stats for this device */
  20. #endif
  21. #ifdef CONFIG_PCIEPORTBUS
  22. struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */
  23. struct pci_dev *rcec; /* Associated RCEC device */
  24. #endif
  25. u32 devcap; /* PCIe Device Capabilities */
  26. u8 pcie_cap; /* PCIe capability offset */
  27. u8 msi_cap; /* MSI capability offset */
  28. u8 msix_cap; /* MSI-X capability offset */
  29. u8 pcie_mpss:3; /* PCIe Max Payload Size Supported */
  30. u8 rom_base_reg; /* Config register controlling ROM */
  31. u8 pin; /* Interrupt pin this device uses */
  32. u16 pcie_flags_reg; /* Cached PCIe Capabilities Register */
  33. unsigned long *dma_alias_mask;/* Mask of enabled devfn aliases */
  34. struct pci_driver *driver; /* Driver bound to this device */
  35. u64 dma_mask; /* Mask of the bits of bus address this
  36. device implements. Normally this is
  37. 0xffffffff. You only need to change
  38. this if your device has broken DMA
  39. or supports 64-bit transfers. */
  40. struct device_dma_parameters dma_parms;
  41. pci_power_t current_state; /* Current operating state. In ACPI,
  42. this is D0-D3, D0 being fully
  43. functional, and D3 being off. */
  44. unsigned int imm_ready:1; /* Supports Immediate Readiness */
  45. u8 pm_cap; /* PM capability offset */
  46. unsigned int pme_support:5; /* Bitmask of states from which PME#
  47. can be generated */
  48. unsigned int pme_poll:1; /* Poll device's PME status bit */
  49. unsigned int d1_support:1; /* Low power state D1 is supported */
  50. unsigned int d2_support:1; /* Low power state D2 is supported */
  51. unsigned int no_d1d2:1; /* D1 and D2 are forbidden */
  52. unsigned int no_d3cold:1; /* D3cold is forbidden */
  53. unsigned int bridge_d3:1; /* Allow D3 for bridge */
  54. unsigned int d3cold_allowed:1; /* D3cold is allowed by user */
  55. unsigned int mmio_always_on:1; /* Disallow turning off io/mem
  56. decoding during BAR sizing */
  57. unsigned int wakeup_prepared:1;
  58. unsigned int skip_bus_pm:1; /* Internal: Skip bus-level PM */
  59. unsigned int ignore_hotplug:1; /* Ignore hotplug events */
  60. unsigned int hotplug_user_indicators:1; /* SlotCtl indicators
  61. controlled exclusively by
  62. user sysfs */
  63. unsigned int clear_retrain_link:1; /* Need to clear Retrain Link
  64. bit manually */
  65. unsigned int d3hot_delay; /* D3hot->D0 transition time in ms */
  66. unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */
  67. #ifdef CONFIG_PCIEASPM
  68. struct pcie_link_state *link_state; /* ASPM link state */
  69. unsigned int ltr_path:1; /* Latency Tolerance Reporting
  70. supported from root to here */
  71. u16 l1ss; /* L1SS Capability pointer */
  72. #endif
  73. unsigned int pasid_no_tlp:1; /* PASID works without TLP Prefix */
  74. unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */
  75. pci_channel_state_t error_state; /* Current connectivity state */
  76. struct device dev; /* Generic device interface */
  77. int cfg_size; /* Size of config space */
  78. /*
  79. * Instead of touching interrupt line and base address registers
  80. * directly, use the values stored here. They might be different!
  81. */
  82. unsigned int irq;
  83. struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
  84. bool match_driver; /* Skip attaching driver */
  85. unsigned int transparent:1; /* Subtractive decode bridge */
  86. unsigned int io_window:1; /* Bridge has I/O window */
  87. unsigned int pref_window:1; /* Bridge has pref mem window */
  88. unsigned int pref_64_window:1; /* Pref mem window is 64-bit */
  89. unsigned int multifunction:1; /* Multi-function device */
  90. unsigned int is_busmaster:1; /* Is busmaster */
  91. unsigned int no_msi:1; /* May not use MSI */
  92. unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */
  93. unsigned int block_cfg_access:1; /* Config space access blocked */
  94. unsigned int broken_parity_status:1; /* Generates false positive parity */
  95. unsigned int irq_reroute_variant:2; /* Needs IRQ rerouting variant */
  96. unsigned int msi_enabled:1;
  97. unsigned int msix_enabled:1;
  98. unsigned int ari_enabled:1; /* ARI forwarding */
  99. unsigned int ats_enabled:1; /* Address Translation Svc */
  100. unsigned int pasid_enabled:1; /* Process Address Space ID */
  101. unsigned int pri_enabled:1; /* Page Request Interface */
  102. unsigned int is_managed:1; /* Managed via devres */
  103. unsigned int is_msi_managed:1; /* MSI release via devres installed */
  104. unsigned int needs_freset:1; /* Requires fundamental reset */
  105. unsigned int state_saved:1;
  106. unsigned int is_physfn:1;
  107. unsigned int is_virtfn:1;
  108. unsigned int is_hotplug_bridge:1;
  109. unsigned int shpc_managed:1; /* SHPC owned by shpchp */
  110. unsigned int is_thunderbolt:1; /* Thunderbolt controller */
  111. /*
  112. * Devices marked being untrusted are the ones that can potentially
  113. * execute DMA attacks and similar. They are typically connected
  114. * through external ports such as Thunderbolt but not limited to
  115. * that. When an IOMMU is enabled they should be getting full
  116. * mappings to make sure they cannot access arbitrary memory.
  117. */
  118. unsigned int untrusted:1;
  119. /*
  120. * Info from the platform, e.g., ACPI or device tree, may mark a
  121. * device as "external-facing". An external-facing device is
  122. * itself internal but devices downstream from it are external.
  123. */
  124. unsigned int external_facing:1;
  125. unsigned int broken_intx_masking:1; /* INTx masking can't be used */
  126. unsigned int io_window_1k:1; /* Intel bridge 1K I/O windows */
  127. unsigned int irq_managed:1;
  128. unsigned int non_compliant_bars:1; /* Broken BARs; ignore them */
  129. unsigned int is_probed:1; /* Device probing in progress */
  130. unsigned int link_active_reporting:1;/* Device capable of reporting link active */
  131. unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */
  132. unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */
  133. unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */
  134. pci_dev_flags_t dev_flags;
  135. atomic_t enable_cnt; /* pci_enable_device has been called */
  136. u32 saved_config_space[16]; /* Config space saved at suspend time */
  137. struct hlist_head saved_cap_space;
  138. int rom_attr_enabled; /* Display of ROM attribute enabled? */
  139. struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
  140. struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
  141. #ifdef CONFIG_HOTPLUG_PCI_PCIE
  142. unsigned int broken_cmd_compl:1; /* No compl for some cmds */
  143. #endif
  144. #ifdef CONFIG_PCIE_PTM
  145. u16 ptm_cap; /* PTM Capability */
  146. unsigned int ptm_root:1;
  147. unsigned int ptm_enabled:1;
  148. u8 ptm_granularity;
  149. #endif
  150. #ifdef CONFIG_PCI_MSI
  151. void __iomem *msix_base;
  152. raw_spinlock_t msi_lock;
  153. #endif
  154. struct pci_vpd vpd;
  155. #ifdef CONFIG_PCIE_DPC
  156. u16 dpc_cap;
  157. unsigned int dpc_rp_extensions:1;
  158. u8 dpc_rp_log_size;
  159. #endif
  160. #ifdef CONFIG_PCI_ATS
  161. union {
  162. struct pci_sriov *sriov; /* PF: SR-IOV info */
  163. struct pci_dev *physfn; /* VF: related PF */
  164. };
  165. u16 ats_cap; /* ATS Capability offset */
  166. u8 ats_stu; /* ATS Smallest Translation Unit */
  167. #endif
  168. #ifdef CONFIG_PCI_PRI
  169. u16 pri_cap; /* PRI Capability offset */
  170. u32 pri_reqs_alloc; /* Number of PRI requests allocated */
  171. unsigned int pasid_required:1; /* PRG Response PASID Required */
  172. #endif
  173. #ifdef CONFIG_PCI_PASID
  174. u16 pasid_cap; /* PASID Capability offset */
  175. u16 pasid_features;
  176. #endif
  177. #ifdef CONFIG_PCI_P2PDMA
  178. struct pci_p2pdma __rcu *p2pdma;
  179. #endif
  180. u16 acs_cap; /* ACS Capability offset */
  181. phys_addr_t rom; /* Physical address if not from BAR */
  182. size_t romlen; /* Length if not from BAR */
  183. /*
  184. * Driver name to force a match. Do not set directly, because core
  185. * frees it. Use driver_set_override() to set or clear it.
  186. */
  187. const char *driver_override;
  188. unsigned long priv_flags; /* Private flags for the PCI driver */
  189. /* These methods index pci_reset_fn_methods[] */
  190. u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */
  191. };

(3)初始化vp_dev(即virtio_pci_device结构)中virtio_device类型的vdev成员相关结构。代码片段如下:

  1. vp_dev->vdev.dev.parent = &pci_dev->dev;
  2. vp_dev->vdev.dev.release = virtio_pci_release_dev;
  3. vp_dev->pci_dev = pci_dev;
  4. INIT_LIST_HEAD(&vp_dev->virtqueues);
  5. spin_lock_init(&vp_dev->lock);

virtio_pci_probe函数其余部分代码的解析,请看下回。

举报

选择你想要举报的内容(必选)
  • 内容涉黄
  • 政治相关
  • 内容抄袭
  • 涉嫌广告
  • 内容侵权
  • 侮辱谩骂
  • 样式问题
  • 其他