QEMU源码全解析 —— virtio(19)

88 篇文章 19 订阅
本文详细解析了Linux内核中virtio_pci_modern_probe和virtio_pci_legacy_probe函数,重点讨论了virtio_pci_modern_probe的执行流程,包括设置virtio设备的vendor ID和device ID,发现PCI capability,映射BAR到内核地址空间等关键步骤。同时,简要介绍了virtio_pci_legacy_probe。通过对这两个函数的解析,加深了对QEMU中virtio设备初始化的理解。
摘要由CSDN通过智能技术生成

接前一篇文章:

上回书继续讲解virtio_pci_driver的 probe 回调函数virtio_pci_probe(),在讲到第5段代码的时候,

  1. if (force_legacy) {
  2. rc = virtio_pci_legacy_probe(vp_dev);
  3. /* Also try modern mode if we can't map BAR0 (no IO space). */
  4. if (rc == -ENODEV || rc == -ENOMEM)
  5. rc = virtio_pci_modern_probe(vp_dev);
  6. if (rc)
  7. goto err_probe;
  8. } else {
  9. rc = virtio_pci_modern_probe(vp_dev);
  10. if (rc == -ENODEV)
  11. rc = virtio_pci_legacy_probe(vp_dev);
  12. if (rc)
  13. goto err_probe;
  14. }

引出来两个函数:virtio_pci_legacy_probe和virtio_pci_modern_probe。本回就来对它们进行解析。当然,由于legacy已是“过去时”(针对传统设备的),因此重点围绕virtio_pci_modern_probe函数进行解析,捎带手地也讲一下virtio_pci_legacy_probe函数。为了便于理解和回顾,再次贴出两个函数的源码:

  • virtio_pci_legacy_probe

virtio_pci_legacy_probe函数在Linux内核源码/drivers/virtio/virtio_pci_legacy.c中,代码如下:

  1. /* the PCI probing function */
  2. int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
  3. {
  4. struct virtio_pci_legacy_device *ldev = &vp_dev->ldev;
  5. struct pci_dev *pci_dev = vp_dev->pci_dev;
  6. int rc;
  7. ldev->pci_dev = pci_dev;
  8. rc = vp_legacy_probe(ldev);
  9. if (rc)
  10. return rc;
  11. vp_dev->isr = ldev->isr;
  12. vp_dev->vdev.id = ldev->id;
  13. vp_dev->vdev.config = &virtio_pci_config_ops;
  14. vp_dev->config_vector = vp_config_vector;
  15. vp_dev->setup_vq = setup_vq;
  16. vp_dev->del_vq = del_vq;
  17. return 0;
  18. }
  • virtio_pci_modern_probe

virtio_pci_modern_probe函数在Linux内核源码/drivers/virtio/virtio_pci_modern.c中,代码如下:

  1. /* the PCI probing function */
  2. int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
  3. {
  4. struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
  5. struct pci_dev *pci_dev = vp_dev->pci_dev;
  6. int err;
  7. mdev->pci_dev = pci_dev;
  8. err = vp_modern_probe(mdev);
  9. if (err)
  10. return err;
  11. if (mdev->device)
  12. vp_dev->vdev.config = &virtio_pci_config_ops;
  13. else
  14. vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
  15. vp_dev->config_vector = vp_config_vector;
  16. vp_dev->setup_vq = setup_vq;
  17. vp_dev->del_vq = del_vq;
  18. vp_dev->isr = mdev->isr;
  19. vp_dev->vdev.id = mdev->id;
  20. return 0;
  21. }

virtio_pci_modern_probe函数中最主要的是调用了vp_modern_probe函数,其在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:

  1. /*
  2. * vp_modern_probe: probe the modern virtio pci device, note that the
  3. * caller is required to enable PCI device before calling this function.
  4. * @mdev: the modern virtio-pci device
  5. *
  6. * Return 0 on succeed otherwise fail
  7. */
  8. int vp_modern_probe(struct virtio_pci_modern_device *mdev)
  9. {
  10. struct pci_dev *pci_dev = mdev->pci_dev;
  11. int err, common, isr, notify, device;
  12. u32 notify_length;
  13. u32 notify_offset;
  14. check_offsets();
  15. /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
  16. if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
  17. return -ENODEV;
  18. if (pci_dev->device < 0x1040) {
  19. /* Transitional devices: use the PCI subsystem device id as
  20. * virtio device id, same as legacy driver always did.
  21. */
  22. mdev->id.device = pci_dev->subsystem_device;
  23. } else {
  24. /* Modern devices: simply use PCI device id, but start from 0x1040. */
  25. mdev->id.device = pci_dev->device - 0x1040;
  26. }
  27. mdev->id.vendor = pci_dev->subsystem_vendor;
  28. /* check for a common config: if not, use legacy mode (bar 0). */
  29. common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
  30. IORESOURCE_IO | IORESOURCE_MEM,
  31. &mdev->modern_bars);
  32. if (!common) {
  33. dev_info(&pci_dev->dev,
  34. "virtio_pci: leaving for legacy driver\n");
  35. return -ENODEV;
  36. }
  37. /* If common is there, these should be too... */
  38. isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
  39. IORESOURCE_IO | IORESOURCE_MEM,
  40. &mdev->modern_bars);
  41. notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
  42. IORESOURCE_IO | IORESOURCE_MEM,
  43. &mdev->modern_bars);
  44. if (!isr || !notify) {
  45. dev_err(&pci_dev->dev,
  46. "virtio_pci: missing capabilities %i/%i/%i\n",
  47. common, isr, notify);
  48. return -EINVAL;
  49. }
  50. err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
  51. if (err)
  52. err = dma_set_mask_and_coherent(&pci_dev->dev,
  53. DMA_BIT_MASK(32));
  54. if (err)
  55. dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
  56. /* Device capability is only mandatory for devices that have
  57. * device-specific configuration.
  58. */
  59. device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
  60. IORESOURCE_IO | IORESOURCE_MEM,
  61. &mdev->modern_bars);
  62. err = pci_request_selected_regions(pci_dev, mdev->modern_bars,
  63. "virtio-pci-modern");
  64. if (err)
  65. return err;
  66. err = -EINVAL;
  67. mdev->common = vp_modern_map_capability(mdev, common,
  68. sizeof(struct virtio_pci_common_cfg), 4,
  69. 0, sizeof(struct virtio_pci_common_cfg),
  70. NULL, NULL);
  71. if (!mdev->common)
  72. goto err_map_common;
  73. mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
  74. 0, 1,
  75. NULL, NULL);
  76. if (!mdev->isr)
  77. goto err_map_isr;
  78. /* Read notify_off_multiplier from config space. */
  79. pci_read_config_dword(pci_dev,
  80. notify + offsetof(struct virtio_pci_notify_cap,
  81. notify_off_multiplier),
  82. &mdev->notify_offset_multiplier);
  83. /* Read notify length and offset from config space. */
  84. pci_read_config_dword(pci_dev,
  85. notify + offsetof(struct virtio_pci_notify_cap,
  86. cap.length),
  87. &notify_length);
  88. pci_read_config_dword(pci_dev,
  89. notify + offsetof(struct virtio_pci_notify_cap,
  90. cap.offset),
  91. &notify_offset);
  92. /* We don't know how many VQs we'll map, ahead of the time.
  93. * If notify length is small, map it all now.
  94. * Otherwise, map each VQ individually later.
  95. */
  96. if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
  97. mdev->notify_base = vp_modern_map_capability(mdev, notify,
  98. 2, 2,
  99. 0, notify_length,
  100. &mdev->notify_len,
  101. &mdev->notify_pa);
  102. if (!mdev->notify_base)
  103. goto err_map_notify;
  104. } else {
  105. mdev->notify_map_cap = notify;
  106. }
  107. /* Again, we don't know how much we should map, but PAGE_SIZE
  108. * is more than enough for all existing devices.
  109. */
  110. if (device) {
  111. mdev->device = vp_modern_map_capability(mdev, device, 0, 4,
  112. 0, PAGE_SIZE,
  113. &mdev->device_len,
  114. NULL);
  115. if (!mdev->device)
  116. goto err_map_device;
  117. }
  118. return 0;
  119. err_map_device:
  120. if (mdev->notify_base)
  121. pci_iounmap(pci_dev, mdev->notify_base);
  122. err_map_notify:
  123. pci_iounmap(pci_dev, mdev->isr);
  124. err_map_isr:
  125. pci_iounmap(pci_dev, mdev->common);
  126. err_map_common:
  127. pci_release_selected_regions(pci_dev, mdev->modern_bars);
  128. return err;
  129. }
  130. EXPORT_SYMBOL_GPL(vp_modern_probe);

实际上在老版本KVM即Linux内核代码中,vp_modern_probe函数中的内容绝大多数是直接放在virtio_pci_modern_probe函数中的,后来才单独封了这样一个函数。

(1)vp_modern_probe首先设置了virtio设备的vendor ID和device ID。代码片段如下:

  1. /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
  2. if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
  3. return -ENODEV;
  4. if (pci_dev->device < 0x1040) {
  5. /* Transitional devices: use the PCI subsystem device id as
  6. * virtio device id, same as legacy driver always did.
  7. */
  8. mdev->id.device = pci_dev->subsystem_device;
  9. } else {
  10. /* Modern devices: simply use PCI device id, but start from 0x1040. */
  11. mdev->id.device = pci_dev->device - 0x1040;
  12. }
  13. mdev->id.vendor = pci_dev->subsystem_vendor;

值得注意的是,virtio PCI代理设备的device ID就是前文书(参见 QEMU源码全解析 —— virtio(14) )在讲virtio_pci_device_plugged函数(QEMU源码中)时设置的PCI_DEVICE_ID_VIRTIO_10_BASE+VIRTIO_ID_BALLOON,即0x1040+5。

163069cc9d1bd09f58e1a6bc6fa54b23.png

所以,这里virtio设备的device ID(mdev->id.device)就是0x1040+5-0x1040=5,也就代表了VIRTIO_ID_BALLOON。

Vendor ID和Device ID参考笔者的文章: 《PCI Express体系结构导读》随记 —— 第I篇 第2章 PCI总线的桥与配置(12)

3bb0af6357bff314fa193654da606928.png

(2)接下来,多次调用virtio_pci_find_capability函数来发现virtio PCI代理设备的pci capability。代码片段如下:

  1. /* check for a common config: if not, use legacy mode (bar 0). */
  2. common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
  3. IORESOURCE_IO | IORESOURCE_MEM,
  4. &mdev->modern_bars);
  5. if (!common) {
  6. dev_info(&pci_dev->dev,
  7. "virtio_pci: leaving for legacy driver\n");
  8. return -ENODEV;
  9. }
  10. /* If common is there, these should be too... */
  11. isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
  12. IORESOURCE_IO | IORESOURCE_MEM,
  13. &mdev->modern_bars);
  14. notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
  15. IORESOURCE_IO | IORESOURCE_MEM,
  16. &mdev->modern_bars);
  17. if (!isr || !notify) {
  18. dev_err(&pci_dev->dev,
  19. "virtio_pci: missing capabilities %i/%i/%i\n",
  20. common, isr, notify);
  21. return -EINVAL;
  22. }
  23. err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
  24. if (err)
  25. err = dma_set_mask_and_coherent(&pci_dev->dev,
  26. DMA_BIT_MASK(32));
  27. if (err)
  28. dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
  29. /* Device capability is only mandatory for devices that have
  30. * device-specific configuration.
  31. */
  32. device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
  33. IORESOURCE_IO | IORESOURCE_MEM,
  34. &mdev->modern_bars);

这些capability也是在(QEMU源码)virtio_pci_device_plugged函数中写入到virtio PCI代理设备的配置空间中的,参见 QEMU源码全解析 —— virtio(14) 和 QEMU源码全解析 —— virtio(15)

dd30960444ce56406735129ce2aeb9a2.png

关于pci capability这一部分的PCI知识,参考:

VirtIO实现原理——PCI基础

2ceee9988f076577e66833f938e5f0e7.png

(3)virtio_pci_find_capability函数找到capability所属的PCI BAR,然后写入到virtio_pci_modern_device(即mdev)的modern_bars成员中。代码片段如下:

  1. /* Device capability is only mandatory for devices that have
  2. * device-specific configuration.
  3. */
  4. device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
  5. IORESOURCE_IO | IORESOURCE_MEM,
  6. &mdev->modern_bars);

从(QEMU源码)virtio_pci_realize函数中可以知道这个modern_bars是1<<4,如下图所示:

12d4462772a8ce7ecaffdf9106e27af5.png

(4)接着,pci_request_selected_regions函数就将virtio PCI代理设备的BAR地址空间保留出来了。代码片段如下:

  1. err = pci_request_selected_regions(pci_dev, mdev->modern_bars,
  2. "virtio-pci-modern");
  3. if (err)
  4. return err;

(5)调用vp_modern_map_capability函数将对应的capability在PCI代理设备中的BAR空间映射到内核地址空间。代码片段如下:

  1. err = -EINVAL;
  2. mdev->common = vp_modern_map_capability(mdev, common,
  3. sizeof(struct virtio_pci_common_cfg), 4,
  4. 0, sizeof(struct virtio_pci_common_cfg),
  5. NULL, NULL);
  6. if (!mdev->common)
  7. goto err_map_common;
  8. mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
  9. 0, 1,
  10. NULL, NULL);
  11. if (!mdev->isr)
  12. goto err_map_isr;
  13. /* Read notify_off_multiplier from config space. */
  14. pci_read_config_dword(pci_dev,
  15. notify + offsetof(struct virtio_pci_notify_cap,
  16. notify_off_multiplier),
  17. &mdev->notify_offset_multiplier);
  18. /* Read notify length and offset from config space. */
  19. pci_read_config_dword(pci_dev,
  20. notify + offsetof(struct virtio_pci_notify_cap,
  21. cap.length),
  22. &notify_length);
  23. pci_read_config_dword(pci_dev,
  24. notify + offsetof(struct virtio_pci_notify_cap,
  25. cap.offset),
  26. &notify_offset);
  27. /* We don't know how many VQs we'll map, ahead of the time.
  28. * If notify length is small, map it all now.
  29. * Otherwise, map each VQ individually later.
  30. */
  31. if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
  32. mdev->notify_base = vp_modern_map_capability(mdev, notify,
  33. 2, 2,
  34. 0, notify_length,
  35. &mdev->notify_len,
  36. &mdev->notify_pa);
  37. if (!mdev->notify_base)
  38. goto err_map_notify;
  39. } else {
  40. mdev->notify_map_cap = notify;
  41. }
  42. /* Again, we don't know how much we should map, but PAGE_SIZE
  43. * is more than enough for all existing devices.
  44. */
  45. if (device) {
  46. mdev->device = vp_modern_map_capability(mdev, device, 0, 4,
  47. 0, PAGE_SIZE,
  48. &mdev->device_len,
  49. NULL);
  50. if (!mdev->device)
  51. goto err_map_device;
  52. }

如mdev(struct virtio_pci_modern_device *mdev = &vp_dev->mdev;)的common成员映射了virtio_pci_common_cfg的数据到内核中。这样,后续就可以直接通过这个内存地址空间来访问common这一capability了,其它的capability(isr、notify、device)也与此类似。

vp_modern_map_capability函数在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:

  1. /*
  2. * vp_modern_map_capability - map a part of virtio pci capability
  3. * @mdev: the modern virtio-pci device
  4. * @off: offset of the capability
  5. * @minlen: minimal length of the capability
  6. * @align: align requirement
  7. * @start: start from the capability
  8. * @size: map size
  9. * @len: the length that is actually mapped
  10. * @pa: physical address of the capability
  11. *
  12. * Returns the io address of for the part of the capability
  13. */
  14. static void __iomem *
  15. vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
  16. size_t minlen, u32 align, u32 start, u32 size,
  17. size_t *len, resource_size_t *pa)
  18. {
  19. struct pci_dev *dev = mdev->pci_dev;
  20. u8 bar;
  21. u32 offset, length;
  22. void __iomem *p;
  23. pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
  24. bar),
  25. &bar);
  26. pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
  27. &offset);
  28. pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
  29. &length);
  30. /* Check if the BAR may have changed since we requested the region. */
  31. if (bar >= PCI_STD_NUM_BARS || !(mdev->modern_bars & (1 << bar))) {
  32. dev_err(&dev->dev,
  33. "virtio_pci: bar unexpectedly changed to %u\n", bar);
  34. return NULL;
  35. }
  36. if (length <= start) {
  37. dev_err(&dev->dev,
  38. "virtio_pci: bad capability len %u (>%u expected)\n",
  39. length, start);
  40. return NULL;
  41. }
  42. if (length - start < minlen) {
  43. dev_err(&dev->dev,
  44. "virtio_pci: bad capability len %u (>=%zu expected)\n",
  45. length, minlen);
  46. return NULL;
  47. }
  48. length -= start;
  49. if (start + offset < offset) {
  50. dev_err(&dev->dev,
  51. "virtio_pci: map wrap-around %u+%u\n",
  52. start, offset);
  53. return NULL;
  54. }
  55. offset += start;
  56. if (offset & (align - 1)) {
  57. dev_err(&dev->dev,
  58. "virtio_pci: offset %u not aligned to %u\n",
  59. offset, align);
  60. return NULL;
  61. }
  62. if (length > size)
  63. length = size;
  64. if (len)
  65. *len = length;
  66. if (minlen + offset < minlen ||
  67. minlen + offset > pci_resource_len(dev, bar)) {
  68. dev_err(&dev->dev,
  69. "virtio_pci: map virtio %zu@%u "
  70. "out of range on bar %i length %lu\n",
  71. minlen, offset,
  72. bar, (unsigned long)pci_resource_len(dev, bar));
  73. return NULL;
  74. }
  75. p = pci_iomap_range(dev, bar, offset, length);
  76. if (!p)
  77. dev_err(&dev->dev,
  78. "virtio_pci: unable to map virtio %u@%u on bar %i\n",
  79. length, offset, bar);
  80. else if (pa)
  81. *pa = pci_resource_start(dev, bar) + offset;
  82. return p;
  83. }

这样实际上就将virtio PCI代理设备的BAR映射到虚拟机内核地址空间了,后续直接访问这些地址即可实现对virtio PCI代理设备的配置和控制。

回到virtio_pci_modern_probe函数。

  1. /* the PCI probing function */
  2. int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
  3. {
  4. struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
  5. struct pci_dev *pci_dev = vp_dev->pci_dev;
  6. int err;
  7. mdev->pci_dev = pci_dev;
  8. err = vp_modern_probe(mdev);
  9. if (err)
  10. return err;
  11. if (mdev->device)
  12. vp_dev->vdev.config = &virtio_pci_config_ops;
  13. else
  14. vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
  15. vp_dev->config_vector = vp_config_vector;
  16. vp_dev->setup_vq = setup_vq;
  17. vp_dev->del_vq = del_vq;
  18. vp_dev->isr = mdev->isr;
  19. vp_dev->vdev.id = mdev->id;
  20. return 0;
  21. }

在调用完vp_modern_probe函数之后,virtio_pci_modern_probe函数接着设置vp_dev(struct virtio_pci_device)中类型为virtio_device的成员vdev的config成员。如果有device这一capability(即mdev->device不为空),则设置为virtio_pci_config_ops,否则设置为virtio_pci_config_nodev_ops。

之后设置vp_dev即struct virtio_pci_device的几个回调函数:config_vector与MSI中断有关,设置为vp_config_vector;setup_vq用来配置virtio设备virt queue,设置为setup_vq;del_vq用来删除virt queue,设置为del_vq。

至此,virtio_pci_modern_probe函数就解析完了。

欲知后事如何,且看下回分解。

举报

选择你想要举报的内容(必选)
  • 内容涉黄
  • 政治相关
  • 内容抄袭
  • 涉嫌广告
  • 内容侵权
  • 侮辱谩骂
  • 样式问题
  • 其他