QEMU源码全解析 —— virtio(23)

88 篇文章 19 订阅
本文深入解析QEMU中的virtio balloon设备初始化过程,通过virtballoon_probe函数,详细介绍如何分配和初始化virtio_balloon结构,以及如何通过init_vqs函数设置virtqueue和vring,实现virtio驱动与设备的数据通信。文章还涉及virtio_device_ready和virtio_has_feature等关键函数的作用。
摘要由CSDN通过智能技术生成

接前一篇文章:

本回开始以 virtio balloon设备的初始化过程为例,分析virtio设备的初始化过程,即前文书( QEMU源码全解析 —— virtio(21) )所讲到的virtio驱动初始化设备的过程中的“(9)执行设备相关的 初始化操作 ”一步。

下边就以virtio balloon设备的初始化过程为例,分析virtio设备的初始化过程。再次贴出virtballoon_probe函数的源码,它在Linux内核源码/drivers/virtio/virtio_balloon.c中,如下:

  1. static int virtballoon_probe(struct virtio_device *vdev)
  2. {
  3. struct virtio_balloon *vb;
  4. int err;
  5. if (!vdev->config->get) {
  6. dev_err(&vdev->dev, "%s failure: config access disabled\n",
  7. __func__);
  8. return -EINVAL;
  9. }
  10. vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
  11. if (!vb) {
  12. err = -ENOMEM;
  13. goto out;
  14. }
  15. INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
  16. INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
  17. spin_lock_init(&vb->stop_update_lock);
  18. mutex_init(&vb->balloon_lock);
  19. init_waitqueue_head(&vb->acked);
  20. vb->vdev = vdev;
  21. balloon_devinfo_init(&vb->vb_dev_info);
  22. err = init_vqs(vb);
  23. if (err)
  24. goto out_free_vb;
  25. #ifdef CONFIG_BALLOON_COMPACTION
  26. vb->vb_dev_info.migratepage = virtballoon_migratepage;
  27. #endif
  28. if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
  29. /*
  30. * There is always one entry reserved for cmd id, so the ring
  31. * size needs to be at least two to report free page hints.
  32. */
  33. if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
  34. err = -ENOSPC;
  35. goto out_del_vqs;
  36. }
  37. vb->balloon_wq = alloc_workqueue("balloon-wq",
  38. WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
  39. if (!vb->balloon_wq) {
  40. err = -ENOMEM;
  41. goto out_del_vqs;
  42. }
  43. INIT_WORK(&vb->report_free_page_work, report_free_page_func);
  44. vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP;
  45. vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
  46. VIRTIO_BALLOON_CMD_ID_STOP);
  47. vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
  48. VIRTIO_BALLOON_CMD_ID_STOP);
  49. spin_lock_init(&vb->free_page_list_lock);
  50. INIT_LIST_HEAD(&vb->free_page_list);
  51. /*
  52. * We're allowed to reuse any free pages, even if they are
  53. * still to be processed by the host.
  54. */
  55. err = virtio_balloon_register_shrinker(vb);
  56. if (err)
  57. goto out_del_balloon_wq;
  58. }
  59. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
  60. vb->oom_nb.notifier_call = virtio_balloon_oom_notify;
  61. vb->oom_nb.priority = VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY;
  62. err = register_oom_notifier(&vb->oom_nb);
  63. if (err < 0)
  64. goto out_unregister_shrinker;
  65. }
  66. if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
  67. /* Start with poison val of 0 representing general init */
  68. __u32 poison_val = 0;
  69. /*
  70. * Let the hypervisor know that we are expecting a
  71. * specific value to be written back in balloon pages.
  72. *
  73. * If the PAGE_POISON value was larger than a byte we would
  74. * need to byte swap poison_val here to guarantee it is
  75. * little-endian. However for now it is a single byte so we
  76. * can pass it as-is.
  77. */
  78. if (!want_init_on_free())
  79. memset(&poison_val, PAGE_POISON, sizeof(poison_val));
  80. virtio_cwrite_le(vb->vdev, struct virtio_balloon_config,
  81. poison_val, &poison_val);
  82. }
  83. vb->pr_dev_info.report = virtballoon_free_page_report;
  84. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
  85. unsigned int capacity;
  86. capacity = virtqueue_get_vring_size(vb->reporting_vq);
  87. if (capacity < PAGE_REPORTING_CAPACITY) {
  88. err = -ENOSPC;
  89. goto out_unregister_oom;
  90. }
  91. /*
  92. * The default page reporting order is @pageblock_order, which
  93. * corresponds to 512MB in size on ARM64 when 64KB base page
  94. * size is used. The page reporting won't be triggered if the
  95. * freeing page can't come up with a free area like that huge.
  96. * So we specify the page reporting order to 5, corresponding
  97. * to 2MB. It helps to avoid THP splitting if 4KB base page
  98. * size is used by host.
  99. *
  100. * Ideally, the page reporting order is selected based on the
  101. * host's base page size. However, it needs more work to report
  102. * that value. The hard-coded order would be fine currently.
  103. */
  104. #if defined(CONFIG_ARM64) && defined(CONFIG_ARM64_64K_PAGES)
  105. vb->pr_dev_info.order = 5;
  106. #endif
  107. err = page_reporting_register(&vb->pr_dev_info);
  108. if (err)
  109. goto out_unregister_oom;
  110. }
  111. virtio_device_ready(vdev);
  112. if (towards_target(vb))
  113. virtballoon_changed(vdev);
  114. return 0;
  115. out_unregister_oom:
  116. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
  117. unregister_oom_notifier(&vb->oom_nb);
  118. out_unregister_shrinker:
  119. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
  120. virtio_balloon_unregister_shrinker(vb);
  121. out_del_balloon_wq:
  122. if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
  123. destroy_workqueue(vb->balloon_wq);
  124. out_del_vqs:
  125. vdev->config->del_vqs(vdev);
  126. out_free_vb:
  127. kfree(vb);
  128. out:
  129. return err;
  130. }

virtio balloon设备用virtio_balloon结构表示,该结构在Linux内核源码/drivers/virtio/virtio_balloon.c中,定义如下:

  1. struct virtio_balloon {
  2. struct virtio_device *vdev;
  3. struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
  4. /* Balloon's own wq for cpu-intensive work items */
  5. struct workqueue_struct *balloon_wq;
  6. /* The free page reporting work item submitted to the balloon wq */
  7. struct work_struct report_free_page_work;
  8. /* The balloon servicing is delegated to a freezable workqueue. */
  9. struct work_struct update_balloon_stats_work;
  10. struct work_struct update_balloon_size_work;
  11. /* Prevent updating balloon when it is being canceled. */
  12. spinlock_t stop_update_lock;
  13. bool stop_update;
  14. /* Bitmap to indicate if reading the related config fields are needed */
  15. unsigned long config_read_bitmap;
  16. /* The list of allocated free pages, waiting to be given back to mm */
  17. struct list_head free_page_list;
  18. spinlock_t free_page_list_lock;
  19. /* The number of free page blocks on the above list */
  20. unsigned long num_free_page_blocks;
  21. /*
  22. * The cmd id received from host.
  23. * Read it via virtio_balloon_cmd_id_received to get the latest value
  24. * sent from host.
  25. */
  26. u32 cmd_id_received_cache;
  27. /* The cmd id that is actively in use */
  28. __virtio32 cmd_id_active;
  29. /* Buffer to store the stop sign */
  30. __virtio32 cmd_id_stop;
  31. /* Waiting for host to ack the pages we released. */
  32. wait_queue_head_t acked;
  33. /* Number of balloon pages we've told the Host we're not using. */
  34. unsigned int num_pages;
  35. /*
  36. * The pages we've told the Host we're not using are enqueued
  37. * at vb_dev_info->pages list.
  38. * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
  39. * to num_pages above.
  40. */
  41. struct balloon_dev_info vb_dev_info;
  42. /* Synchronize access/update to this struct virtio_balloon elements */
  43. struct mutex balloon_lock;
  44. /* The array of pfns we tell the Host about. */
  45. unsigned int num_pfns;
  46. __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
  47. /* Memory statistics */
  48. struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
  49. /* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */
  50. struct shrinker *shrinker;
  51. /* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */
  52. struct notifier_block oom_nb;
  53. /* Free page reporting device */
  54. struct virtqueue *reporting_vq;
  55. struct page_reporting_dev_info pr_dev_info;
  56. };

virtio_balloon结构(struct virtio_balloon)中存放了与该virtio balloon设备密切相关的数据成员。

(1)virtballoon_probe函数首先分配了一个virtio_balloon结构对象,并赋值给vb,同时virtio_device的priv成员也会保存该结构对象的地址 。代码片段如下:

  1. struct virtio_balloon *vb;
  2. ……
  3. vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
  4. if (!vb) {
  5. err = -ENOMEM;
  6. goto out;
  7. }

(2)接着对分配的virtio_balloon对象的成员进行初始化 。这基本上就是该函数余下的代码了。其中有两个重要函数:init_vqs()和virtio_device_ready()。

virtio_device_ready函数同样在 QEMU源码全解析 —— virtio(21) 中已经讲过了,如下所示:

前一个函数init_vqs用于初始化virtqueue和vring,virtio驱动与virtio设备通过virtqueue进行数据通信。下边详细解析init_vqs函数。

init_vqs函数也在Linux内核源码/drivers/virtio/virtio_balloon.c中,代码如下:

  1. static int init_vqs(struct virtio_balloon *vb)
  2. {
  3. struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
  4. vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
  5. const char *names[VIRTIO_BALLOON_VQ_MAX];
  6. int err;
  7. /*
  8. * Inflateq and deflateq are used unconditionally. The names[]
  9. * will be NULL if the related feature is not enabled, which will
  10. * cause no allocation for the corresponding virtqueue in find_vqs.
  11. */
  12. callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
  13. names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
  14. callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
  15. names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
  16. callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL;
  17. names[VIRTIO_BALLOON_VQ_STATS] = NULL;
  18. callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
  19. names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
  20. names[VIRTIO_BALLOON_VQ_REPORTING] = NULL;
  21. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
  22. names[VIRTIO_BALLOON_VQ_STATS] = "stats";
  23. callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
  24. }
  25. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
  26. names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
  27. callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
  28. }
  29. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
  30. names[VIRTIO_BALLOON_VQ_REPORTING] = "reporting_vq";
  31. callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
  32. }
  33. err = virtio_find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, vqs,
  34. callbacks, names, NULL);
  35. if (err)
  36. return err;
  37. vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
  38. vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
  39. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
  40. struct scatterlist sg;
  41. unsigned int num_stats;
  42. vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
  43. /*
  44. * Prime this virtqueue with one buffer so the hypervisor can
  45. * use it to signal us later (it can't be broken yet!).
  46. */
  47. num_stats = update_balloon_stats(vb);
  48. sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
  49. err = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb,
  50. GFP_KERNEL);
  51. if (err) {
  52. dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n",
  53. __func__);
  54. return err;
  55. }
  56. virtqueue_kick(vb->stats_vq);
  57. }
  58. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
  59. vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
  60. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
  61. vb->reporting_vq = vqs[VIRTIO_BALLOON_VQ_REPORTING];
  62. return 0;
  63. }

(1)init_vqs函数首先初始化callbacks指针数组和names指针数组 。代码片段如下:

  1. struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
  2. vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
  3. const char *names[VIRTIO_BALLOON_VQ_MAX];
  4. int err;
  5. /*
  6. * Inflateq and deflateq are used unconditionally. The names[]
  7. * will be NULL if the related feature is not enabled, which will
  8. * cause no allocation for the corresponding virtqueue in find_vqs.
  9. */
  10. callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
  11. names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
  12. callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
  13. names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
  14. callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL;
  15. names[VIRTIO_BALLOON_VQ_STATS] = NULL;
  16. callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
  17. names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
  18. names[VIRTIO_BALLOON_VQ_REPORTING] = NULL;

先来看一下VIRTIO_BALLOON_VQ_STATS的定义,同样Linux内核源码/drivers/virtio/virtio_balloon.c中,如下:

  1. enum virtio_balloon_vq {
  2. VIRTIO_BALLOON_VQ_INFLATE,
  3. VIRTIO_BALLOON_VQ_DEFLATE,
  4. VIRTIO_BALLOON_VQ_STATS,
  5. VIRTIO_BALLOON_VQ_FREE_PAGE,
  6. VIRTIO_BALLOON_VQ_REPORTING,
  7. VIRTIO_BALLOON_VQ_MAX
  8. };

从定义中就能看出,VIRTIO_BALLOON_VQ_INFLATE的值为0,VIRTIO_BALLOON_VQ_DEFLATE的值为1,VIRTIO_BALLOON_VQ_STATS的值为2,VIRTIO_BALLOON_VQ_FREE_PAGE的值为3,VIRTIO_BALLOON_VQ_REPORTING的值为4,VIRTIO_BALLOON_VQ_MAX的值为5。

根据代码注释,inflateq和deflateq是无条件使用的。而如果相关feature未被使能,则相应的names[i]将为NULL,这将导致在find_vqs函数中不分配相应的virtqueue。

由注释就能够理解后边的代码了。由于inflateq和deflateq是无条件使用的,因此它们所对应的names[VIRTIO_BALLOON_VQ_INFLATE](即names[0])和names[VIRTIO_BALLOON_VQ_DEFLATE](即names[1])在一开始时就不是NULL,而分别是"inflate"和"deflate"。同时,各自的callback也均被设置为balloon_ack。

(2)接下来就是判断余下的VIRTIO_BALLOON_F_STATS_VQ、VIRTIO_BALLOON_F_FREE_PAGE_HINT、VIRTIO_BALLOON_F_REPORTING 特性是否存在 。代码片段如下:

  1. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
  2. names[VIRTIO_BALLOON_VQ_STATS] = "stats";
  3. callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
  4. }
  5. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
  6. names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
  7. callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
  8. }
  9. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
  10. names[VIRTIO_BALLOON_VQ_REPORTING] = "reporting_vq";
  11. callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
  12. }

按照上边所讲的,这三项都不是无条件使能的feature,因此在最开始初始化时都默认为不使能,将对应的names[i]都设置为了NULL(其中stats和free_page对应的callbacks[i]也同时被设置为NULL,reporting对应的callbacks[i]在初始化阶段并未赋值)。然后在这里对于相应特性是否使能进行判断,哪项使能了,就设置其names[i]为相应值,当然callbacks[i]也一样。

virtio_has_feature函数在Linux内核源码/include/linux/virtio_config.h中,代码如下:

  1. /**
  2. * virtio_has_feature - helper to determine if this device has this feature.
  3. * @vdev: the device
  4. * @fbit: the feature bit
  5. */
  6. static inline bool virtio_has_feature(const struct virtio_device *vdev,
  7. unsigned int fbit)
  8. {
  9. if (fbit < VIRTIO_TRANSPORT_F_START)
  10. virtio_check_driver_offered_feature(vdev, fbit);
  11. return __virtio_test_bit(vdev, fbit);
  12. }

VIRTIO_TRANSPORT_F_START宏的定义在Linux内核源码/include/uapi/linux/virtio_config.h中,如下:

  1. /*
  2. * Virtio feature bits VIRTIO_TRANSPORT_F_START through
  3. * VIRTIO_TRANSPORT_F_END are reserved for the transport
  4. * being used (e.g. virtio_ring, virtio_pci etc.), the
  5. * rest are per-device feature bits.
  6. */
  7. #define VIRTIO_TRANSPORT_F_START 28
  8. #define VIRTIO_TRANSPORT_F_END 41

在这里,传给virtio_has_feature函数的参数fbit所对应的实参分别是特性位VIRTIO_BALLOON_F_STATS_VQ(1)、VIRTIO_BALLOON_F_FREE_PAGE_HINT(3)、VIRTIO_BALLOON_F_REPORTING(5)(定义在Linux内核源码/include/uapi/linux/virtio_balloon.h中;注意它们是feature bit,而不是前面的virtqueue下标VIRTIO_BALLOON_VQ_*),都小于VIRTIO_TRANSPORT_F_START(28)。所以,都要经过virtio_check_driver_offered_feature函数。virtio_check_driver_offered_feature函数在Linux内核源码/drivers/virtio/virtio.c中,代码如下:

  1. void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
  2. unsigned int fbit)
  3. {
  4. unsigned int i;
  5. struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
  6. for (i = 0; i < drv->feature_table_size; i++)
  7. if (drv->feature_table[i] == fbit)
  8. return;
  9. if (drv->feature_table_legacy) {
  10. for (i = 0; i < drv->feature_table_size_legacy; i++)
  11. if (drv->feature_table_legacy[i] == fbit)
  12. return;
  13. }
  14. BUG();
  15. }
  16. EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature);

virtio_check_driver_offered_feature函数检查vdev(struct virtio_device类型)所对应的drv(struct virtio_driver类型)的feature_table[]中是否有对应项;如果没有找到,并且drv->feature_table_legacy不为空,则继续在feature_table_legacy[]中查找。只要在其中任意一个表中找到,就直接返回,相安无事;如果都没有找到,则报BUG()。

经过virtio_check_driver_offered_feature函数的检查后,virtio_has_feature函数调用__virtio_test_bit函数并返回了。__virtio_test_bit函数也在Linux内核源码/include/linux/virtio_config.h中,代码如下:

  1. /**
  2. * __virtio_test_bit - helper to test feature bits. For use by transports.
  3. * Devices should normally use virtio_has_feature,
  4. * which includes more checks.
  5. * @vdev: the device
  6. * @fbit: the feature bit
  7. */
  8. static inline bool __virtio_test_bit(const struct virtio_device *vdev,
  9. unsigned int fbit)
  10. {
  11. /* Did you forget to fix assumptions on max features? */
  12. if (__builtin_constant_p(fbit))
  13. BUILD_BUG_ON(fbit >= 64);
  14. else
  15. BUG_ON(fbit >= 64);
  16. return vdev->features & BIT_ULL(fbit);
  17. }

这个函数比较好理解,就是检查vdev->features中相应的bit位是否置1,如果置为了1,则说明该feature已经使能,返回真(true);否则说明该feature未使能,返回假(false)。

回到init_vqs函数。在经过了对于各feature的初始化、检查和设置后,接下来init_vqs函数调用了virtio_find_vqs函数,对于该函数的解析,请看下回。

举报

选择你想要举报的内容(必选)
  • 内容涉黄
  • 政治相关
  • 内容抄袭
  • 涉嫌广告
  • 内容侵权
  • 侮辱谩骂
  • 样式问题
  • 其他