接前一篇文章:
本回开始以 virtio balloon设备的初始化过程为例,分析virtio设备的初始化过程,即前文书( QEMU源码全解析 —— virtio(21) )所讲到的virtio驱动初始化设备的过程中的“(9)执行设备相关的 初始化操作 ”一步。
下边就以virtio balloon设备的初始化过程为例,分析virtio设备的初始化过程。再次贴出virtballoon_probe函数的源码,该函数在Linux内核源码/drivers/virtio/virtio_balloon.c中,如下:
- static int virtballoon_probe(struct virtio_device *vdev)
- {
- struct virtio_balloon *vb;
- int err;
-
- if (!vdev->config->get) {
- dev_err(&vdev->dev, "%s failure: config access disabled\n",
- __func__);
- return -EINVAL;
- }
-
- vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
- if (!vb) {
- err = -ENOMEM;
- goto out;
- }
-
- INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
- INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
- spin_lock_init(&vb->stop_update_lock);
- mutex_init(&vb->balloon_lock);
- init_waitqueue_head(&vb->acked);
- vb->vdev = vdev;
-
- balloon_devinfo_init(&vb->vb_dev_info);
-
- err = init_vqs(vb);
- if (err)
- goto out_free_vb;
-
- #ifdef CONFIG_BALLOON_COMPACTION
- vb->vb_dev_info.migratepage = virtballoon_migratepage;
- #endif
- if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
- /*
- * There is always one entry reserved for cmd id, so the ring
- * size needs to be at least two to report free page hints.
- */
- if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
- err = -ENOSPC;
- goto out_del_vqs;
- }
- vb->balloon_wq = alloc_workqueue("balloon-wq",
- WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
- if (!vb->balloon_wq) {
- err = -ENOMEM;
- goto out_del_vqs;
- }
- INIT_WORK(&vb->report_free_page_work, report_free_page_func);
- vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP;
- vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
- VIRTIO_BALLOON_CMD_ID_STOP);
- vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
- VIRTIO_BALLOON_CMD_ID_STOP);
- spin_lock_init(&vb->free_page_list_lock);
- INIT_LIST_HEAD(&vb->free_page_list);
- /*
- * We're allowed to reuse any free pages, even if they are
- * still to be processed by the host.
- */
- err = virtio_balloon_register_shrinker(vb);
- if (err)
- goto out_del_balloon_wq;
- }
-
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
- vb->oom_nb.notifier_call = virtio_balloon_oom_notify;
- vb->oom_nb.priority = VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY;
- err = register_oom_notifier(&vb->oom_nb);
- if (err < 0)
- goto out_unregister_shrinker;
- }
-
- if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
- /* Start with poison val of 0 representing general init */
- __u32 poison_val = 0;
-
- /*
- * Let the hypervisor know that we are expecting a
- * specific value to be written back in balloon pages.
- *
- * If the PAGE_POISON value was larger than a byte we would
- * need to byte swap poison_val here to guarantee it is
- * little-endian. However for now it is a single byte so we
- * can pass it as-is.
- */
- if (!want_init_on_free())
- memset(&poison_val, PAGE_POISON, sizeof(poison_val));
-
- virtio_cwrite_le(vb->vdev, struct virtio_balloon_config,
- poison_val, &poison_val);
- }
-
- vb->pr_dev_info.report = virtballoon_free_page_report;
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
- unsigned int capacity;
-
- capacity = virtqueue_get_vring_size(vb->reporting_vq);
- if (capacity < PAGE_REPORTING_CAPACITY) {
- err = -ENOSPC;
- goto out_unregister_oom;
- }
-
- /*
- * The default page reporting order is @pageblock_order, which
- * corresponds to 512MB in size on ARM64 when 64KB base page
- * size is used. The page reporting won't be triggered if the
- * freeing page can't come up with a free area like that huge.
- * So we specify the page reporting order to 5, corresponding
- * to 2MB. It helps to avoid THP splitting if 4KB base page
- * size is used by host.
- *
- * Ideally, the page reporting order is selected based on the
- * host's base page size. However, it needs more work to report
- * that value. The hard-coded order would be fine currently.
- */
- #if defined(CONFIG_ARM64) && defined(CONFIG_ARM64_64K_PAGES)
- vb->pr_dev_info.order = 5;
- #endif
-
- err = page_reporting_register(&vb->pr_dev_info);
- if (err)
- goto out_unregister_oom;
- }
-
- virtio_device_ready(vdev);
-
- if (towards_target(vb))
- virtballoon_changed(vdev);
- return 0;
-
- out_unregister_oom:
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
- unregister_oom_notifier(&vb->oom_nb);
- out_unregister_shrinker:
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
- virtio_balloon_unregister_shrinker(vb);
- out_del_balloon_wq:
- if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
- destroy_workqueue(vb->balloon_wq);
- out_del_vqs:
- vdev->config->del_vqs(vdev);
- out_free_vb:
- kfree(vb);
- out:
- return err;
- }
virtio balloon设备用virtio_balloon结构表示,该结构在Linux内核源码/drivers/virtio/virtio_balloon.c中,定义如下:
- struct virtio_balloon {
- struct virtio_device *vdev;
- struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
-
- /* Balloon's own wq for cpu-intensive work items */
- struct workqueue_struct *balloon_wq;
- /* The free page reporting work item submitted to the balloon wq */
- struct work_struct report_free_page_work;
-
- /* The balloon servicing is delegated to a freezable workqueue. */
- struct work_struct update_balloon_stats_work;
- struct work_struct update_balloon_size_work;
-
- /* Prevent updating balloon when it is being canceled. */
- spinlock_t stop_update_lock;
- bool stop_update;
- /* Bitmap to indicate if reading the related config fields are needed */
- unsigned long config_read_bitmap;
-
- /* The list of allocated free pages, waiting to be given back to mm */
- struct list_head free_page_list;
- spinlock_t free_page_list_lock;
- /* The number of free page blocks on the above list */
- unsigned long num_free_page_blocks;
- /*
- * The cmd id received from host.
- * Read it via virtio_balloon_cmd_id_received to get the latest value
- * sent from host.
- */
- u32 cmd_id_received_cache;
- /* The cmd id that is actively in use */
- __virtio32 cmd_id_active;
- /* Buffer to store the stop sign */
- __virtio32 cmd_id_stop;
-
- /* Waiting for host to ack the pages we released. */
- wait_queue_head_t acked;
-
- /* Number of balloon pages we've told the Host we're not using. */
- unsigned int num_pages;
- /*
- * The pages we've told the Host we're not using are enqueued
- * at vb_dev_info->pages list.
- * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
- * to num_pages above.
- */
- struct balloon_dev_info vb_dev_info;
-
- /* Synchronize access/update to this struct virtio_balloon elements */
- struct mutex balloon_lock;
-
- /* The array of pfns we tell the Host about. */
- unsigned int num_pfns;
- __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
-
- /* Memory statistics */
- struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
-
- /* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */
- struct shrinker *shrinker;
-
- /* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */
- struct notifier_block oom_nb;
-
- /* Free page reporting device */
- struct virtqueue *reporting_vq;
- struct page_reporting_dev_info pr_dev_info;
- };
virtio_balloon结构(struct virtio_balloon)中存放了与该virtio balloon设备密切相关的数据成员。
(1)virtballoon_probe函数首先分配了一个virtio_balloon结构对象,并将其地址赋值给vb,同时virtio_device的priv成员也会保存该结构对象的地址。代码片段如下:
- struct virtio_balloon *vb;
- ……
- vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
- if (!vb) {
- err = -ENOMEM;
- goto out;
- }
(2)接着对分配的virtio_balloon对象的成员进行初始化 。这基本上就是该函数余下的代码了。其中有两个重要函数:init_vqs()和virtio_device_ready()。
后一个函数virtio_device_ready已经在 QEMU源码全解析 —— virtio(21) 中讲过了,这里不再赘述。
前一个函数init_vqs用于初始化virtqueue和vring,virtio驱动与virtio设备通过virtqueue进行数据通信。下边详细解析init_vqs函数。
init_vqs函数也在Linux内核源码/drivers/virtio/virtio_balloon.c中,代码如下:
- static int init_vqs(struct virtio_balloon *vb)
- {
- struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
- vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
- const char *names[VIRTIO_BALLOON_VQ_MAX];
- int err;
-
- /*
- * Inflateq and deflateq are used unconditionally. The names[]
- * will be NULL if the related feature is not enabled, which will
- * cause no allocation for the corresponding virtqueue in find_vqs.
- */
- callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
- names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
- callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
- names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
- callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL;
- names[VIRTIO_BALLOON_VQ_STATS] = NULL;
- callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
- names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
- names[VIRTIO_BALLOON_VQ_REPORTING] = NULL;
-
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
- names[VIRTIO_BALLOON_VQ_STATS] = "stats";
- callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
- }
-
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
- names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
- callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
- }
-
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
- names[VIRTIO_BALLOON_VQ_REPORTING] = "reporting_vq";
- callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
- }
-
- err = virtio_find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, vqs,
- callbacks, names, NULL);
- if (err)
- return err;
-
- vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
- vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
- struct scatterlist sg;
- unsigned int num_stats;
- vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
-
- /*
- * Prime this virtqueue with one buffer so the hypervisor can
- * use it to signal us later (it can't be broken yet!).
- */
- num_stats = update_balloon_stats(vb);
-
- sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
- err = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb,
- GFP_KERNEL);
- if (err) {
- dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n",
- __func__);
- return err;
- }
- virtqueue_kick(vb->stats_vq);
- }
-
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
- vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
-
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
- vb->reporting_vq = vqs[VIRTIO_BALLOON_VQ_REPORTING];
-
- return 0;
- }
(1)init_vqs函数首先初始化callbacks指针数组和names指针数组 。代码片段如下:
- struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
- vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
- const char *names[VIRTIO_BALLOON_VQ_MAX];
- int err;
-
- /*
- * Inflateq and deflateq are used unconditionally. The names[]
- * will be NULL if the related feature is not enabled, which will
- * cause no allocation for the corresponding virtqueue in find_vqs.
- */
- callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
- names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
- callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
- names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
- callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL;
- names[VIRTIO_BALLOON_VQ_STATS] = NULL;
- callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
- names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
- names[VIRTIO_BALLOON_VQ_REPORTING] = NULL;
先来看一下VIRTIO_BALLOON_VQ_STATS等枚举值(enum virtio_balloon_vq)的定义,同样在Linux内核源码/drivers/virtio/virtio_balloon.c中,如下:
- enum virtio_balloon_vq {
- VIRTIO_BALLOON_VQ_INFLATE,
- VIRTIO_BALLOON_VQ_DEFLATE,
- VIRTIO_BALLOON_VQ_STATS,
- VIRTIO_BALLOON_VQ_FREE_PAGE,
- VIRTIO_BALLOON_VQ_REPORTING,
- VIRTIO_BALLOON_VQ_MAX
- };
从定义中就能看出,VIRTIO_BALLOON_VQ_INFLATE的值为0,VIRTIO_BALLOON_VQ_DEFLATE的值为1,VIRTIO_BALLOON_VQ_STATS的值为2,VIRTIO_BALLOON_VQ_FREE_PAGE的值为3,VIRTIO_BALLOON_VQ_REPORTING的值为4,VIRTIO_BALLOON_VQ_MAX的值为5。
根据代码注释,inflateq和deflateq是无条件使用的。而如果相关feature未被使能,则相应的names[i]将为NULL,这将导致在find_vqs函数中不分配相应的virtqueue。
由注释就能够理解后边的代码了。由于inflateq和deflateq是无条件使用的,因此它们所对应的names[VIRTIO_BALLOON_VQ_INFLATE](即names[0])和names[VIRTIO_BALLOON_VQ_DEFLATE](即names[1])在一开始时就不是NULL,而分别是"inflate"和"deflate"。同时,各自的callback也均被设置为balloon_ack。
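(2)接下来就是判断VIRTIO_BALLOON_F_STATS_VQ、VIRTIO_BALLOON_F_FREE_PAGE_HINT、VIRTIO_BALLOON_F_REPORTING这三个特性是否存在(它们分别对应余下的VIRTIO_BALLOON_VQ_STATS、VIRTIO_BALLOON_VQ_FREE_PAGE、VIRTIO_BALLOON_VQ_REPORTING三个virtqueue)。代码片段如下: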
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
- names[VIRTIO_BALLOON_VQ_STATS] = "stats";
- callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
- }
-
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
- names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
- callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
- }
-
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
- names[VIRTIO_BALLOON_VQ_REPORTING] = "reporting_vq";
- callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
- }
按照上边所讲的,这三项都不是无条件使能的feature,因此在最开始初始化时都默认为不使能,将对应的names[i]和callbacks[i]都设置为了NULL。然后在这里对于相应特性是否使能进行判断,哪项使能了,就设置其names[i]为相应值,当然callbacks[i]也一样。
virtio_has_feature函数在Linux内核源码/include/linux/virtio_config.h中,代码如下:
- /**
- * virtio_has_feature - helper to determine if this device has this feature.
- * @vdev: the device
- * @fbit: the feature bit
- */
- static inline bool virtio_has_feature(const struct virtio_device *vdev,
- unsigned int fbit)
- {
- if (fbit < VIRTIO_TRANSPORT_F_START)
- virtio_check_driver_offered_feature(vdev, fbit);
-
- return __virtio_test_bit(vdev, fbit);
- }
VIRTIO_TRANSPORT_F_START宏的定义在Linux内核源码/include/uapi/linux/virtio_config.h中,如下:
- /*
- * Virtio feature bits VIRTIO_TRANSPORT_F_START through
- * VIRTIO_TRANSPORT_F_END are reserved for the transport
- * being used (e.g. virtio_ring, virtio_pci etc.), the
- * rest are per-device feature bits.
- */
- #define VIRTIO_TRANSPORT_F_START 28
- #define VIRTIO_TRANSPORT_F_END 41
在这里,传给virtio_has_feature函数的参数fbit所对应的实参分别是VIRTIO_BALLOON_F_STATS_VQ(1)、VIRTIO_BALLOON_F_FREE_PAGE_HINT(3)、VIRTIO_BALLOON_F_REPORTING(5)(注意它们是feature bit,定义在Linux内核源码/include/uapi/linux/virtio_balloon.h中,而不是上边的virtqueue下标VIRTIO_BALLOON_VQ_*),都小于VIRTIO_TRANSPORT_F_START(28)。所以,都要经过virtio_check_driver_offered_feature函数。virtio_check_driver_offered_feature函数在Linux内核源码/drivers/virtio/virtio.c中,代码如下:
- void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
- unsigned int fbit)
- {
- unsigned int i;
- struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
-
- for (i = 0; i < drv->feature_table_size; i++)
- if (drv->feature_table[i] == fbit)
- return;
-
- if (drv->feature_table_legacy) {
- for (i = 0; i < drv->feature_table_size_legacy; i++)
- if (drv->feature_table_legacy[i] == fbit)
- return;
- }
-
- BUG();
- }
- EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature);
virtio_check_driver_offered_feature函数检查vdev(struct virtio_device类型)所对应的drv(struct virtio_driver类型)的feature_table[]中是否有对应项;如果没有且drv->feature_table_legacy不为空,则继续在feature_table_legacy[]中查找。只要在其中任何一个表中找到,就相安无事,直接返回;如果都没有找到,则报BUG()。
经过virtio_check_driver_offered_feature函数的检查后,virtio_has_feature函数调用__virtio_test_bit函数并返回了。__virtio_test_bit函数也在Linux内核源码/include/linux/virtio_config.h中,代码如下:
- /**
- * __virtio_test_bit - helper to test feature bits. For use by transports.
- * Devices should normally use virtio_has_feature,
- * which includes more checks.
- * @vdev: the device
- * @fbit: the feature bit
- */
- static inline bool __virtio_test_bit(const struct virtio_device *vdev,
- unsigned int fbit)
- {
- /* Did you forget to fix assumptions on max features? */
- if (__builtin_constant_p(fbit))
- BUILD_BUG_ON(fbit >= 64);
- else
- BUG_ON(fbit >= 64);
-
- return vdev->features & BIT_ULL(fbit);
- }
这个函数比较好理解,就是检查vdev->features中相应的bit位是否置1,如果置为了1,则说明该feature已经使能,返回真(true);否则说明该feature未使能,返回假(false)。
回到init_vqs函数。在经过了对于各feature的初始化、检查和设置后,接下来init_vqs函数调用了virtio_find_vqs函数,对于该函数的解析,请看下回。