接前一篇文章: QEMU源码全解析 —— virtio(20)
从 QEMU源码全解析 —— virtio(17) 到 QEMU源码全解析 —— virtio(20),用4篇文章讲解了 virtio 驱动的加载过程,本回对其进行回顾和复习。
一般来讲,virtio驱动初始化一个设备的过程如下:
(1)重置设备
这是在上一回所讲的register_virtio_device函数中通过dev->config->reset调用完成的。这里要注意,老版本中调用的是dev->config->reset(),而新版本中直接使用virtio_reset_device函数。该函数在 Linux 内核源码/drivers/virtio/virtio.c中,代码如下:
- /**
- * virtio_reset_device - quiesce device for removal
- * @dev: the device to reset
- *
- * Prevents device from sending interrupts and accessing memory.
- *
- * Generally used for cleanup during driver / device removal.
- *
- * Once this has been invoked, caller must ensure that
- * virtqueue_notify / virtqueue_kick are not in progress.
- *
- * Note: this guarantees that vq callbacks are not in progress, however caller
- * is responsible for preventing access from other contexts, such as a system
- * call/workqueue/bh. Invoking virtio_break_device then flushing any such
- * contexts is one way to handle that.
- * */
- void virtio_reset_device(struct virtio_device *dev)
- {
- #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
- /*
- * The below virtio_synchronize_cbs() guarantees that any
- * interrupt for this line arriving after
- * virtio_synchronize_vqs() has completed is guaranteed to see
- * vq->broken as true.
- */
- virtio_break_device(dev);
- virtio_synchronize_cbs(dev);
- #endif
-
- dev->config->reset(dev);
- }
- EXPORT_SYMBOL_GPL(virtio_reset_device);
可以看到,实际上virtio_reset_device函数可以说是对dev->config->reset()的简单封装,只是多了一个额外的配置选项(CONFIG_VIRTIO_HARDEN_NOTIFICATION)及该选项下的相关处理。
(2)设置ACKNOWLEDGE状态位
设置ACKNOWLEDGE状态位,表示virtio驱动已经知道了该设备。这同样是在register_virtio_device函数中由virtio_add_status()函数完成的,代码片段如下:
- /* We always start by resetting the device, in case a previous
- * driver messed it up. This also tests that code path a little. */
- virtio_reset_device(dev);
-
- /* Acknowledge that we've seen the device. */
- virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
virtio_add_status函数也在Linux内核源码/drivers/virtio/virtio.c中,代码如下:
- void virtio_add_status(struct virtio_device *dev, unsigned int status)
- {
- might_sleep();
- dev->config->set_status(dev, dev->config->get_status(dev) | status);
- }
- EXPORT_SYMBOL_GPL(virtio_add_status);
(3)设置DRIVER状态位
设置DRIVER状态位,表示virtio驱动知道怎样驱动该设备。这是在virtio总线的probe函数virtio_dev_probe中,通过virtio_add_status函数完成的。virtio_dev_probe函数在Linux内核源码/drivers/virtio/virtio.c中,代码如下:
- static struct bus_type virtio_bus = {
- .name = "virtio",
- .match = virtio_dev_match,
- .dev_groups = virtio_dev_groups,
- .uevent = virtio_uevent,
- .probe = virtio_dev_probe,
- .remove = virtio_dev_remove,
- };
-
- int register_virtio_driver(struct virtio_driver *driver)
- {
- /* Catch this early. */
- BUG_ON(driver->feature_table_size && !driver->feature_table);
- driver->driver.bus = &virtio_bus;
- return driver_register(&driver->driver);
- }
- EXPORT_SYMBOL_GPL(register_virtio_driver);
- static int virtio_dev_probe(struct device *_d)
- {
- int err, i;
- struct virtio_device *dev = dev_to_virtio(_d);
- struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
- u64 device_features;
- u64 driver_features;
- u64 driver_features_legacy;
-
- /* We have a driver! */
- virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
-
- /* Figure out what features the device supports. */
- device_features = dev->config->get_features(dev);
-
- /* Figure out what features the driver supports. */
- driver_features = 0;
- for (i = 0; i < drv->feature_table_size; i++) {
- unsigned int f = drv->feature_table[i];
- BUG_ON(f >= 64);
- driver_features |= (1ULL << f);
- }
-
- /* Some drivers have a separate feature table for virtio v1.0 */
- if (drv->feature_table_legacy) {
- driver_features_legacy = 0;
- for (i = 0; i < drv->feature_table_size_legacy; i++) {
- unsigned int f = drv->feature_table_legacy[i];
- BUG_ON(f >= 64);
- driver_features_legacy |= (1ULL << f);
- }
- } else {
- driver_features_legacy = driver_features;
- }
-
- if (device_features & (1ULL << VIRTIO_F_VERSION_1))
- dev->features = driver_features & device_features;
- else
- dev->features = driver_features_legacy & device_features;
-
- /* Transport features always preserved to pass to finalize_features. */
- for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
- if (device_features & (1ULL << i))
- __virtio_set_bit(dev, i);
-
- err = dev->config->finalize_features(dev);
- if (err)
- goto err;
-
- if (drv->validate) {
- u64 features = dev->features;
-
- err = drv->validate(dev);
- if (err)
- goto err;
-
- /* Did validation change any features? Then write them again. */
- if (features != dev->features) {
- err = dev->config->finalize_features(dev);
- if (err)
- goto err;
- }
- }
-
- err = virtio_features_ok(dev);
- if (err)
- goto err;
-
- err = drv->probe(dev);
- if (err)
- goto err;
-
- /* If probe didn't do it, mark device DRIVER_OK ourselves. */
- if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
- virtio_device_ready(dev);
-
- if (drv->scan)
- drv->scan(dev);
-
- virtio_config_enable(dev);
-
- return 0;
- err:
- virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
- return err;
-
- }
关键代码片段为:
- /* We have a driver! */
- virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
(4)读取virtio设备的feature位,求出驱动设置的feature
读取virtio设备的feature位,求出驱动设置的feature。这也是在上边的virtio_dev_probe函数中完成的,代码片段如下:
- /* Figure out what features the device supports. */
- device_features = dev->config->get_features(dev);
-
- /* Figure out what features the driver supports. */
- driver_features = 0;
- for (i = 0; i < drv->feature_table_size; i++) {
- unsigned int f = drv->feature_table[i];
- BUG_ON(f >= 64);
- driver_features |= (1ULL << f);
- }
-
- /* Some drivers have a separate feature table for virtio v1.0 */
- if (drv->feature_table_legacy) {
- driver_features_legacy = 0;
- for (i = 0; i < drv->feature_table_size_legacy; i++) {
- unsigned int f = drv->feature_table_legacy[i];
- BUG_ON(f >= 64);
- driver_features_legacy |= (1ULL << f);
- }
- } else {
- driver_features_legacy = driver_features;
- }
(5)计算两者(virtio设备的feature和驱动设置的feature)的交集,得到双方都支持的特性子集
计算两者(virtio设备的feature和驱动设置的feature)的交集,得到双方都支持的特性子集。这也是在上边的virtio_dev_probe函数中完成的,代码片段如下:
- if (device_features & (1ULL << VIRTIO_F_VERSION_1))
- dev->features = driver_features & device_features;
- else
- dev->features = driver_features_legacy & device_features;
(6)向设备写入此子集特性
计算virtio设备的feature和驱动设置的feature的子集后,向设备写入这个子集特性。这也是在上边的virtio_dev_probe函数中完成的,代码片段如下:
- /* Transport features always preserved to pass to finalize_features. */
- for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
- if (device_features & (1ULL << i))
- __virtio_set_bit(dev, i);
-
- err = dev->config->finalize_features(dev);
- if (err)
- goto err;
即先计算driver_features和device_features(并保留设备提供的传输层特性位),然后调用dev->config->finalize_features()。
(7)设置FEATURES_OK状态位
设置FEATURES_OK状态位,在此之后,virtio驱动就不会再接受新的特性了。这也是在上边的virtio_dev_probe函数中完成的,代码片段如下:
- err = virtio_features_ok(dev);
- if (err)
- goto err;
这一步是在virtio_features_ok函数中通过调用virtio_add_status函数完成的。virtio_features_ok函数也在Linux内核源码/drivers/virtio/virtio.c中,代码如下:
- /* Do some validation, then set FEATURES_OK */
- static int virtio_features_ok(struct virtio_device *dev)
- {
- unsigned int status;
-
- might_sleep();
-
- if (virtio_check_mem_acc_cb(dev)) {
- if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1)) {
- dev_warn(&dev->dev,
- "device must provide VIRTIO_F_VERSION_1\n");
- return -ENODEV;
- }
-
- if (!virtio_has_feature(dev, VIRTIO_F_ACCESS_PLATFORM)) {
- dev_warn(&dev->dev,
- "device must provide VIRTIO_F_ACCESS_PLATFORM\n");
- return -ENODEV;
- }
- }
-
- if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1))
- return 0;
-
- virtio_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
- status = dev->config->get_status(dev);
- if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
- dev_err(&dev->dev, "virtio: device refuses features: %x\n",
- status);
- return -ENODEV;
- }
- return 0;
- }
上边提到的关键代码片段为:
virtio_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
(8)重新读取设备的状态位
重新读取设备的状态位(通过dev->config->get_status()),确保VIRTIO_CONFIG_S_FEATURES_OK仍处于设置状态。代码片段如下:
- status = dev->config->get_status(dev);
- if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
- dev_err(&dev->dev, "virtio: device refuses features: %x\n",
- status);
- return -ENODEV;
- }
如果并没有设置成功,则说明设备不支持virtio驱动设置的某些特性,表示设备不可用。这同样是在virtio_features_ok函数中完成的。
(9)执行设备相关的初始化操作
执行设备相关的初始化操作,包括发现设备的virtqueue、读写virtio设备的配置空间等。这些都是在virtio_dev_probe函数中通过调用驱动的probe函数(即drv->probe(dev))完成的。代码片段如下:
- err = drv->probe(dev);
- if (err)
- goto err;
对于virtio balloon来说是virtballoon_probe函数。该函数在Linux内核源码/drivers/virtio/virtio_balloon.c中,代码如下:
- static int virtballoon_probe(struct virtio_device *vdev)
- {
- struct virtio_balloon *vb;
- int err;
-
- if (!vdev->config->get) {
- dev_err(&vdev->dev, "%s failure: config access disabled\n",
- __func__);
- return -EINVAL;
- }
-
- vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
- if (!vb) {
- err = -ENOMEM;
- goto out;
- }
-
- INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
- INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
- spin_lock_init(&vb->stop_update_lock);
- mutex_init(&vb->balloon_lock);
- init_waitqueue_head(&vb->acked);
- vb->vdev = vdev;
-
- balloon_devinfo_init(&vb->vb_dev_info);
-
- err = init_vqs(vb);
- if (err)
- goto out_free_vb;
-
- #ifdef CONFIG_BALLOON_COMPACTION
- vb->vb_dev_info.migratepage = virtballoon_migratepage;
- #endif
- if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
- /*
- * There is always one entry reserved for cmd id, so the ring
- * size needs to be at least two to report free page hints.
- */
- if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
- err = -ENOSPC;
- goto out_del_vqs;
- }
- vb->balloon_wq = alloc_workqueue("balloon-wq",
- WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
- if (!vb->balloon_wq) {
- err = -ENOMEM;
- goto out_del_vqs;
- }
- INIT_WORK(&vb->report_free_page_work, report_free_page_func);
- vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP;
- vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
- VIRTIO_BALLOON_CMD_ID_STOP);
- vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
- VIRTIO_BALLOON_CMD_ID_STOP);
- spin_lock_init(&vb->free_page_list_lock);
- INIT_LIST_HEAD(&vb->free_page_list);
- /*
- * We're allowed to reuse any free pages, even if they are
- * still to be processed by the host.
- */
- err = virtio_balloon_register_shrinker(vb);
- if (err)
- goto out_del_balloon_wq;
- }
-
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
- vb->oom_nb.notifier_call = virtio_balloon_oom_notify;
- vb->oom_nb.priority = VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY;
- err = register_oom_notifier(&vb->oom_nb);
- if (err < 0)
- goto out_unregister_shrinker;
- }
-
- if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
- /* Start with poison val of 0 representing general init */
- __u32 poison_val = 0;
-
- /*
- * Let the hypervisor know that we are expecting a
- * specific value to be written back in balloon pages.
- *
- * If the PAGE_POISON value was larger than a byte we would
- * need to byte swap poison_val here to guarantee it is
- * little-endian. However for now it is a single byte so we
- * can pass it as-is.
- */
- if (!want_init_on_free())
- memset(&poison_val, PAGE_POISON, sizeof(poison_val));
-
- virtio_cwrite_le(vb->vdev, struct virtio_balloon_config,
- poison_val, &poison_val);
- }
-
- vb->pr_dev_info.report = virtballoon_free_page_report;
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
- unsigned int capacity;
-
- capacity = virtqueue_get_vring_size(vb->reporting_vq);
- if (capacity < PAGE_REPORTING_CAPACITY) {
- err = -ENOSPC;
- goto out_unregister_oom;
- }
-
- /*
- * The default page reporting order is @pageblock_order, which
- * corresponds to 512MB in size on ARM64 when 64KB base page
- * size is used. The page reporting won't be triggered if the
- * freeing page can't come up with a free area like that huge.
- * So we specify the page reporting order to 5, corresponding
- * to 2MB. It helps to avoid THP splitting if 4KB base page
- * size is used by host.
- *
- * Ideally, the page reporting order is selected based on the
- * host's base page size. However, it needs more work to report
- * that value. The hard-coded order would be fine currently.
- */
- #if defined(CONFIG_ARM64) && defined(CONFIG_ARM64_64K_PAGES)
- vb->pr_dev_info.order = 5;
- #endif
-
- err = page_reporting_register(&vb->pr_dev_info);
- if (err)
- goto out_unregister_oom;
- }
-
- virtio_device_ready(vdev);
-
- if (towards_target(vb))
- virtballoon_changed(vdev);
- return 0;
-
- out_unregister_oom:
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
- unregister_oom_notifier(&vb->oom_nb);
- out_unregister_shrinker:
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
- virtio_balloon_unregister_shrinker(vb);
- out_del_balloon_wq:
- if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
- destroy_workqueue(vb->balloon_wq);
- out_del_vqs:
- vdev->config->del_vqs(vdev);
- out_free_vb:
- kfree(vb);
- out:
- return err;
- }
(10)设置DRIVER_OK状态位
设置DRIVER_OK状态位,这通常是在具体设备驱动的probe函数中,通过调用virtio_device_ready函数完成的。对于virtio balloon设备来说,就是上边的virtballoon_probe函数。代码片段如下:
virtio_device_ready(vdev);
virtio_device_ready函数在Linux内核源码/include/linux/virtio_config.h中,代码如下:
- /**
- * virtio_device_ready - enable vq use in probe function
- * @dev: the virtio device
- *
- * Driver must call this to use vqs in the probe function.
- *
- * Note: vqs are enabled automatically after probe returns.
- */
- static inline
- void virtio_device_ready(struct virtio_device *dev)
- {
- unsigned status = dev->config->get_status(dev);
-
- WARN_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
-
- #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
- /*
- * The virtio_synchronize_cbs() makes sure vring_interrupt()
- * will see the driver specific setup if it sees vq->broken
- * as false (even if the notifications come before DRIVER_OK).
- */
- virtio_synchronize_cbs(dev);
- __virtio_unbreak_device(dev);
- #endif
- /*
- * The transport should ensure the visibility of vq->broken
- * before setting DRIVER_OK. See the comments for the transport
- * specific set_status() method.
- *
- * A well behaved device will only notify a virtqueue after
- * DRIVER_OK, this means the device should "see" the coherenct
- * memory write that set vq->broken as false which is done by
- * the driver when it sees DRIVER_OK, then the following
- * driver's vring_interrupt() will see vq->broken as false so
- * we won't lose any notification.
- */
- dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
- }
如果设备驱动没有设置DRIVER_OK状态位,则会由总线的probe函数virtio_dev_probe函数来设置。这也是在上边的virtio_dev_probe函数中完成的,代码片段如下:
- /* If probe didn't do it, mark device DRIVER_OK ourselves. */
- if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
- virtio_device_ready(dev);
至此,virtio驱动的加载就基本讲解完了。下一回开始讲解virtio驱动的初始化。