接前一篇文章:
上一回讲到了vp_find_vqs_msix函数的第5步:进入循环,设置每个使能的vq并申请中断。
再次贴出该循环的代码片段:
- for (i = 0; i < nvqs; ++i) {
- if (!names[i]) {
- vqs[i] = NULL;
- continue;
- }
-
- if (!callbacks[i])
- msix_vec = VIRTIO_MSI_NO_VECTOR;
- else if (vp_dev->per_vq_vectors)
- msix_vec = allocated_vectors++;
- else
- msix_vec = VP_MSIX_VQ_VECTOR;
- vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
- ctx ? ctx[i] : false,
- msix_vec);
- if (IS_ERR(vqs[i])) {
- err = PTR_ERR(vqs[i]);
- goto error_find;
- }
-
- if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
- continue;
-
- /* allocate per-vq irq if available and necessary */
- snprintf(vp_dev->msix_names[msix_vec],
- sizeof *vp_dev->msix_names,
- "%s-%s",
- dev_name(&vp_dev->vdev.dev), names[i]);
- err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
- vring_interrupt, 0,
- vp_dev->msix_names[msix_vec],
- vqs[i]);
- if (err)
- goto error_find;
- }
上一回也讲解了循环中的前两个步骤,来到了第一个关键函数:vp_setup_vq。本回对该函数进行解析。
vp_setup_vq函数也在Linux内核源码/drivers/virtio/virtio_pci_common.c中,代码如下:
- static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index,
- void (*callback)(struct virtqueue *vq),
- const char *name,
- bool ctx,
- u16 msix_vec)
- {
- struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
- struct virtqueue *vq;
- unsigned long flags;
-
- /* fill out our structure that represents an active queue */
- if (!info)
- return ERR_PTR(-ENOMEM);
-
- vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
- msix_vec);
- if (IS_ERR(vq))
- goto out_info;
-
- info->vq = vq;
- if (callback) {
- spin_lock_irqsave(&vp_dev->lock, flags);
- list_add(&info->node, &vp_dev->virtqueues);
- spin_unlock_irqrestore(&vp_dev->lock, flags);
- } else {
- INIT_LIST_HEAD(&info->node);
- }
-
- vp_dev->vqs[index] = info;
- return vq;
-
- out_info:
- kfree(info);
- return vq;
- }
vp_setup_vq函数初始化virtqueue。在该函数中会分配一个具体的virtio_pci_vq_info结构体对象,来表示一个virtqueue信息,代码片段如下:
struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
并且会以该对象为参数,调用virtio_pci_device的setup_vq回调函数,代码片段如下:
- vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
- msix_vec);
- if (IS_ERR(vq))
- goto out_info;
这个回调函数同样是在virtio_pci_modern_probe函数中设置的,参考前文所讲的virtio_pci_modern_probe函数代码(Linux内核源码/drivers/virtio/virtio_pci_modern.c中):
- /* the PCI probing function */
- int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
- {
- struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
- struct pci_dev *pci_dev = vp_dev->pci_dev;
- int err;
-
- mdev->pci_dev = pci_dev;
-
- err = vp_modern_probe(mdev);
- if (err)
- return err;
-
- if (mdev->device)
- vp_dev->vdev.config = &virtio_pci_config_ops;
- else
- vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
-
- vp_dev->config_vector = vp_config_vector;
- vp_dev->setup_vq = setup_vq;
- vp_dev->del_vq = del_vq;
- vp_dev->isr = mdev->isr;
- vp_dev->vdev.id = mdev->id;
-
- return 0;
- }
可见,回调函数指向的是同文件(Linux内核源码/drivers/virtio/virtio_pci_modern.c)中的setup_vq函数。该函数代码如下:
- static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
- struct virtio_pci_vq_info *info,
- unsigned int index,
- void (*callback)(struct virtqueue *vq),
- const char *name,
- bool ctx,
- u16 msix_vec)
- {
-
- struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
- bool (*notify)(struct virtqueue *vq);
- struct virtqueue *vq;
- u16 num;
- int err;
-
- if (__virtio_test_bit(&vp_dev->vdev, VIRTIO_F_NOTIFICATION_DATA))
- notify = vp_notify_with_data;
- else
- notify = vp_notify;
-
- if (index >= vp_modern_get_num_queues(mdev))
- return ERR_PTR(-EINVAL);
-
- /* Check if queue is either not available or already active. */
- num = vp_modern_get_queue_size(mdev, index);
- if (!num || vp_modern_get_queue_enable(mdev, index))
- return ERR_PTR(-ENOENT);
-
- info->msix_vector = msix_vec;
-
- /* create the vring */
- vq = vring_create_virtqueue(index, num,
- SMP_CACHE_BYTES, &vp_dev->vdev,
- true, true, ctx,
- notify, callback, name);
- if (!vq)
- return ERR_PTR(-ENOMEM);
-
- vq->num_max = num;
-
- err = vp_active_vq(vq, msix_vec);
- if (err)
- goto err;
-
- vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
- if (!vq->priv) {
- err = -ENOMEM;
- goto err;
- }
-
- return vq;
-
- err:
- vring_del_virtqueue(vq);
- return ERR_PTR(err);
- }
(1) setup_vq函数首先检测virtio PCI设备是否具有VIRTIO_F_NOTIFICATION_DATA特性。代码片段如下:
- if (__virtio_test_bit(&vp_dev->vdev, VIRTIO_F_NOTIFICATION_DATA))
- notify = vp_notify_with_data;
- else
- notify = vp_notify;
VIRTIO_F_NOTIFICATION_DATA宏在Linux内核源码/include/uapi/linux/virtio_config.h中定义,如下:
- /*
- * This feature indicates that the driver passes extra data (besides
- * identifying the virtqueue) in its device notifications.
- */
- #define VIRTIO_F_NOTIFICATION_DATA 38
如果设备支持VIRTIO_F_NOTIFICATION_DATA,则说明notify时携带(额外)数据,就将notify函数指针设置为vp_notify_with_data,即指向vp_notify_with_data函数;否则指向vp_notify函数。
vp_notify_with_data函数也在Linux内核源码/drivers/virtio/virtio_pci_modern.c中(就在上边),代码如下:
- static bool vp_notify_with_data(struct virtqueue *vq)
- {
- u32 data = vring_notification_data(vq);
-
- iowrite32(data, (void __iomem *)vq->priv);
-
- return true;
- }
而vp_notify函数则是在Linux内核源码/drivers/virtio/virtio_pci_common.c中,代码如下:
- /* the notify function used when creating a virt queue */
- bool vp_notify(struct virtqueue *vq)
- {
- /* we write the queue's selector into the notification register to
- * signal the other end */
- iowrite16(vq->index, (void __iomem *)vq->priv);
- return true;
- }
(2) 接下来,调用vp_modern_get_num_queues函数获取设备支持的virtqueue个数;如果index超出了这个范围,则返回-EINVAL。代码片段如下:
- if (index >= vp_modern_get_num_queues(mdev))
- return ERR_PTR(-EINVAL);
vp_modern_get_num_queues函数在Linux内核源代码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:
- /*
- * vp_modern_get_num_queues - get the number of virtqueues
- * @mdev: the modern virtio-pci device
- *
- * Returns the number of virtqueues
- */
- u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev)
- {
- return vp_ioread16(&mdev->common->num_queues);
- }
- EXPORT_SYMBOL_GPL(vp_modern_get_num_queues);
(3) 接下来,调用vp_modern_get_queue_size函数获得该virtqueue的大小;如果大小为0(队列不可用),或者该队列已经被使能,则返回-ENOENT。代码片段如下:
- /* Check if queue is either not available or already active. */
- num = vp_modern_get_queue_size(mdev, index);
- if (!num || vp_modern_get_queue_enable(mdev, index))
- return ERR_PTR(-ENOENT);
vp_modern_get_queue_size函数在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:
- /*
- * vp_modern_get_queue_size - get size for a virtqueue
- * @mdev: the modern virtio-pci device
- * @index: the queue index
- *
- * Returns the size of the virtqueue
- */
- u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev,
- u16 index)
- {
- vp_iowrite16(index, &mdev->common->queue_select);
-
- return vp_ioread16(&mdev->common->queue_size);
-
- }
- EXPORT_SYMBOL_GPL(vp_modern_get_queue_size);
vp_modern_get_queue_size函数先选择某个virtqueue,然后得到其大小。
vp_modern_get_queue_enable函数也在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:
- /*
- * vp_modern_get_queue_enable - enable a virtqueue
- * @mdev: the modern virtio-pci device
- * @index: the queue index
- *
- * Returns whether a virtqueue is enabled or not
- */
- bool vp_modern_get_queue_enable(struct virtio_pci_modern_device *mdev,
- u16 index)
- {
- vp_iowrite16(index, &mdev->common->queue_select);
-
- return vp_ioread16(&mdev->common->queue_enable);
- }
- EXPORT_SYMBOL_GPL(vp_modern_get_queue_enable);
vp_modern_get_queue_enable函数获得所选中的virtqueue是否使能。
这里要特别说明一下,在老版代码的setup_vq函数中,一上来首先得到virtio_pci_device的common成员,代码片段如下:
struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
在新版本中,虽然是直接使用具体的common->x,并未单独使用一个中间变量专门保存virtio_pci_device的common成员,但意思一样。
struct virtio_pci_common_cfg的定义在Linux内核源码/include/uapi/linux/virtio_pci.h中,代码如下:
- /* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
- struct virtio_pci_common_cfg {
- /* About the whole device. */
- __le32 device_feature_select; /* read-write */
- __le32 device_feature; /* read-only */
- __le32 guest_feature_select; /* read-write */
- __le32 guest_feature; /* read-write */
- __le16 msix_config; /* read-write */
- __le16 num_queues; /* read-only */
- __u8 device_status; /* read-write */
- __u8 config_generation; /* read-only */
-
- /* About a specific virtqueue. */
- __le16 queue_select; /* read-write */
- __le16 queue_size; /* read-write, power of 2. */
- __le16 queue_msix_vector; /* read-write */
- __le16 queue_enable; /* read-write */
- __le16 queue_notify_off; /* read-only */
- __le32 queue_desc_lo; /* read-write */
- __le32 queue_desc_hi; /* read-write */
- __le32 queue_avail_lo; /* read-write */
- __le32 queue_avail_hi; /* read-write */
- __le32 queue_used_lo; /* read-write */
- __le32 queue_used_hi; /* read-write */
- };
这是virtio PCI代理设备中用来配置的一段MMIO,如下图中间部分所示:
直接读写这些地址会陷入到QEMU的virtio_pci_common_read/write函数。这里将common的各个偏移和对应的寄存器名列出来以方便对照,在Linux内核源码/include/uapi/linux/virtio_pci.h中,如下:
- /* Macro versions of offsets for the Old Timers! */
- #define VIRTIO_PCI_CAP_VNDR 0
- #define VIRTIO_PCI_CAP_NEXT 1
- #define VIRTIO_PCI_CAP_LEN 2
- #define VIRTIO_PCI_CAP_CFG_TYPE 3
- #define VIRTIO_PCI_CAP_BAR 4
- #define VIRTIO_PCI_CAP_OFFSET 8
- #define VIRTIO_PCI_CAP_LENGTH 12
-
- #define VIRTIO_PCI_NOTIFY_CAP_MULT 16
-
- #define VIRTIO_PCI_COMMON_DFSELECT 0
- #define VIRTIO_PCI_COMMON_DF 4
- #define VIRTIO_PCI_COMMON_GFSELECT 8
- #define VIRTIO_PCI_COMMON_GF 12
- #define VIRTIO_PCI_COMMON_MSIX 16
- #define VIRTIO_PCI_COMMON_NUMQ 18
- #define VIRTIO_PCI_COMMON_STATUS 20
- #define VIRTIO_PCI_COMMON_CFGGENERATION 21
- #define VIRTIO_PCI_COMMON_Q_SELECT 22
- #define VIRTIO_PCI_COMMON_Q_SIZE 24
- #define VIRTIO_PCI_COMMON_Q_MSIX 26
- #define VIRTIO_PCI_COMMON_Q_ENABLE 28
- #define VIRTIO_PCI_COMMON_Q_NOFF 30
- #define VIRTIO_PCI_COMMON_Q_DESCLO 32
- #define VIRTIO_PCI_COMMON_Q_DESCHI 36
- #define VIRTIO_PCI_COMMON_Q_AVAILLO 40
- #define VIRTIO_PCI_COMMON_Q_AVAILHI 44
- #define VIRTIO_PCI_COMMON_Q_USEDLO 48
- #define VIRTIO_PCI_COMMON_Q_USEDHI 52
- #define VIRTIO_PCI_COMMON_Q_NDATA 56
- #define VIRTIO_PCI_COMMON_Q_RESET 58
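对照上边的struct virtio_pci_common_cfg的定义,一目了然(其中最后两项VIRTIO_PCI_COMMON_Q_NDATA和VIRTIO_PCI_COMMON_Q_RESET,对应的是扩展结构体struct virtio_pci_modern_common_cfg中新增的queue_notify_data和queue_reset字段,并不在struct virtio_pci_common_cfg之内)。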
- /* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
- struct virtio_pci_common_cfg {
- /* About the whole device. */
- __le32 device_feature_select; /* read-write */
- __le32 device_feature; /* read-only */
- __le32 guest_feature_select; /* read-write */
- __le32 guest_feature; /* read-write */
- __le16 msix_config; /* read-write */
- __le16 num_queues; /* read-only */
- __u8 device_status; /* read-write */
- __u8 config_generation; /* read-only */
-
- /* About a specific virtqueue. */
- __le16 queue_select; /* read-write */
- __le16 queue_size; /* read-write, power of 2. */
- __le16 queue_msix_vector; /* read-write */
- __le16 queue_enable; /* read-write */
- __le16 queue_notify_off; /* read-only */
- __le32 queue_desc_lo; /* read-write */
- __le32 queue_desc_hi; /* read-write */
- __le32 queue_avail_lo; /* read-write */
- __le32 queue_avail_hi; /* read-write */
- __le32 queue_used_lo; /* read-write */
- __le32 queue_used_hi; /* read-write */
- };
这里特别说明以上内容,是因为这是前不久(2024年春节后)笔者参加地平线面试时被问到的一个问题:
问:
virtio的前端驱动和后端设备是怎样进行通信的?
答:
使用virtqueue,vring。
问:前端驱动即Guest写完vring后,怎样通知后端设备?
答:Guest添加完buffer是否通知Host,有两种机制:一是判断Ring中的flags,二是Event_idx。
问:底层的机制是什么?
答:……需要再研究一下。
面试官(问者)解答:开辟一段空间,当Guest中操作这段地址空间时,会陷入到VMX root中。
当前位于virtqueue创建流程的以下阶段(红色矩形框中)(图片引自 https://note.youdao.com/ynoteshare/index.html?id=f247acce8c21eb4ca4a7e37403f065f9&type=note&_time=1633000040495 ):
回到setup_vq函数来,setup_vq函数的余下部分,将在下一回中进行解析。