接前一篇文章:
上回书讲解了virtballoon_probe函数及其中的两个重要函数init_vqs()和virtio_device_ready(),解析了init_vqs函数的前两步。本回继续解析该函数。
(3) init_vqs函数在经过了对于各feature的初始化、检查和设置后, 接下来调用virtio_find_vqs函数 。代码片段如下:
- err = virtio_find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, vqs,
- callbacks, names, NULL);
- if (err)
- return err;
virtio_find_vqs函数在 Linux 内核源码/include/linux/virtio_config.h中,代码如下:
- static inline
- int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[],
- struct irq_affinity *desc)
- {
- return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
- }
实际上,老版本代码中是直接在init_vqs函数中调用的virtio_config_ops的find_vqs回调,新版本代码将其封装到了virtio_find_vqs中。
前文书 QEMU源码全解析 —— virtio(22) 已经对于virtio_config_ops进行过讲解了,如下:
virtio_pci_config_ops的初始化有两处,分别在Linux内核源码/drivers/virtio/virtio_pci_legacy.c和Linux内核源码/drivers/virtio/virtio_pci_modern.c中。代码分别如下:
- legacy
static const struct virtio_config_ops virtio_pci_config_ops = { .get = vp_get, .set = vp_set, .get_status = vp_get_status, .set_status = vp_set_status, .reset = vp_reset, .find_vqs = vp_find_vqs, .del_vqs = vp_del_vqs, .synchronize_cbs = vp_synchronize_vectors, .get_features = vp_get_features, .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, .get_vq_affinity = vp_get_vq_affinity, };
- modern
static const struct virtio_config_ops virtio_pci_config_ops = { .get = vp_get, .set = vp_set, .generation = vp_generation, .get_status = vp_get_status, .set_status = vp_set_status, .reset = vp_reset, .find_vqs = vp_modern_find_vqs, .del_vqs = vp_del_vqs, .synchronize_cbs = vp_synchronize_vectors, .get_features = vp_get_features, .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, .get_vq_affinity = vp_get_vq_affinity, .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, };
仍以modern为例,struct virtio_config_ops virtio_pci_config_ops中find_vqs成员对应的函数是vp_modern_find_vqs()。vp_modern_find_vqs函数在Linux内核源码/drivers/virtio/virtio_pci_modern.c中,代码如下:
- static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
- struct virtqueue *vqs[],
- vq_callback_t *callbacks[],
- const char * const names[], const bool *ctx,
- struct irq_affinity *desc)
- {
- struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- struct virtqueue *vq;
- int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
-
- if (rc)
- return rc;
-
- /* Select and activate all queues. Has to be done last: once we do
- * this, there's no way to go back except reset.
- */
- list_for_each_entry(vq, &vdev->vqs, list)
- vp_modern_set_queue_enable(&vp_dev->mdev, vq->index, true);
-
- return 0;
- }
再贴一下调用代码:
- static inline
- int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[],
- struct irq_affinity *desc)
- {
- return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
- }
- err = virtio_find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, vqs,
- callbacks, names, NULL);
- if (err)
- return err;
vp_modern_find_vqs函数以相同参数调用了vp_find_vqs函数。代码片段如下:
- int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
-
- if (rc)
- return rc;
vp_find_vqs函数在Linux内核源码/drivers/virtio/virtio_pci_common.c中,代码如下:
- /* the config->find_vqs() implementation */
- int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], const bool *ctx,
- struct irq_affinity *desc)
- {
- int err;
-
- /* Try MSI-X with one vector per queue. */
- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
- if (!err)
- return 0;
- /* Fallback: MSI-X with one vector for config, one shared for queues. */
- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
- if (!err)
- return 0;
- /* Is there an interrupt? If not give up. */
- if (!(to_vp_device(vdev)->pci_dev->irq))
- return err;
- /* Finally fall back to regular interrupts. */
- return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
- }
在老版本Linux内核的代码中,vp_find_vqs函数本质上只是调用了一个函数vp_try_to_find_vqs()。而新版本代码则变化比较大了,主要是调用了两个函数:vp_find_vqs_msix()和vp_find_vqs_intx()。
vp_find_vqs_msix函数在Linux内核源码/drivers/virtio/virtio_pci_common.c中,代码如下:
- static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], bool per_vq_vectors,
- const bool *ctx,
- struct irq_affinity *desc)
- {
- struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- u16 msix_vec;
- int i, err, nvectors, allocated_vectors, queue_idx = 0;
-
- vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
- if (!vp_dev->vqs)
- return -ENOMEM;
-
- if (per_vq_vectors) {
- /* Best option: one for change interrupt, one per vq. */
- nvectors = 1;
- for (i = 0; i < nvqs; ++i)
- if (names[i] && callbacks[i])
- ++nvectors;
- } else {
- /* Second best: one for change, shared for all vqs. */
- nvectors = 2;
- }
-
- err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors,
- per_vq_vectors ? desc : NULL);
- if (err)
- goto error_find;
-
- vp_dev->per_vq_vectors = per_vq_vectors;
- allocated_vectors = vp_dev->msix_used_vectors;
- for (i = 0; i < nvqs; ++i) {
- if (!names[i]) {
- vqs[i] = NULL;
- continue;
- }
-
- if (!callbacks[i])
- msix_vec = VIRTIO_MSI_NO_VECTOR;
- else if (vp_dev->per_vq_vectors)
- msix_vec = allocated_vectors++;
- else
- msix_vec = VP_MSIX_VQ_VECTOR;
- vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
- ctx ? ctx[i] : false,
- msix_vec);
- if (IS_ERR(vqs[i])) {
- err = PTR_ERR(vqs[i]);
- goto error_find;
- }
-
- if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
- continue;
-
- /* allocate per-vq irq if available and necessary */
- snprintf(vp_dev->msix_names[msix_vec],
- sizeof *vp_dev->msix_names,
- "%s-%s",
- dev_name(&vp_dev->vdev.dev), names[i]);
- err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
- vring_interrupt, 0,
- vp_dev->msix_names[msix_vec],
- vqs[i]);
- if (err)
- goto error_find;
- }
- return 0;
-
- error_find:
- vp_del_vqs(vdev);
- return err;
- }
从代码上来看,vp_find_vqs_msix函数与旧版本KVM代码的vp_try_to_find_vqs函数很相近。
(1)vp_find_vqs_msix函数首先通过kcalloc函数分配nvqs个指向struct virtio_pci_vq_info的指针,并赋值给了virtio_pci_device的vqs成员(struct virtio_pci_vq_info **vqs;),每个virtio_pci_vq_info记录了virtqueue的信息。代码片段如下:
- vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
- if (!vp_dev->vqs)
- return -ENOMEM;
nvqs的实参是VIRTIO_BALLOON_VQ_MAX,前文书讲过,该值是5。
注意,这里只是分配了指针,并没有分配具体的结构体,即每个指针指向的空间。
(2)vp_find_vqs_msix函数接着计算nvectors。代码片段如下:
- if (per_vq_vectors) {
- /* Best option: one for change interrupt, one per vq. */
- nvectors = 1;
- for (i = 0; i < nvqs; ++i)
- if (names[i] && callbacks[i])
- ++nvectors;
- } else {
- /* Second best: one for change, shared for all vqs. */
- nvectors = 2;
- }
nvectors表示总共需要的MSI-X vector的数量。由于vp_find_vqs函数调用了两次vp_find_vqs_msix函数,
- /* Try MSI-X with one vector per queue. */
- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
- if (!err)
- return 0;
- /* Fallback: MSI-X with one vector for config, one shared for queues. */
- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
- if (!err)
- return 0;
因此,这里前一次调用得到的nvectors的值是[2,4](视上一回所讲的有条件的feature的使能情况而定);而后一次调用得到的nvectors的值是2。
在往下继续解析代码之前,要先对virtio的中断进行一下知识补强。
参考以下博文:
virtio中包括两种中断类型:
- change中断
当设备的配置信息发生改变(config changed),会产生一个中断(称为change中断),中断处理程序需要调用相应的处理函数(需要驱动定义)。
- vq中断
当设备向队列中写入信息时,会产生一个中断(称为vq中断),中断处理函数需要调用相应的队列的回调函数(需要驱动定义)。
virtio中包括三种中断处理方式:
1)不使用msix中断,使用常规中断
change中断和所有vq中断共用一个中断irq。
中断处理函数为vp_interrupt,vp_interrupt函数中包含了对change中断和vq中断的处理。
2)使用msix中断,但只有两个vector
两个vector中,一个用来对应change中断,另一个对应所有队列的vq中断。
change中断处理函数为vp_config_changed;vq中断处理函数为vp_vring_interrupt。
对应的就是vp_find_vqs函数第二次调用vp_find_vqs_msix函数的代码:
- /* Fallback: MSI-X with one vector for config, one shared for queues. */
- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
- if (!err)
- return 0;
- else {
- /* Second best: one for change, shared for all vqs. */
- nvectors = 2;
- }
3)使用msix中断,有n+1个vector
n+1个vector中,一个用来对应change中断,n个分别对应n个队列的vq中断,每个vq一个vector。
change中断处理函数为vp_config_changed;vq中断处理函数为vring_interrupt。
- enum virtio_balloon_vq {
- VIRTIO_BALLOON_VQ_INFLATE,
- VIRTIO_BALLOON_VQ_DEFLATE,
- VIRTIO_BALLOON_VQ_STATS,
- VIRTIO_BALLOON_VQ_FREE_PAGE,
- VIRTIO_BALLOON_VQ_REPORTING,
- VIRTIO_BALLOON_VQ_MAX
- };
对应的就是vp_find_vqs函数第一次调用vp_find_vqs_msix函数的代码:
- /* Try MSI-X with one vector per queue. */
- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
- if (!err)
- return 0;
- if (per_vq_vectors) {
- /* Best option: one for change interrupt, one per vq. */
- nvectors = 1;
- for (i = 0; i < nvqs; ++i)
- if (names[i] && callbacks[i])
- ++nvectors;
- }
根据vp_find_vqs函数的调用顺序,
- /* the config->find_vqs() implementation */
- int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
- struct virtqueue *vqs[], vq_callback_t *callbacks[],
- const char * const names[], const bool *ctx,
- struct irq_affinity *desc)
- {
- int err;
-
- /* Try MSI-X with one vector per queue. */
- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
- if (!err)
- return 0;
- /* Fallback: MSI-X with one vector for config, one shared for queues. */
- err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
- if (!err)
- return 0;
- /* Is there an interrupt? If not give up. */
- if (!(to_vp_device(vdev)->pci_dev->irq))
- return err;
- /* Finally fall back to regular interrupts. */
- return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
- }
优先使用中断处理方式3(使用msix中断,有n+1个vector),然后方式2(使用msix中断,但只有两个vector),最后方式1(不使用msix中断,使用常规中断)。
(3)vp_find_vqs_msix函数接下来调用vp_request_msix_vectors函数 。代码片段如下:
- err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors,
- per_vq_vectors ? desc : NULL);
- if (err)
- goto error_find;
vp_request_msix_vectors函数在Linux内核源码/drivers/virtio/virtio_pci_common.c中,代码如下:
- static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
- bool per_vq_vectors, struct irq_affinity *desc)
- {
- struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- const char *name = dev_name(&vp_dev->vdev.dev);
- unsigned int flags = PCI_IRQ_MSIX;
- unsigned int i, v;
- int err = -ENOMEM;
-
- vp_dev->msix_vectors = nvectors;
-
- vp_dev->msix_names = kmalloc_array(nvectors,
- sizeof(*vp_dev->msix_names),
- GFP_KERNEL);
- if (!vp_dev->msix_names)
- goto error;
- vp_dev->msix_affinity_masks
- = kcalloc(nvectors, sizeof(*vp_dev->msix_affinity_masks),
- GFP_KERNEL);
- if (!vp_dev->msix_affinity_masks)
- goto error;
- for (i = 0; i < nvectors; ++i)
- if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
- GFP_KERNEL))
- goto error;
-
- if (desc) {
- flags |= PCI_IRQ_AFFINITY;
- desc->pre_vectors++; /* virtio config vector */
- }
-
- err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
- nvectors, flags, desc);
- if (err < 0)
- goto error;
- vp_dev->msix_enabled = 1;
-
- /* Set the vector used for configuration */
- v = vp_dev->msix_used_vectors;
- snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
- "%s-config", name);
- err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
- vp_config_changed, 0, vp_dev->msix_names[v],
- vp_dev);
- if (err)
- goto error;
- ++vp_dev->msix_used_vectors;
-
- v = vp_dev->config_vector(vp_dev, v);
- /* Verify we had enough resources to assign the vector */
- if (v == VIRTIO_MSI_NO_VECTOR) {
- err = -EBUSY;
- goto error;
- }
-
- if (!per_vq_vectors) {
- /* Shared vector for all VQs */
- v = vp_dev->msix_used_vectors;
- snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
- "%s-virtqueues", name);
- err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
- vp_vring_interrupt, 0, vp_dev->msix_names[v],
- vp_dev);
- if (err)
- goto error;
- ++vp_dev->msix_used_vectors;
- }
- return 0;
- error:
- return err;
- }
对于vp_request_msix_vectors函数的解析,请看下回。