QEMU源码全解析 —— virtio（25）-CSDN博客

本文链接： https://blog.csdn.net/phmatthaus/article/details/136288603

本文详细解析了QEMU virtio中vp_request_msix_vectors函数的工作原理，对比了新老版本的区别，并介绍了在分配MSI-X中断向量及处理中断方面的逻辑。文章还提及了vp_find_vqs_msix函数的部分流程，以及如何根据virtio设备的特征启用中断处理。

摘要由CSDN通过智能技术生成

接前一篇文章：

上回书由init_vqs函数中调用的virtio_find_vqs函数，跟进到virtio_find_vqs函数中调用的vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc)（即virtio_config_ops的find_vqs回调），再到此回调所指向的函数vp_modern_find_vqs()，最后跟到其所调用的vp_find_vqs函数。调用流程如下：

init_vqs()

---> virtio_find_vqs()

---> vdev->config->find_vqs() ---> vp_modern_find_vqs()

---> vp_find_vqs()

再次贴出vp_find_vqs函数代码，在Linux内核源码/drivers/virtio/virtio_pci_common.c中，代码如下：


/*
 * vp_find_vqs - the config->find_vqs() implementation.
 *
 * Tries up to three interrupt setups in order and returns as soon as one
 * of them succeeds:
 *   1. vp_find_vqs_msix() with per_vq_vectors == true;
 *   2. vp_find_vqs_msix() with per_vq_vectors == false;
 *   3. vp_find_vqs_intx(), attempted only when pci_dev->irq is non-zero.
 *
 * All arguments are forwarded unchanged to the helpers, except that
 * @desc is passed only to the two MSI-X attempts.
 *
 * Returns 0 when one of the MSI-X attempts succeeds. If both fail and
 * pci_dev->irq is zero, returns the error of the second MSI-X attempt;
 * otherwise returns whatever vp_find_vqs_intx() returns.
 */
int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
		struct virtqueue *vqs[], vq_callback_t *callbacks[],
		const char * const names[], const bool *ctx,
		struct irq_affinity *desc)
{
	int err;
 
	/* Try MSI-X with one vector per queue. */
	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
	if (!err)
		return 0;
	/* Fallback: MSI-X with one vector for config, one shared for queues. */
	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
	if (!err)
		return 0;
	/* Is there an interrupt? If not give up. */
	if (!(to_vp_device(vdev)->pci_dev->irq))
		return err;
	/* Finally fall back to regular interrupts (desc is not used here). */
	return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
}

上回书讲到vp_find_vqs_msix函数，讲了其中的前两步。到第三步调用vp_request_msix_vectors函数的时候，又往下跟进了一层。为了便于理解和加深印象，再次贴出vp_find_vqs_msix函数的代码，在Linux内核源码/drivers/virtio/virtio_pci_common.c中，如下：


/*
 * vp_find_vqs_msix - set up the virtqueues of @vdev using MSI-X vectors.
 *
 * @vdev:           device whose queues are being set up
 * @nvqs:           number of entries in @vqs, @callbacks and @names
 * @vqs:            output array; vqs[i] is set to NULL when names[i] is NULL
 * @callbacks:      per-queue callbacks; a NULL entry means the queue gets
 *                  VIRTIO_MSI_NO_VECTOR
 * @names:          per-queue names; a NULL entry means the queue is skipped
 * @per_vq_vectors: true  -> request 1 vector plus one for every queue that
 *                           has both a name and a callback;
 *                  false -> request exactly 2 vectors
 * @ctx:            optional per-queue flags; ctx[i] is handed to
 *                  vp_setup_vq(), or false when @ctx is NULL
 * @desc:           forwarded to vp_request_msix_vectors() only when
 *                  @per_vq_vectors is true, otherwise NULL is passed
 *
 * Returns 0 on success and -ENOMEM if the vp_dev->vqs array cannot be
 * allocated. Any later failure (vp_request_msix_vectors(), vp_setup_vq()
 * or request_irq()) goes through error_find, which calls vp_del_vqs(vdev)
 * and returns that error code.
 */
static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
		struct virtqueue *vqs[], vq_callback_t *callbacks[],
		const char * const names[], bool per_vq_vectors,
		const bool *ctx,
		struct irq_affinity *desc)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	u16 msix_vec;
	int i, err, nvectors, allocated_vectors, queue_idx = 0;
 
	/* Zero-initialised array with one slot per requested queue. */
	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
	if (!vp_dev->vqs)
		return -ENOMEM;
 
	if (per_vq_vectors) {
		/* Best option: one for change interrupt, one per vq. */
		nvectors = 1;
		/* Only queues with both a name and a callback need a vector. */
		for (i = 0; i < nvqs; ++i)
			if (names[i] && callbacks[i])
				++nvectors;
	} else {
		/* Second best: one for change, shared for all vqs. */
		nvectors = 2;
	}
 
	err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors,
				      per_vq_vectors ? desc : NULL);
	if (err)
		goto error_find;
 
	vp_dev->per_vq_vectors = per_vq_vectors;
	/* Per-queue vectors are numbered after those already in use. */
	allocated_vectors = vp_dev->msix_used_vectors;
	for (i = 0; i < nvqs; ++i) {
		/* Unnamed slot: no queue is created and queue_idx does not advance. */
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}
 
		/* Choose the vector index that is passed to vp_setup_vq(). */
		if (!callbacks[i])
			msix_vec = VIRTIO_MSI_NO_VECTOR;
		else if (vp_dev->per_vq_vectors)
			msix_vec = allocated_vectors++;
		else
			msix_vec = VP_MSIX_VQ_VECTOR;
		vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
				     ctx ? ctx[i] : false,
				     msix_vec);
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
			goto error_find;
		}
 
		/* No dedicated IRQ in shared mode or for queues without a vector. */
		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
			continue;
 
		/* allocate per-vq irq if available and necessary */
		/* IRQ name is "<device name>-<queue name>", bounded by one msix_names entry. */
		snprintf(vp_dev->msix_names[msix_vec],
			 sizeof *vp_dev->msix_names,
			 "%s-%s",
			 dev_name(&vp_dev->vdev.dev), names[i]);
		/* The virtqueue itself is the dev_id passed to vring_interrupt. */
		err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
				  vring_interrupt, 0,
				  vp_dev->msix_names[msix_vec],
				  vqs[i]);
		if (err)
			goto error_find;
	}
	return 0;
 
error_find:
	/* Common failure path: hand cleanup to vp_del_vqs() and report err. */
	vp_del_vqs(vdev);
	return err;
}

vp_request_msix_vectors函数在Linux内核源码/drivers/virtio/virtio_pci_common.c中，代码如下：


/*
 * vp_request_msix_vectors - allocate @nvectors MSI-X vectors and request
 * the IRQs that are not tied to a single virtqueue.
 *
 * @vdev:           device the vectors are requested for
 * @nvectors:       number of vectors to allocate; also the length of the
 *                  msix_names and msix_affinity_masks arrays
 * @per_vq_vectors: when false, one extra IRQ shared by all virtqueues is
 *                  requested with vp_vring_interrupt as the handler
 * @desc:           optional; when non-NULL, PCI_IRQ_AFFINITY is added to
 *                  the flags and desc->pre_vectors is incremented in place
 *
 * The first vector is always requested with vp_config_changed as the
 * handler and is then passed to the vp_dev->config_vector() callback.
 *
 * Returns 0 on success, -ENOMEM if an allocation fails, -EBUSY if
 * config_vector() returns VIRTIO_MSI_NO_VECTOR, or the error from
 * pci_alloc_irq_vectors_affinity() / request_irq().
 *
 * The error label only returns err; nothing is released in this function.
 * NOTE(review): cleanup is presumably done by the caller (vp_find_vqs_msix()
 * calls vp_del_vqs() on failure) - confirm against vp_del_vqs().
 */
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
				   bool per_vq_vectors, struct irq_affinity *desc)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	const char *name = dev_name(&vp_dev->vdev.dev);
	unsigned int flags = PCI_IRQ_MSIX;
	unsigned int i, v;
	int err = -ENOMEM;
 
	vp_dev->msix_vectors = nvectors;
 
	/* One name buffer per vector (not zero-initialised). */
	vp_dev->msix_names = kmalloc_array(nvectors,
					   sizeof(*vp_dev->msix_names),
					   GFP_KERNEL);
	if (!vp_dev->msix_names)
		goto error;
	/* One affinity mask per vector; the array itself is zero-initialised. */
	vp_dev->msix_affinity_masks
		= kcalloc(nvectors, sizeof(*vp_dev->msix_affinity_masks),
			  GFP_KERNEL);
	if (!vp_dev->msix_affinity_masks)
		goto error;
	for (i = 0; i < nvectors; ++i)
		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
					GFP_KERNEL))
			goto error;
 
	if (desc) {
		flags |= PCI_IRQ_AFFINITY;
		desc->pre_vectors++; /* virtio config vector */
	}
 
	/* min == max == nvectors: exactly nvectors are requested. */
	err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
					     nvectors, flags, desc);
	if (err < 0)
		goto error;
	vp_dev->msix_enabled = 1;
 
	/* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;
 
	/* Pass the vector to the config_vector() callback and check its result. */
	v = vp_dev->config_vector(vp_dev, v);
	/* Verify we had enough resources to assign the vector */
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
	}
 
	if (!per_vq_vectors) {
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
			goto error;
		++vp_dev->msix_used_vectors;
	}
	return 0;
error:
	/* No cleanup is performed here; only the error code is returned. */
	return err;
}

本回就对vp_request_msix_vectors函数进行详细解析。

在这里必须也提一下老版本中该函数的代码，如下所示：


/*
 * vp_request_msix_vectors (older kernel version) - enable @nvectors MSI-X
 * vectors and request the IRQs that are not tied to a single virtqueue.
 *
 * @vdev:           device the vectors are requested for
 * @nvectors:       number of vectors to enable; also the length of the
 *                  msix_entries, msix_names and msix_affinity_masks arrays
 * @per_vq_vectors: when false, one extra IRQ shared by all virtqueues is
 *                  requested with vp_vring_interrupt as the handler
 *
 * This version keeps an explicit msix_entries table: each entry number is
 * set to its own index, the table is handed to pci_enable_msix_exact(),
 * and the IRQ numbers are read back from msix_entries[v].vector.
 *
 * Returns 0 on success, -ENOMEM if an allocation fails, -EBUSY if
 * config_vector() returns VIRTIO_MSI_NO_VECTOR, or the error from
 * pci_enable_msix_exact() / request_irq(). Every failure path calls
 * vp_free_vectors(vdev) before returning.
 */
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
				   bool per_vq_vectors)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	const char *name = dev_name(&vp_dev->vdev.dev);
	unsigned i, v;
	int err = -ENOMEM;
 
	vp_dev->msix_vectors = nvectors;
 
	/* Table of MSI-X entries passed to pci_enable_msix_exact() below. */
	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
				       GFP_KERNEL);
	if (!vp_dev->msix_entries)
		goto error;
	/* One name buffer per vector (not zero-initialised). */
	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
				     GFP_KERNEL);
	if (!vp_dev->msix_names)
		goto error;
	/* One affinity mask per vector; the array itself is zero-initialised. */
	vp_dev->msix_affinity_masks
		= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
			  GFP_KERNEL);
	if (!vp_dev->msix_affinity_masks)
		goto error;
	for (i = 0; i < nvectors; ++i)
		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
					GFP_KERNEL))
			goto error;
 
	/* Entry i asks for MSI-X table slot i. */
	for (i = 0; i < nvectors; ++i)
		vp_dev->msix_entries[i].entry = i;
 
	err = pci_enable_msix_exact(vp_dev->pci_dev,
				    vp_dev->msix_entries, nvectors);
	if (err)
		goto error;
	vp_dev->msix_enabled = 1;
 
	/* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(vp_dev->msix_entries[v].vector,
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;
 
	/* Pass the vector to the config_vector() callback and check its result. */
	v = vp_dev->config_vector(vp_dev, v);
	/* Verify we had enough resources to assign the vector */
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
	}
 
	if (!per_vq_vectors) {
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(vp_dev->msix_entries[v].vector,
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
			goto error;
		++vp_dev->msix_used_vectors;
	}
	return 0;
error:
	/* Unlike the newer version, this one releases its vectors itself. */
	vp_free_vectors(vdev);
	return err;
}

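老版本和新版本vp_request_msix_vectors函数最大的区别是新版本中没有了vp_dev->msix_entries相关的代码，这是因为新版本的virtio_pci_device结构中不再有msix_entries这个成员。相应地，新版本用pci_alloc_irq_vectors_affinity()代替了pci_enable_msix_exact()来分配中断向量，用pci_irq_vector(vp_dev->pci_dev, v)代替了vp_dev->msix_entries[v].vector来获取中断号；此外，新版本多了一个struct irq_affinity *desc参数，并且出错时不再调用vp_free_vectors()，而是直接返回错误码。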

老版本中的vp_request_msix_vectors函数完成了以下工作（仍然参考 virtio设备中断分析）：

1）分配nvectors个MSI-X中断向量（vector），并使用其中1个vector，将vp_config_changed指定为change（配置变更）中断的处理函数。
2）如果per_vq_vectors为false，则nvectors就是2，再用掉另一个vector来指定n个队列共用的vq中断处理函数vp_vring_interrupt。
3）如果per_vq_vectors为true，则在下面代码中为每个队列指定一个vector，vq中断处理函数为vring_interrupt。

由于新老版本中的代码差异与功能并没有太大联系，因此新版本vp_request_msix_vectors函数的功能与上面所述基本一致。

（4）vp_find_vqs_msix函数接下来是两个赋值语句。代码片段如下：


    vp_dev->per_vq_vectors = per_vq_vectors;
	allocated_vectors = vp_dev->msix_used_vectors;

第一句代码的意思是将vp_find_vqs_msix函数的参数bool per_vq_vectors赋值给vp_dev（virtio PCI设备）的per_vq_vectors成员。

第二句代码是将vp_dev->msix_used_vectors（即已使用的中断向量个数）赋给allocated_vectors。vp_dev->msix_used_vectors在上边的vp_request_msix_vectors函数中被更新，代码片段如下：


    /* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;
 
	v = vp_dev->config_vector(vp_dev, v);
	/* Verify we had enough resources to assign the vector */
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
	}
 
	if (!per_vq_vectors) {
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
			goto error;
		++vp_dev->msix_used_vectors;
	}

（5）接下来，vp_find_vqs_msix函数进入循环，设置每个使能的vq并申请中断。


/*
 * Identifiers for the virtio balloon virtqueues. The values start at 0 and
 * increase by one, so the last enumerator equals the number of queue
 * identifiers before it.
 */
enum virtio_balloon_vq {
	VIRTIO_BALLOON_VQ_INFLATE,
	VIRTIO_BALLOON_VQ_DEFLATE,
	VIRTIO_BALLOON_VQ_STATS,
	VIRTIO_BALLOON_VQ_FREE_PAGE,
	VIRTIO_BALLOON_VQ_REPORTING,
	/* Not a queue: count of the identifiers above. */
	VIRTIO_BALLOON_VQ_MAX
};

代码片段如下：


    for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}
 
		if (!callbacks[i])
			msix_vec = VIRTIO_MSI_NO_VECTOR;
		else if (vp_dev->per_vq_vectors)
			msix_vec = allocated_vectors++;
		else
			msix_vec = VP_MSIX_VQ_VECTOR;
		vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
				     ctx ? ctx[i] : false,
				     msix_vec);
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
			goto error_find;
		}
 
		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
			continue;
 
		/* allocate per-vq irq if available and necessary */
		snprintf(vp_dev->msix_names[msix_vec],
			 sizeof *vp_dev->msix_names,
			 "%s-%s",
			 dev_name(&vp_dev->vdev.dev), names[i]);
		err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
				  vring_interrupt, 0,
				  vp_dev->msix_names[msix_vec],
				  vqs[i]);
		if (err)
			goto error_find;
	}

names[]前文已经讲过了，参见 QEMU源码全解析 —— virtio（23）

其中有2个virtqueue（inflate和deflate）是无条件使用的，如下所示：


    struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
    vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
    const char *names[VIRTIO_BALLOON_VQ_MAX];
    int err;
 
    /*
     * Inflateq and deflateq are used unconditionally. The names[]
     * will be NULL if the related feature is not enabled, which will
     * cause no allocation for the corresponding virtqueue in find_vqs.
     */
    callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
    names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
    callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
    names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
    callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL;
    names[VIRTIO_BALLOON_VQ_STATS] = NULL;
    callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
    names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
    names[VIRTIO_BALLOON_VQ_REPORTING] = NULL;

还有3个virtqueue是有条件的，取决于对应的feature是否被使能，如下所示：


    if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
		callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
	}
 
	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
		names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
	}
 
	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
		names[VIRTIO_BALLOON_VQ_REPORTING] = "reporting_vq";
		callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
	}

如果某个feature未被使能，则其对应的names[i]就为NULL，就把其对应的vqs[i]也置为NULL，跳过该特性。代码片段如下：


        if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}

能再往下走的都是使能的feature了。这里又分为两种情况：一种是其对应的callbacks[i]不为NULL，这是对于大多数feature来说的；另一种是虽然names[i]不为NULL，但callbacks[i]为NULL，目前这种情况只是对于VIRTIO_BALLOON_VQ_FREE_PAGE才会出现。对于后一种情况，将msix_vec设置为VIRTIO_MSI_NO_VECTOR；对于前一种情况，又要根据上一回所讲的virtio中断处理方式区别对待。对于“使用msix中断，有n+1个vector”的情况（即per_vq_vectors为true），每循环至此一次，先将allocated_vectors的当前值赋给msix_vec，然后allocated_vectors自增1；否则就将msix_vec设置为VP_MSIX_VQ_VECTOR。代码片段如下：


		if (!callbacks[i])
			msix_vec = VIRTIO_MSI_NO_VECTOR;
		else if (vp_dev->per_vq_vectors)
			msix_vec = allocated_vectors++;
		else
			msix_vec = VP_MSIX_VQ_VECTOR;

接下来，循环中就来到了第一个核心函数：vp_setup_vq。对于它的详细解析，请看下回。