QEMU源码全解析 —— PCI设备模拟(7)

88 篇文章 19 订阅
本文深入解析QEMU中PCI设备模拟的MMIO读写回调函数,介绍了MemoryRegionOps结构及其实现的读写操作。在虚拟机访问设备MMIO时,QEMU会调用相应的回调函数进行模拟处理,如设置值、触发中断或进行DMA操作。
摘要由CSDN通过智能技术生成

接前一篇文章:

上一回讲解了pci_edu_realize函数中的pci_register_bar函数,本回开始对于edu设备的MMIO读写函数进行解析。

操作系统与PCI设备交互的主要方式是PIO和MMIO。MMIO虽然是一段内存,但是其没有EPT映射,在虚拟机访问设备的MMIO时,会产生VM Exit;KVM识别此MMIO访问并且将该访问分派到应用层QEMU中;QEMU根据内存虚拟化的步骤进行分派,找到设备注册的MMIO读写回调函数;设备的MMIO读写回调函数根据设备的功能进行模拟,完成模拟之后可能会发送中断到虚拟机中,从而完成一次MMIO访问。

前文书(QEMU源码全解析 —— PCI设备模拟(5))已经讲过,pci_edu_realize函数中调用memory_region_init_io函数,指定其读写回调函数集合为edu_mmio_ops。

edu_mmio_ops在hw/misc/edu.c中初始化,代码如下:

  1. static const MemoryRegionOps edu_mmio_ops = {
  2. .read = edu_mmio_read,
  3. .write = edu_mmio_write,
  4. .endianness = DEVICE_NATIVE_ENDIAN,
  5. .valid = {
  6. .min_access_size = 4,
  7. .max_access_size = 8,
  8. },
  9. .impl = {
  10. .min_access_size = 4,
  11. .max_access_size = 8,
  12. },
  13. };

edu_mmio_ops的类型为MemoryRegionOps,此结构在include/exec/memory.h中定义,代码如下:

typedef struct MemoryRegionOps MemoryRegionOps;

而struct MemoryRegionOps的定义也在include/exec/memory.h中,如下:

  1. /*
  2. * Memory region callbacks
  3. */
  4. struct MemoryRegionOps {
  5. /* Read from the memory region. @addr is relative to @mr; @size is
  6. * in bytes. */
  7. uint64_t (*read)(void *opaque,
  8. hwaddr addr,
  9. unsigned size);
  10. /* Write to the memory region. @addr is relative to @mr; @size is
  11. * in bytes. */
  12. void (*write)(void *opaque,
  13. hwaddr addr,
  14. uint64_t data,
  15. unsigned size);
  16. MemTxResult (*read_with_attrs)(void *opaque,
  17. hwaddr addr,
  18. uint64_t *data,
  19. unsigned size,
  20. MemTxAttrs attrs);
  21. MemTxResult (*write_with_attrs)(void *opaque,
  22. hwaddr addr,
  23. uint64_t data,
  24. unsigned size,
  25. MemTxAttrs attrs);
  26. enum device_endian endianness;
  27. /* Guest-visible constraints: */
  28. struct {
  29. /* If nonzero, specify bounds on access sizes beyond which a machine
  30. * check is thrown.
  31. */
  32. unsigned min_access_size;
  33. unsigned max_access_size;
  34. /* If true, unaligned accesses are supported. Otherwise unaligned
  35. * accesses throw machine checks.
  36. */
  37. bool unaligned;
  38. /*
  39. * If present, and returns #false, the transaction is not accepted
  40. * by the device (and results in machine dependent behaviour such
  41. * as a machine check exception).
  42. */
  43. bool (*accepts)(void *opaque, hwaddr addr,
  44. unsigned size, bool is_write,
  45. MemTxAttrs attrs);
  46. } valid;
  47. /* Internal implementation constraints: */
  48. struct {
  49. /* If nonzero, specifies the minimum size implemented. Smaller sizes
  50. * will be rounded upwards and a partial result will be returned.
  51. */
  52. unsigned min_access_size;
  53. /* If nonzero, specifies the maximum size implemented. Larger sizes
  54. * will be done as a series of accesses with smaller sizes.
  55. */
  56. unsigned max_access_size;
  57. /* If true, unaligned accesses are supported. Otherwise all accesses
  58. * are converted to (possibly multiple) naturally aligned accesses.
  59. */
  60. bool unaligned;
  61. } impl;
  62. };

其中的read和write函数分别表示该MMIO的读、写回调;endianness表示设备的字节序(大小端模式)。

以write回调函数为例,其在struct MemoryRegionOps中的声明,以及edu设备对应的实现函数edu_mmio_write的原型依次如下:

  1. /* Write to the memory region. @addr is relative to @mr; @size is
  2. * in bytes. */
  3. void (*write)(void *opaque,
  4. hwaddr addr,
  5. uint64_t data,
  6. unsigned size);
  1. static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
  2. unsigned size)

其原型中的opaque表示的是设备的对象;addr表示虚拟机所写的地址在该MMIO中的偏移地址;data(val)表示要写入的值;size表示写入值的大小,通常有单字节、双字节、四字节以及八字节。

edu_mmio_write函数同样在hw/misc/edu.c中,代码如下:

  1. static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
  2. unsigned size)
  3. {
  4. EduState *edu = opaque;
  5. if (addr < 0x80 && size != 4) {
  6. return;
  7. }
  8. if (addr >= 0x80 && size != 4 && size != 8) {
  9. return;
  10. }
  11. switch (addr) {
  12. case 0x04:
  13. edu->addr4 = ~val;
  14. break;
  15. case 0x08:
  16. if (qatomic_read(&edu->status) & EDU_STATUS_COMPUTING) {
  17. break;
  18. }
  19. /* EDU_STATUS_COMPUTING cannot go 0->1 concurrently, because it is only
  20. * set in this function and it is under the iothread mutex.
  21. */
  22. qemu_mutex_lock(&edu->thr_mutex);
  23. edu->fact = val;
  24. qatomic_or(&edu->status, EDU_STATUS_COMPUTING);
  25. qemu_cond_signal(&edu->thr_cond);
  26. qemu_mutex_unlock(&edu->thr_mutex);
  27. break;
  28. case 0x20:
  29. if (val & EDU_STATUS_IRQFACT) {
  30. qatomic_or(&edu->status, EDU_STATUS_IRQFACT);
  31. /* Order check of the COMPUTING flag after setting IRQFACT. */
  32. smp_mb__after_rmw();
  33. } else {
  34. qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT);
  35. }
  36. break;
  37. case 0x60:
  38. edu_raise_irq(edu, val);
  39. break;
  40. case 0x64:
  41. edu_lower_irq(edu, val);
  42. break;
  43. case 0x80:
  44. dma_rw(edu, true, &val, &edu->dma.src, false);
  45. break;
  46. case 0x88:
  47. dma_rw(edu, true, &val, &edu->dma.dst, false);
  48. break;
  49. case 0x90:
  50. dma_rw(edu, true, &val, &edu->dma.cnt, false);
  51. break;
  52. case 0x98:
  53. if (!(val & EDU_DMA_RUN)) {
  54. break;
  55. }
  56. dma_rw(edu, true, &val, &edu->dma.cmd, true);
  57. break;
  58. }
  59. }

edu_mmio_write函数展示了一个虚拟机在写设备MMIO地址时QEMU中设备模拟的典型行为。

(1)首先,需要检查访问的地址以及大小是否合法:偏移小于0x80的区域只允许4字节访问,偏移大于等于0x80的区域只允许4字节或8字节访问,否则直接返回。代码片段如下:

  1. if (addr < 0x80 && size != 4) {
  2. return;
  3. }
  4. if (addr >= 0x80 && size != 4 && size != 8) {
  5. return;
  6. }

(2)然后,根据具体的地址来进行适当的行为。

这些行为可以是简单地设置一个值,如这里的写0x04地址,代码片段如下:

  1. case 0x04:
  2. edu->addr4 = ~val;
  3. break;

也可以是将中断设置为高电平(写0x60地址)或者设置为低电平(写0x64地址),代码片段如下:

  1. case 0x60:
  2. edu_raise_irq(edu, val);
  3. break;
  4. case 0x64:
  5. edu_lower_irq(edu, val);
  6. break;

还可以是配置DMA相关的寄存器,以便设备随后通过DMA读写虚拟机的物理内存(例如写0x80地址,即通过dma_rw设置edu->dma.src),代码片段如下:

  1. case 0x80:
  2. dma_rw(edu, true, &val, &edu->dma.src, false);
  3. break;

对于read回调函数,也是类似的机制。这里仅给出edu_mmio_read函数源码,在hw/misc/edu.c中,代码如下:

  1. static uint64_t edu_mmio_read(void *opaque, hwaddr addr, unsigned size)
  2. {
  3. EduState *edu = opaque;
  4. uint64_t val = ~0ULL;
  5. if (addr < 0x80 && size != 4) {
  6. return val;
  7. }
  8. if (addr >= 0x80 && size != 4 && size != 8) {
  9. return val;
  10. }
  11. switch (addr) {
  12. case 0x00:
  13. val = 0x010000edu;
  14. break;
  15. case 0x04:
  16. val = edu->addr4;
  17. break;
  18. case 0x08:
  19. qemu_mutex_lock(&edu->thr_mutex);
  20. val = edu->fact;
  21. qemu_mutex_unlock(&edu->thr_mutex);
  22. break;
  23. case 0x20:
  24. val = qatomic_read(&edu->status);
  25. break;
  26. case 0x24:
  27. val = edu->irq_status;
  28. break;
  29. case 0x80:
  30. dma_rw(edu, false, &val, &edu->dma.src, false);
  31. break;
  32. case 0x88:
  33. dma_rw(edu, false, &val, &edu->dma.dst, false);
  34. break;
  35. case 0x90:
  36. dma_rw(edu, false, &val, &edu->dma.cnt, false);
  37. break;
  38. case 0x98:
  39. dma_rw(edu, false, &val, &edu->dma.cmd, false);
  40. break;
  41. }
  42. return val;
  43. }

欲知后事如何,且看下回分解。

举报

选择你想要举报的内容(必选)
  • 内容涉黄
  • 政治相关
  • 内容抄袭
  • 涉嫌广告
  • 内容侵权
  • 侮辱谩骂
  • 样式问题
  • 其他