diff --git a/Kconfig.host b/Kconfig.host index b03d5c6c0d6188cd5fdb331072dcf6c961be63f4..0f94c0208f869b975da9ad764aaf4df72c2e3dfe 100644 --- a/Kconfig.host +++ b/Kconfig.host @@ -61,4 +61,7 @@ config URMA_MIGRATION bool config HAM_MIGRATION + bool + +config VIRTCCA_MIGRATION bool \ No newline at end of file diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 92ecbc78ff30a275f7891f1503e70ab7eb2cb4f9..b33ad750cda110fc7b2873cded190465ec56ac61 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2660,8 +2660,11 @@ static int kvm_init(MachineState *ms) if (!s->kvm_dirty_ring_size) { dirty_log_manual_caps = kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); + if (virtcca_cvm_enabled()) + dirty_log_manual_caps &= (KVM_DIRTY_LOG_INITIALLY_SET); + else + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); s->manual_dirty_log_protect = dirty_log_manual_caps; if (dirty_log_manual_caps) { ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0, @@ -2675,6 +2678,10 @@ static int kvm_init(MachineState *ms) } } } +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) + s->manual_dirty_log_protect = 0; +#endif #ifdef KVM_CAP_VCPU_EVENTS s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index a1ec1dd4bfdeafcd297b4f00c3b88fa44c0f0b27..d8b98542226ec416822cae242b01a6925d8c39af 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2693,7 +2693,16 @@ static void fdt_add_all_hisi_nodes(const VirtMachineState *vms, int dev_id) fdt_add_hisi_hpre_nodes(vms, i); } } - +#ifdef CONFIG_VIRTCCA_MIGRATION +static void virt_migvm_cid_notify(Notifier *n, void *opaque) +{ + if (!virtcca_migcvm_enabled()) { + return; + } + virtcca_migvm_save_cid(); + info_report("Detected guest CID in machine_init_done"); +} +#endif static void machvirt_init(MachineState *machine) { 
VirtMachineState *vms = VIRT_MACHINE(machine); @@ -3054,11 +3063,16 @@ static void machvirt_init(MachineState *machine) vms->bootinfo.psci_conduit = vms->psci_conduit; vms->bootinfo.confidential = virt_machine_is_confidential(vms) || virtcca_cvm_enabled(); - vms->bootinfo.skip_bootloader = vms->bootinfo.confidential; + vms->bootinfo.skip_bootloader = virtcca_cvm_enabled() ? false : vms->bootinfo.confidential; arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo); - vms->machine_done.notify = virt_machine_done; qemu_add_machine_init_done_notifier(&vms->machine_done); +#ifdef CONFIG_VIRTCCA_MIGRATION /* guards only the migvm CID notifier; virt_machine_done stays unconditional */ + static Notifier migvm_cid_notifier = { + .notify = virt_migvm_cid_notify, + }; + qemu_add_machine_init_done_notifier(&migvm_cid_notifier); +#endif } static bool virt_get_secure(Object *obj, Error **errp) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index a71ebe26b427e6ca2ceb383b5d5adfe3460cfb91..c9b672d8fc8af9d5c323d974e18e5aa898b527ee 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1226,6 +1226,8 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, #define ASCEND710_XLOADER_OFFSET 0x100430 #define ASCEND310_XLOADER_SIZE 4 #define ASCEND310_XLOADER_OFFSET 0x400 +#define ASCEND710_LARGE_TEST_SIZE 0x1000 +#define ASCEND710_LARGE_TEST_OFFSET 0x100000 typedef struct VFIOAscendBarQuirk { struct VFIOPCIDevice *vdev; @@ -1296,6 +1298,79 @@ static void vfio_probe_ascend910_bar0_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); } +static uint64_t virtcca_vfio_ascend_710bar2_quirk_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIOAscendBarQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + + return vfio_region_read(&vdev->bars[quirk->bar].region, + addr + quirk->offset, size); +} + +static void virtcca_vfio_ascend_710bar2_quirk_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIOAscendBarQuirk *quirk = opaque; + VFIOPCIDevice *vdev = quirk->vdev; + hwaddr 
 offset_addr = addr + quirk->offset; + + if ((offset_addr >= ASCEND710_XLOADER_OFFSET && + offset_addr < ASCEND710_XLOADER_OFFSET + ASCEND710_XLOADER_SIZE) || + (offset_addr >= ASCEND710_XLOADER_OFFSET + ASCEND710_2P_BASE && + offset_addr < ASCEND710_XLOADER_OFFSET + ASCEND710_XLOADER_SIZE + ASCEND710_2P_BASE)) { + qemu_log("modifying RO region is not allowed! addr=0x%" + HWADDR_PRIx ", data=0x%" PRIx64 ", size=%d\n", + offset_addr, data, size); + return; + } + vfio_region_write(&vdev->bars[quirk->bar].region, offset_addr, data, size); +} + +static const MemoryRegionOps virtcca_vfio_710_ascend_intercept_bar2_regs_quirk = { + .read = virtcca_vfio_ascend_710bar2_quirk_read, + .write = virtcca_vfio_ascend_710bar2_quirk_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +/* + * Intercepts w/r to the 4K-aligned memory region containing the xloader-updating register. + * In virtCCA scenarios, the guest VM also cannot directly enable xloader-updating. Furthermore, + * other registers within this 4K region are not accessible from user mode. So we permit write + * access to this region except xloader-updating register via vfio_region_write. 
+ */ +static void virtcca_vfio_probe_ascend710_bar2_quirk(VFIOPCIDevice *vdev, VFIOQuirk *quirk, + VFIOAscendBarQuirk *bar2_quirk, int devnum, int nr) +{ + bar2_quirk[0].offset = ASCEND710_LARGE_TEST_OFFSET; + + memory_region_init_io(&quirk->mem[0], OBJECT(vdev), + &virtcca_vfio_710_ascend_intercept_bar2_regs_quirk, + &bar2_quirk[0], + "vfio-ascend710-bar2-1p-intercept-regs-quirk", + ASCEND710_LARGE_TEST_SIZE); + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, + bar2_quirk[0].offset, + &quirk->mem[0], 1); + + if (devnum == ASCEND710_2P_DEVNUM) { + bar2_quirk[1].vdev = vdev; + bar2_quirk[1].offset = (ASCEND710_2P_BASE + ASCEND710_LARGE_TEST_OFFSET); + bar2_quirk[1].bar = nr; + + memory_region_init_io(&quirk->mem[1], OBJECT(vdev), + &virtcca_vfio_710_ascend_intercept_bar2_regs_quirk, + &bar2_quirk[1], + "vfio-ascend710-bar2-2p-intercept-regs-quirk", + ASCEND710_LARGE_TEST_SIZE); + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, + bar2_quirk[1].offset, + &quirk->mem[1], 1); + } + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); +} + static void vfio_probe_ascend710_bar2_quirk(VFIOPCIDevice *vdev, int nr) { VFIOQuirk *quirk; @@ -1329,6 +1404,10 @@ static void vfio_probe_ascend710_bar2_quirk(VFIOPCIDevice *vdev, int nr) bar2_quirk[0].offset = ASCEND710_XLOADER_OFFSET; bar2_quirk[0].bar = nr; + if (virtcca_cvm_enabled()) { + return virtcca_vfio_probe_ascend710_bar2_quirk(vdev, quirk, bar2_quirk, devnum, nr); + } + /* * intercept w/r to the xloader-updating register, * so the vm can't enable xloader-updating diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 4669c070d634fb7ab897c0bea6b2604ab756e2b3..10aef5827a3d0652d2753d871c6887efa26add24 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -34,6 +34,7 @@ extern bool kvm_allowed; extern bool virtcca_cvm_allowed; +extern bool virtcca_mig_migcvm_allowed; extern bool kvm_kernel_irqchip; extern bool kvm_split_irqchip; extern bool kvm_async_interrupts_allowed; @@ 
-48,6 +49,7 @@ extern bool kvm_csv3_allowed; #define kvm_enabled() (kvm_allowed) #define virtcca_cvm_enabled() (virtcca_cvm_allowed) +#define virtcca_migcvm_enabled() (virtcca_mig_migcvm_allowed) #define VIRTCCA_CVM_TYPE (1UL << 8) /** * kvm_irqchip_in_kernel: diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 777b6688511051d150571265439486a0e5c95316..5c822273580b2685ef877d483aabcef1e5897566 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -591,9 +591,12 @@ struct reg_mask_range { #define KVM_CAP_ARM_TMM_MEASUREMENT_ALGO_SHA256 0 #define KVM_CAP_ARM_TMM_MEASUREMENT_ALGO_SHA512 1 + +#define KVM_CAP_ARM_TMM_MIGVM_DEFAULT 0 +#define KVM_CAP_ARM_TMM_MIGVM_ENABLE 1 #define KVM_CAP_ARM_TMM_RPV_SIZE 64 - + /* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ #define KVM_CAP_ARM_TMM_CFG_RPV 0 #define KVM_CAP_ARM_TMM_CFG_HASH_ALGO 1 @@ -601,6 +604,8 @@ struct reg_mask_range { #define KVM_CAP_ARM_TMM_CFG_DBG 3 #define KVM_CAP_ARM_TMM_CFG_PMU 4 #define KVM_CAP_ARM_TMM_CFG_KAE 5 +#define KVM_CAP_ARM_TMM_CFG_MIG 6 /* add mig config */ +#define KVM_CAP_ARM_TMM_CFG_MIG_CVM 7 #define KVM_ARM_TMM_MAX_KAE_VF_NUM 11 @@ -639,6 +644,18 @@ struct kvm_cap_arm_tmm_config_item { __u64 sec_addr[KVM_ARM_TMM_MAX_KAE_VF_NUM]; __u64 hpre_addr[KVM_ARM_TMM_MAX_KAE_VF_NUM]; }; + + /* cfg == KVM_CAP_ARM_TMM_CFG_MIG */ + struct { + __u32 mig_enable; + __u32 mig_src; + }; + + /* cfg == KVM_CAP_ARM_TMM_CFG_MIG_CVM */ + struct { + __u32 migration_migvm_cap; + }; + /* Fix the size of the union */ __u8 reserved[256]; }; @@ -654,6 +671,72 @@ struct kvm_cap_arm_tmm_populate_region_args { __u32 reserved[3]; }; +/* mig virtcca head */ +#define KVM_DEV_VIRTCCA_MIG_ATTR 0x1 + +struct kvm_dev_virtcca_mig_attr { +#define KVM_DEV_VIRTCCA_MIG_ATTR_VERSION 0 + __u32 version; +/* 4KB buffer can hold 512 entries at most */ +#define VIRTCCA_MIG_BUF_LIST_PAGES_MAX 512 + __u32 buf_list_pages; + __u32 max_migs; +}; + +#define 
 VIRTCCA_MIG_STREAM_MBMD_MAP_OFFSET 0 +#define VIRTCCA_MIG_STREAM_GPA_LIST_MAP_OFFSET 1 +#define VIRTCCA_MIG_STREAM_MAC_LIST_MAP_OFFSET 2 +#define VIRTCCA_MIG_STREAM_BUF_LIST_MAP_OFFSET 4 + +#define VIRTCCA_MIG_EXPORT_TRACK_F_IN_ORDER_DONE (1UL << 63) +#define KVM_CVM_MIGVM_VERSION 0 + +/* virtcca MIG sub-ioctl() commands. */ +enum kvm_cvm_cmd_id { + /* virtcca MIG migcvm commands. */ + KVM_CVM_MIGCVM_SET_CID = 0, + KVM_CVM_MIGCVM_ATTEST, + KVM_CVM_MIGCVM_ATTEST_DST, + KVM_CVM_GET_BIND_STATUS, + KVM_CVM_MIG_EXPORT_ABORT, + /* virtcca MIG stream commands. */ + KVM_CVM_MIG_STREAM_START, + KVM_CVM_MIG_EXPORT_STATE_IMMUTABLE, + KVM_CVM_MIG_IMPORT_STATE_IMMUTABLE, + KVM_CVM_MIG_EXPORT_MEM, + KVM_CVM_MIG_IMPORT_MEM, + KVM_CVM_MIG_EXPORT_TRACK, + KVM_CVM_MIG_IMPORT_TRACK, + KVM_CVM_MIG_EXPORT_PAUSE, + KVM_CVM_MIG_EXPORT_STATE_TEC, + KVM_CVM_MIG_IMPORT_STATE_TEC, + KVM_CVM_MIG_IMPORT_END, + KVM_CVM_MIG_CRC, + KVM_CVM_MIG_GET_MIG_INFO, + KVM_CVM_MIG_IS_ZERO_PAGE, + KVM_CVM_MIG_IMPORT_ZERO_PAGE, + KVM_CVM_MIG_CMD_NR_MAX, +}; + +struct kvm_virtcca_mig_cmd { + /* enum kvm_cvm_cmd_id */ + __u32 id; + /* flags for sub-command. If sub-command doesn't use this, set zero. */ + __u32 flags; + /* + * data for each sub-command. An immediate or a pointer to the actual + * data in process virtual address. If sub-command doesn't use it, + * set zero. + */ + __u64 data; + /* + * Auxiliary error code. The sub-command may return TDX SEAMCALL + * status code in addition to -Exxx. + * Defined for consistency with struct kvm_sev_cmd. 
+ */ + __u64 error; +}; + #endif #endif /* __ARM_KVM_H__ */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 422a811f7e99c0fd451d05066ab3c8dc63d109f6..aba954479a9fa72363fbfa5b859d26941acc48e0 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -1500,6 +1500,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_RISCV_AIA, #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_VIRTCCA_MIG_STREAM = 0x00C, +#define KVM_DEV_TYPE_VIRTCCA_MIG_STREAM KVM_DEV_TYPE_VIRTCCA_MIG_STREAM KVM_DEV_TYPE_LOONGARCH_IPI, #define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_EIOINTC, @@ -1769,6 +1771,9 @@ struct kvm_enc_region { __u64 size; }; +/* virtcca migration */ +#define KVM_CVM_MIG_IOCTL _IOWR(KVMIO, 0xf2, struct kvm_virtcca_mig_cmd) + #define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region) #define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region) diff --git a/meson.build b/meson.build index e9c45426bde10eb10d1d9e948da0da035b9d3cdf..5dfdb7b5a47ec5c91a261caee44b9085e4c0154b 100644 --- a/meson.build +++ b/meson.build @@ -591,8 +591,13 @@ if cpu in ['aarch64'] have_ham_migration = get_option('ham_migration') \ .require(targetos == 'linux', error_message: 'ham_migration is supported only on Linux') \ .allowed() + # virtcca migration + have_virtcca_migration = get_option('virtcca_migration') \ + .require(targetos == 'linux', error_message: 'virtcca_migration is supported only on Linux') \ + .allowed() else have_ham_migration = false + have_virtcca_migration = false endif # urma migration @@ -2310,6 +2315,7 @@ config_host_data.set('CONFIG_VHOST_USER', have_vhost_user) config_host_data.set('CONFIG_VHOST_CRYPTO', have_vhost_user_crypto) config_host_data.set('CONFIG_UB', have_ub) config_host_data.set('CONFIG_HAM_MIGRATION', have_ham_migration) +config_host_data.set('CONFIG_VIRTCCA_MIGRATION', 
have_virtcca_migration) config_host_data.set('CONFIG_URMA_MIGRATION', have_urma_migration) config_host_data.set('CONFIG_VHOST_VDPA', have_vhost_vdpa) config_host_data.set('CONFIG_VMNET', vmnet.found()) @@ -3025,6 +3031,7 @@ host_kconfig = \ (x11.found() ? ['CONFIG_X11=y'] : []) + \ (have_ub ? ['CONFIG_UB=y'] : []) + \ (have_ham_migration ? ['CONFIG_HAM_MIGRATION=y'] : []) + \ + (have_virtcca_migration ? ['CONFIG_VIRTCCA_MIGRATION=y'] : []) + \ (have_urma_migration ? ['CONFIG_URMA_MIGRATION=y'] : []) + \ (have_vhost_user ? ['CONFIG_VHOST_USER=y'] : []) + \ (have_vhost_vdpa ? ['CONFIG_VHOST_VDPA=y'] : []) + \ @@ -4257,6 +4264,7 @@ summary_info += {'QOM debugging': get_option('qom_cast_debug')} summary_info += {'Relocatable install': get_option('relocatable')} summary_info += {'ub support': have_ub} summary_info += {'ham migration support': have_ham_migration} +summary_info += {'virtcca migration support': have_virtcca_migration} summary_info += {'urma migration support': have_urma_migration} summary_info += {'vhost-kernel support': have_vhost_kernel} summary_info += {'vhost-net support': have_vhost_net} diff --git a/meson_options.txt b/meson_options.txt index a2fb02d3a9541449a5f54c9621c934d4487b9289..a118fbce4b25be198d2fe684052c9fbae0b94fc9 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -300,6 +300,9 @@ option('urma_migration', type: 'feature', value: 'disabled', option('ham_migration', type: 'feature', value: 'auto', description: 'live migration via memory semantics') +option('virtcca_migration', type: 'feature', value: 'auto', + description: 'live migration of arm virtcca CVM') + option('vhost_kernel', type: 'feature', value: 'auto', description: 'vhost kernel backend support') option('vhost_net', type: 'feature', value: 'auto', diff --git a/migration/cgs-virtcca.c b/migration/cgs-virtcca.c new file mode 100644 index 0000000000000000000000000000000000000000..11fcc5d1cda0dee99b678bbdb2a521c67fb2d26b --- /dev/null +++ b/migration/cgs-virtcca.c @@ -0,0 
+1,854 @@
+#include "qemu/osdep.h"
+#include "qemu-file.h"
+#include "cgs.h"
+#include "target/arm/kvm_arm.h"
+#include "migration/misc.h"
+#include "qemu/error-report.h"
+#include "hw/boards.h"
+
+/* MBMD, gpa_list and 2 pages of mac_list */
+#define MULTIFD_EXTRA_IOV_NUM 4
+
+/* Bytes of the MBMD for memory page, calculated from the spec */
+#define VIRTCCA_MBMD_MEM_BYTES 48
+
+/* qemufile header mbmd msg */
+#define KVM_VIRTCCA_MIG_MBMD_TYPE_IMMUTABLE_STATE 0
+#define KVM_VIRTCCA_MIG_MBMD_TYPE_VCPU_STATE 2
+#define KVM_VIRTCCA_MIG_MBMD_TYPE_MEMORY_STATE 16
+#define KVM_VIRTCCA_MIG_MBMD_TYPE_EPOCH_TOKEN 32
+#define KVM_VIRTCCA_MIG_MBMD_TYPE_ABORT_TOKEN 33
+
+#define GPA_LIST_OP_EXPORT 1
+#define GPA_LIST_OP_CANCEL 2
+#define GPA_LIST_OP_CHECK_ZERO_PAGE 3
+
+#define CVM_MIG_F_CONTINUE 0x1
+
+#define VIRTCCA_SYSFS_MIG_CHECK_SRC "/sys/kernel/tmm/migration/mig_check_src"
+#define VIRTCCA_SYSFS_MIG_CHECK_DST "/sys/kernel/tmm/migration/mig_check_dst"
+/* Maximum number of decimal digits of a uint64_t */
+#define UINT64_LEN 20
+#define MB_SHIFT 20
+#define SYSFS_RESULT_LEN 10
+
+/*
+ * Byte sizes of the three fixed-size regions mapped from a stream device fd.
+ * They follow from the page offsets at which the regions are mmap()ed.
+ */
+#define VIRTCCA_MIG_MBMD_MAP_BYTES \
+    ((size_t)(VIRTCCA_MIG_STREAM_GPA_LIST_MAP_OFFSET - \
+              VIRTCCA_MIG_STREAM_MBMD_MAP_OFFSET) * TARGET_PAGE_SIZE)
+#define VIRTCCA_MIG_GPA_LIST_MAP_BYTES \
+    ((size_t)(VIRTCCA_MIG_STREAM_MAC_LIST_MAP_OFFSET - \
+              VIRTCCA_MIG_STREAM_GPA_LIST_MAP_OFFSET) * TARGET_PAGE_SIZE)
+#define VIRTCCA_MIG_MAC_LIST_MAP_BYTES \
+    ((size_t)(VIRTCCA_MIG_STREAM_BUF_LIST_MAP_OFFSET - \
+              VIRTCCA_MIG_STREAM_MAC_LIST_MAP_OFFSET) * TARGET_PAGE_SIZE)
+
+struct virtcca_bind_info {
+    int16_t version;
+    bool premig_done;
+};
+
+struct virtcca_dst_host_info {
+    char dst_ip[16];
+    uint16_t dst_port;
+    uint8_t version;
+};
+
+/* Header written in front of every record put on the migration stream */
+typedef struct virtCCAMigHdr {
+    uint16_t flags;
+    uint16_t buf_list_num;
+} virtCCAMigHdr;
+
+typedef union GpaListEntry {
+    uint64_t val;
+    struct {
+        uint64_t level : 2;
+        uint64_t pending : 1;
+        uint64_t reserved_0 : 4;
+        uint64_t l2_map : 3;
+#define GPA_LIST_ENTRY_MIG_TYPE_4KB 0
+        uint64_t mig_type : 2;
+        uint64_t gfn : 40;
+/* every ipa operation flags including nop, export, cancel */
+        uint64_t operation : 2;
+        uint64_t reserved_1 : 2;
+        uint64_t status : 5;
+        uint64_t reserved_2 : 3;
+    };
+} GpaListEntry;
+
+
+virtCCAMigState virtCCA_mig;
+
+/*
+ * Issue one virtCCA migration sub-command on a stream device fd.
+ *
+ * @cmd_id is stored in kvm_virtcca_mig_cmd.id, @metadata in .flags and @data
+ * (an immediate or a host pointer) in .data.  Returns what
+ * kvm_device_ioctl() returns: 0 on success, a negative errno on failure.
+ */
+static int virtcca_mig_stream_ioctl(virtCCAMigStream *stream, int cmd_id,
+                                    __u32 metadata, void *data)
+{
+    struct kvm_virtcca_mig_cmd cmd;
+    int ret;
+
+    memset(&cmd, 0x0, sizeof(cmd));
+    cmd.id = cmd_id;
+    cmd.flags = metadata;
+    cmd.data = (__u64)(uintptr_t)data;
+
+    ret = kvm_device_ioctl(stream->fd, KVM_CVM_MIG_IOCTL, &cmd);
+    if (ret) {
+        /* ret is a negative errno, strerror() needs the positive value */
+        error_report("Failed to send migration cmd %d to the driver: %s",
+                     cmd_id, strerror(-ret));
+    }
+
+    return ret;
+}
+
+/*
+ * Write a virtCCAMigHdr carrying @flags and @num (truncated to 16 bits) to
+ * @f.  Returns the number of bytes written for the header.
+ */
+static uint64_t virtcca_mig_put_mig_hdr(QEMUFile *f, uint64_t num, uint16_t flags)
+{
+    virtCCAMigHdr hdr = {
+        .flags = flags,
+        .buf_list_num = (uint16_t)num,
+    };
+
+    qemu_put_buffer(f, (uint8_t *)&hdr, sizeof(hdr));
+
+    return sizeof(hdr);
+}
+
+/* Return the MBMD size stored in the first 2 bytes of the MBMD buffer */
+static inline uint64_t virtcca_mig_stream_get_mbmd_bytes(virtCCAMigStream *stream)
+{
+    uint16_t bytes;
+
+    memcpy(&bytes, stream->mbmd, sizeof(bytes));
+
+    return (uint64_t)bytes;
+}
+
+static uint8_t virtcca_mig_stream_get_mbmd_type(virtCCAMigStream *stream)
+{
+    /* MB_TYPE at byte offset 6, virtcca temporarily reuse this structure */
+    return *((uint8_t *)stream->mbmd + 6);
+}
+
+/*
+ * Append "<number>-<node_mask>" to @result at offset *@current_len, preceded
+ * by a single space when the string is not empty, and advance *@current_len.
+ *
+ * The caller must provide room for up to 2 * UINT64_LEN + 2 more bytes.
+ * Returns 0 on success, -1 on a NULL argument or a formatting failure.
+ */
+int append_number_to_string(char *result, size_t *current_len, uint64_t number, uint64_t node_mask)
+{
+    /* two numbers of up to UINT64_LEN digits, the '-' and the NUL */
+    char buffer[2 * UINT64_LEN + 2];
+    int buffer_len;
+
+    if (!result || !current_len) {
+        error_report("Invalid pointer");
+        return -1;
+    }
+
+    buffer_len = snprintf(buffer, sizeof(buffer), "%" PRIu64 "-%" PRIu64,
+                          number, node_mask);
+    if (buffer_len < 0 || (size_t)buffer_len >= sizeof(buffer)) {
+        error_report("Failed to format numa info");
+        return -1;
+    }
+
+    if (*current_len > 0) {
+        result[*current_len] = ' ';
+        *current_len += 1;
+    }
+
+    memcpy(result + *current_len, buffer, (size_t)buffer_len + 1);
+    *current_len += (size_t)buffer_len;
+    return 0;
+}
+
+/*
+ * Write one "<ipa size in MB>-<first host node word>" token per entry of
+ * @numa_info, space separated, to the source or destination check file.
+ *
+ * Returns 0 on success or when the sysfs file does not exist (the check is
+ * skipped), -EINVAL if the string cannot be built, -EIO on I/O failure.
+ */
+int write_numa_info_to_sysfs(struct kvm_numa_info *numa_info, bool is_src)
+{
+    const char *path = is_src ? VIRTCCA_SYSFS_MIG_CHECK_SRC : VIRTCCA_SYSFS_MIG_CHECK_DST;
+    struct kvm_numa_node *numa_node;
+    char *numa_str = NULL;
+    size_t current_len = 0;
+    /* per node: two numbers, '-' and a separator; one spare byte for the NUL */
+    size_t estimated_size = numa_info->numa_cnt * (UINT64_LEN + 1) * 2 + 1;
+    int ret = 0;
+    struct stat buffer;
+    FILE *sysfs_file = NULL;
+
+    if (stat(path, &buffer) != 0) {
+        info_report("Unable to get tmm driver, skip check.");
+        return 0;
+    }
+
+    numa_str = g_malloc0(estimated_size);
+    for (int idx = 0; idx < numa_info->numa_cnt; idx++) {
+        numa_node = &(numa_info->numa_nodes[idx]);
+        if (append_number_to_string(numa_str, &current_len,
+                (numa_node->ipa_size >> MB_SHIFT), numa_node->host_numa_nodes[0])) {
+            ret = -EINVAL;
+            goto out;
+        }
+    }
+
+    sysfs_file = fopen(path, "w");
+    if (!sysfs_file) {
+        error_report("Failed to open sysfs file");
+        ret = -EIO;
+        goto out;
+    }
+
+    if (fprintf(sysfs_file, "%s\n", numa_str) < 0) {
+        error_report("Failed to write to sysfs file");
+        ret = -EIO;
+        goto out;
+    }
+
+    /*
+     * stdio buffers the string, so the write that actually reaches sysfs
+     * happens when the stream is flushed; check the result of fclose().
+     */
+    ret = fclose(sysfs_file) ? -EIO : 0;
+    sysfs_file = NULL;
+    if (ret) {
+        error_report("Failed to write to sysfs file");
+    }
+
+out:
+    if (sysfs_file) {
+        fclose(sysfs_file);
+    }
+    g_free(numa_str);
+
+    return ret;
+}
+
+/*
+ * Read back the verdict from the check file selected by @is_src.
+ *
+ * Returns 1 when the file starts with "1", 0 when the file does not exist
+ * (check skipped), -1 on any other content or on an I/O error.
+ */
+int get_migration_result_from_sysfs(bool is_src)
+{
+    const char *path = is_src ? VIRTCCA_SYSFS_MIG_CHECK_SRC : VIRTCCA_SYSFS_MIG_CHECK_DST;
+    int ret = 0;
+    struct stat buffer;
+    char buf[SYSFS_RESULT_LEN];
+    FILE *sysfs_file = NULL;
+
+    if (stat(path, &buffer) != 0) {
+        info_report("Unable to get tmm driver, skip check.");
+        return 0;
+    }
+
+    sysfs_file = fopen(path, "r");
+    if (!sysfs_file) {
+        error_report("Failed to open sysfs file");
+        return -1;
+    }
+
+    if (fgets(buf, sizeof(buf), sysfs_file) == NULL) {
+        error_report("Error reading sysfs file");
+        ret = -1;
+    } else if (strncmp(buf, "1", 1) == 0) {
+        ret = 1;
+        info_report("Migration check succeeded");
+    } else {
+        ret = -1;
+        info_report("Migration check failed");
+    }
+
+    fclose(sysfs_file);
+    return ret;
+}
+
+/*
+ * Describe the guest memory layout (one entry per guest NUMA node, or a
+ * single entry covering ram_size when NUMA is not configured), hand it to
+ * the sysfs check file and return the verdict that is read back.
+ *
+ * Returns a negative value on failure, otherwise the result of
+ * get_migration_result_from_sysfs().
+ */
+int virtcca_check_mig_mem(bool is_src)
+{
+    MachineState *ms = (MachineState *)qdev_get_machine();
+    struct kvm_numa_info *numa_info = NULL;
+    int ret = 0;
+
+    numa_info = g_malloc0(sizeof(struct kvm_numa_info));
+    if (ms->numa_state != NULL && ms->numa_state->num_nodes > 0) {
+        /* never advertise more entries than the loop below fills in */
+        numa_info->numa_cnt = MIN(ms->numa_state->num_nodes, MAX_NUMA_NODE);
+        for (int64_t i = 0; i < ms->numa_state->num_nodes && i < MAX_NUMA_NODE; i++) {
+            numa_info->numa_nodes[i].numa_id = i;
+            numa_info->numa_nodes[i].ipa_size = ms->numa_state->nodes[i].node_mem;
+            /* a node without a memory backend keeps the zeroed host node word */
+            if (ms->numa_state->nodes[i].node_memdev) {
+                numa_info->numa_nodes[i].host_numa_nodes[0] =
+                    ms->numa_state->nodes[i].node_memdev->host_nodes[0];
+            }
+        }
+    } else {
+        /* host_numa_nodes is already zero: numa_info comes from g_malloc0() */
+        numa_info->numa_cnt = 1;
+        numa_info->numa_nodes[0].numa_id = 0;
+        numa_info->numa_nodes[0].ipa_size = ms->ram_size;
+    }
+
+    ret = write_numa_info_to_sysfs(numa_info, is_src);
+    if (ret < 0) {
+        goto out;
+    }
+    ret = get_migration_result_from_sysfs(is_src);
+
+out:
+    g_free(numa_info);
+    return ret;
+}
+
+/*
+ * Export the immutable state on stream 0 and put header, MBMD and the
+ * exported buffer pages on @f.  Returns 0 or the ioctl error.
+ */
+static int virtcca_mig_savevm_state_start(QEMUFile *f)
+{
+    info_report("Entering virtcca_mig_savevm_state_start");
+    virtCCAMigStream *stream = &virtCCA_mig.streams[0];
+    uint64_t mbmd_bytes, buf_list_bytes, exported_num = 0;
+    int ret;
+
+    /* Export mbmd and buf_list */
+    ret = virtcca_mig_stream_ioctl(stream, KVM_CVM_MIG_EXPORT_STATE_IMMUTABLE,
+                                   0, &exported_num);
+    if (ret) {
+        error_report("Failed to export immutable states: %s", strerror(-ret));
+        return ret;
+    }
+
+    mbmd_bytes = virtcca_mig_stream_get_mbmd_bytes(stream);
+    buf_list_bytes = exported_num * TARGET_PAGE_SIZE;
+
+    virtcca_mig_put_mig_hdr(f, exported_num, 0);
+    qemu_put_buffer(f, (uint8_t *)stream->mbmd, mbmd_bytes);
+    qemu_put_buffer(f, (uint8_t *)stream->buf_list, buf_list_bytes);
+
+    return 0;
+}
+
+/*
+ * Export an epoch token on stream 0 and put header and MBMD on @f.
+ * Returns the number of bytes written, or the ioctl error.
+ */
+static long virtcca_mig_save_epoch(QEMUFile *f, bool in_order_done)
+{
+    virtCCAMigStream *stream = &virtCCA_mig.streams[0];
+    uint64_t flags = in_order_done ? VIRTCCA_MIG_EXPORT_TRACK_F_IN_ORDER_DONE : 0;
+    long virtcca_hdr_bytes, mbmd_bytes;
+    int ret;
+
+    ret = virtcca_mig_stream_ioctl(stream, KVM_CVM_MIG_EXPORT_TRACK, 0, &flags);
+    if (ret) {
+        return ret;
+    }
+
+    mbmd_bytes = virtcca_mig_stream_get_mbmd_bytes(stream);
+
+    /* Epoch only has mbmd data */
+    virtcca_hdr_bytes = virtcca_mig_put_mig_hdr(f, 0, 0);
+    qemu_put_buffer(f, (uint8_t *)stream->mbmd, mbmd_bytes);
+
+    return virtcca_hdr_bytes + mbmd_bytes;
+}
+
+static long virtcca_mig_savevm_state_ram_start_epoch(QEMUFile *f)
+{
+    return virtcca_mig_save_epoch(f, false);
+}
+
+/* Fill @gpa_num 4KB entries of @gpa_list from @gpa, all tagged @operation */
+static void virtcca_mig_gpa_list_setup(union GpaListEntry *gpa_list, hwaddr *gpa,
+                                       uint64_t gpa_num, int operation)
+{
+    uint64_t i;
+
+    for (i = 0; i < gpa_num; i++) {
+        gpa_list[i].val = 0;
+        gpa_list[i].gfn = gpa[i] >> TARGET_PAGE_BITS;
+        gpa_list[i].mig_type = GPA_LIST_ENTRY_MIG_TYPE_4KB;
+        gpa_list[i].operation = operation;
+    }
+}
+
+/*
+ * Ask the driver whether the private page at @cgs_private_gpa is a zero
+ * page.  Returns false when the query fails.  @len is not used.
+ */
+bool virtcca_is_zero_page(uint32_t channel_id, hwaddr cgs_private_gpa, size_t len)
+{
+    int ret;
+    bool is_zero_page = false;
+    virtCCAMigStream *stream = &virtCCA_mig.streams[channel_id];
+
+    virtcca_mig_gpa_list_setup((GpaListEntry *)stream->gpa_list,
+                               &cgs_private_gpa, 1, GPA_LIST_OP_CHECK_ZERO_PAGE);
+
+    ret = virtcca_mig_stream_ioctl(stream, KVM_CVM_MIG_IS_ZERO_PAGE, 0, &is_zero_page);
+    if (ret) {
+        error_report("%s: failed: failed to check zero page %d", __func__, ret);
+        return false;
+    }
+
+    return is_zero_page;
+}
+
+/*
+ * Import a zero page for the guest address backing @host.
+ *
+ * Returns 0 without touching the driver when
+ * kvm_physical_memory_addr_from_host() yields 0 for @host, 0 after a
+ * successful import and -1 when the import ioctl fails.
+ */
+int virtcca_import_zero_page(uint32_t channel_id, void *host)
+{
+    int ret = 0;
+    virtCCAMigStream *stream = &virtCCA_mig.streams[channel_id];
+    hwaddr cgs_private_gpa;
+
+    ret = kvm_physical_memory_addr_from_host(kvm_state, host,
+                                             &cgs_private_gpa);
+    if (!ret) {
+        return 0;
+    }
+
+    /* the guest physical address is passed as an immediate, not a pointer */
+    ret = virtcca_mig_stream_ioctl(stream, KVM_CVM_MIG_IMPORT_ZERO_PAGE, 0,
+                                   (void *)(uintptr_t)cgs_private_gpa);
+    if (ret) {
+        error_report("%s: failed: failed to import zero page %d", __func__, ret);
+        return -1;
+    }
+
+    return ret;
+}
+
+/*
+ * Export the single page described by stream->gpa_list and put header,
+ * MBMD, page data, gpa entry and MAC on @f.
+ * Returns the number of bytes written, or the ioctl error.
+ */
+static long virtcca_mig_save_ram(QEMUFile *f, virtCCAMigStream *stream)
+{
+    uint64_t num = 1;
+    uint64_t hdr_bytes, mbmd_bytes, gpa_list_bytes,
+             buf_list_bytes, mac_list_bytes;
+    int ret;
+
+    /* Export mbmd, buf list, mac list and gpa list */
+    ret = virtcca_mig_stream_ioctl(stream, KVM_CVM_MIG_EXPORT_MEM, 0, &num);
+    if (ret) {
+        return ret;
+    }
+
+    mbmd_bytes = virtcca_mig_stream_get_mbmd_bytes(stream);
+    buf_list_bytes = TARGET_PAGE_SIZE;
+    mac_list_bytes = sizeof(Int128);
+    gpa_list_bytes = sizeof(GpaListEntry);
+
+    hdr_bytes = virtcca_mig_put_mig_hdr(f, 1, 0);
+    qemu_put_buffer(f, (uint8_t *)stream->mbmd, mbmd_bytes);
+    qemu_put_buffer(f, (uint8_t *)stream->buf_list, buf_list_bytes);
+    qemu_put_buffer(f, (uint8_t *)stream->gpa_list, gpa_list_bytes);
+    qemu_put_buffer(f, (uint8_t *)stream->mac_list, mac_list_bytes);
+
+    return hdr_bytes + mbmd_bytes + gpa_list_bytes +
+           buf_list_bytes + mac_list_bytes;
+}
+
+/* Export and save the private page at @gpa through stream @channel_id */
+static long virtcca_mig_savevm_state_ram(QEMUFile *f, uint32_t channel_id,
+                                         hwaddr gpa)
+{
+    virtCCAMigStream *stream = &virtCCA_mig.streams[channel_id];
+
+    virtcca_mig_gpa_list_setup((GpaListEntry *)stream->gpa_list,
+                               &gpa, 1, GPA_LIST_OP_EXPORT);
+    return virtcca_mig_save_ram(f, stream);
+}
+
+static int virtcca_mig_savevm_state_pause(void)
+{
+    virtCCAMigStream *stream = &virtCCA_mig.streams[0];
+
+    return virtcca_mig_stream_ioctl(stream, KVM_CVM_MIG_EXPORT_PAUSE, 0, 0);
+}
+
+/*
+ * Export one TEC state and put header (flagged CVM_MIG_F_CONTINUE), MBMD
+ * and the exported buffer pages on @f.  Returns 0 or the ioctl error.
+ */
+static int virtcca_mig_save_one_tec(QEMUFile *f, virtCCAMigStream *stream)
+{
+    uint64_t mbmd_bytes, buf_list_bytes, exported_num = 0;
+    int ret;
+
+    ret = virtcca_mig_stream_ioctl(stream, KVM_CVM_MIG_EXPORT_STATE_TEC, 0,
+                                   &exported_num);
+    if (ret) {
+        return ret;
+    }
+
+    mbmd_bytes = virtcca_mig_stream_get_mbmd_bytes(stream);
+    buf_list_bytes = exported_num * TARGET_PAGE_SIZE;
+    /* Ask the destination to continue to load the next vCPU states */
+    virtcca_mig_put_mig_hdr(f, exported_num, CVM_MIG_F_CONTINUE);
+
+    qemu_put_buffer(f, (uint8_t *)stream->mbmd, mbmd_bytes);
+    qemu_put_buffer(f, (uint8_t *)stream->buf_list, buf_list_bytes);
+
+    return 0;
+}
+
+/* Export one TEC state per vCPU; stops at the first failure */
+static int virtcca_mig_save_tecs(QEMUFile *f, virtCCAMigStream *stream)
+{
+    CPUState *cpu;
+    int ret;
+
+    CPU_FOREACH(cpu) {
+        ret = virtcca_mig_save_one_tec(f, stream);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/* Issue KVM_CVM_MIG_CRC on stream 0; returns 0 or the ioctl error */
+int virtcca_mig_crc(void)
+{
+    virtCCAMigStream *stream = &virtCCA_mig.streams[0];
+
+    return virtcca_mig_stream_ioctl(stream, KVM_CVM_MIG_CRC, 0, 0);
+}
+
+/* Save all TEC states followed by the final, in-order-done epoch token */
+static int virtcca_mig_savevm_state_end(QEMUFile *f)
+{
+    virtCCAMigStream *stream = &virtCCA_mig.streams[0];
+    int ret;
+
+    ret = virtcca_mig_save_tecs(f, stream);
+    if (ret) {
+        return ret;
+    }
+
+    ret = virtcca_mig_save_epoch(f, true);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return 0;
+}
+
+/*
+ * Send the attestation request to KVM: KVM_CVM_MIGCVM_ATTEST with the
+ * destination address on the source, KVM_CVM_MIGCVM_ATTEST_DST otherwise.
+ * Returns true when the ioctl succeeds.
+ */
+static bool virtcca_migvm_agent_attest(bool is_src, const char *dst_ip, uint16_t dst_port)
+{
+    struct virtcca_dst_host_info info;
+    /* zeroed so that no indeterminate field is handed to the kernel */
+    struct kvm_virtcca_mig_cmd cmd = { 0 };
+    int ret = 0;
+
+    memset(&info, 0, sizeof(struct virtcca_dst_host_info));
+    info.version = KVM_CVM_MIGVM_VERSION;
+    cmd.data = (__u64)(uintptr_t)&info;
+    if (is_src) {
+        if (dst_ip == NULL) {
+            error_report("migration dst ip is NULL");
+            return false;
+        }
+
+        /* g_strlcpy() always terminates and returns strlen(dst_ip) */
+        if (g_strlcpy(info.dst_ip, dst_ip, sizeof(info.dst_ip)) >=
+            sizeof(info.dst_ip)) {
+            error_report("migration dst ip too long");
+            return false;
+        }
+        info.dst_port = dst_port;
+        cmd.id = KVM_CVM_MIGCVM_ATTEST;
+    } else {
+        cmd.id = KVM_CVM_MIGCVM_ATTEST_DST;
+    }
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_CVM_MIG_IOCTL, &cmd);
+    if (ret) {
+        error_report("Failed to virtcca_migvm_agent_attest: %d", ret);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * On the source, query KVM_CVM_GET_BIND_STATUS and return the premig_done
+ * flag (false if the ioctl fails).  Always true on the destination.
+ */
+static bool virtcca_premig_is_done(bool is_src)
+{
+    struct virtcca_bind_info info;
+    struct kvm_virtcca_mig_cmd cmd = { 0 };
+    int ret;
+
+    if (!is_src) {
+        return true;
+    }
+
+    memset(&info, 0, sizeof(struct virtcca_bind_info));
+    info.version = KVM_CVM_MIGVM_VERSION;
+    cmd.id = KVM_CVM_GET_BIND_STATUS;
+    cmd.data = (__u64)(uintptr_t)&info;
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_CVM_MIG_IOCTL, &cmd);
+    if (ret) {
+        error_report("Failed to get the migration info: %d", ret);
+        return false;
+    }
+
+    return !!info.premig_done;
+}
+
+/* check the mig */
+static bool virtcca_mig_is_ready(bool is_src, const char *dst_ip, uint16_t dst_port)
+{
+    int ret;
+
+    ret = virtcca_check_mig_mem(is_src);
+    if (ret < 0) {
+        error_report("Failed to migrate cvm, secure memory is insufficient.");
+        return false;
+    }
+
+    if (!virtcca_migvm_agent_attest(is_src, dst_ip, dst_port)) {
+        return false;
+    }
+
+    return virtcca_premig_is_done(is_src);
+}
+
+/*
+ * Fetch the swiotlb range from the driver through stream 0 and cache it in
+ * virtCCA_mig.  The cached range is left untouched when the ioctl fails.
+ */
+static int virtcca_get_mig_info(void)
+{
+    int ret;
+    virtCCAMigStream *stream = &virtCCA_mig.streams[0];
+    virtCCAMigInfo virtca_mig_info;
+
+    memset(&virtca_mig_info, 0, sizeof(virtca_mig_info));
+    ret = virtcca_mig_stream_ioctl(stream, KVM_CVM_MIG_GET_MIG_INFO, 0, &virtca_mig_info);
+    if (ret) {
+        error_report("virtcca_get_mig_info failed!");
+        return ret;
+    }
+
+    virtCCA_mig.swiotlb_start = virtca_mig_info.swiotlb_start;
+    virtCCA_mig.swiotlb_end = virtca_mig_info.swiotlb_end;
+    return 0;
+}
+
+/* Enable the ko creation */
+static int virtcca_mig_stream_create(virtCCAMigStream *stream)
+{
+    info_report("Entering and calling virtcca_mig_stream_create");
+    int ret;
+
+    ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_VIRTCCA_MIG_STREAM, false);
+    if (ret < 0) {
+        /* the error is carried by ret; errno may have been overwritten */
+        error_report("Failed to create virtcca mig stream due to %s", strerror(-ret));
+        return ret;
+    }
+    stream->fd = ret;
+
+    return 0;
+}
+
+/*
+ * Read the stream attributes (notably buf_list_pages) back from the driver
+ * into @mig_attr.  Returns the result of kvm_device_ioctl().
+ */
+static int virtcca_mig_stream_get_attr(virtCCAMigStream *stream,
+                                       struct kvm_dev_virtcca_mig_attr *mig_attr)
+{
+    struct kvm_device_attr attr = {
+        .group = KVM_DEV_VIRTCCA_MIG_ATTR,
+        .addr = (uint64_t)(uintptr_t)mig_attr,
+        .attr = sizeof(struct kvm_dev_virtcca_mig_attr),
+    };
+
+    memset(mig_attr, 0, sizeof(struct kvm_dev_virtcca_mig_attr));
+    mig_attr->version = KVM_DEV_VIRTCCA_MIG_ATTR_VERSION;
+
+    return kvm_device_ioctl(stream->fd, KVM_GET_DEVICE_ATTR, &attr);
+}
+
+/*
+ * Unmap whichever stream regions are currently mapped and reset the
+ * pointers.  buf_list is only unmapped when @buf_list_pages is non-zero,
+ * because its size is not known otherwise.
+ */
+static void virtcca_mig_stream_unmap(virtCCAMigStream *stream,
+                                     uint32_t buf_list_pages)
+{
+    if (stream->mbmd && stream->mbmd != MAP_FAILED) {
+        munmap(stream->mbmd, VIRTCCA_MIG_MBMD_MAP_BYTES);
+    }
+    stream->mbmd = NULL;
+
+    if (stream->gpa_list && stream->gpa_list != MAP_FAILED) {
+        munmap(stream->gpa_list, VIRTCCA_MIG_GPA_LIST_MAP_BYTES);
+    }
+    stream->gpa_list = NULL;
+
+    if (stream->mac_list && stream->mac_list != MAP_FAILED) {
+        munmap(stream->mac_list, VIRTCCA_MIG_MAC_LIST_MAP_BYTES);
+    }
+    stream->mac_list = NULL;
+
+    if (buf_list_pages && stream->buf_list && stream->buf_list != MAP_FAILED) {
+        munmap(stream->buf_list, (size_t)buf_list_pages * TARGET_PAGE_SIZE);
+    }
+    stream->buf_list = NULL;
+}
+
+/*
+ * Set up the stream buffer data and create the mig ko.
+ *
+ * Creates the stream device, tells it how many buffer pages to use, and
+ * maps the mbmd, gpa list, mac list and buffer list regions.  On failure
+ * everything acquired so far is released and a negative errno is returned.
+ */
+static int virtcca_mig_do_stream_setup(virtCCAMigStream *stream, uint32_t nr_pages)
+{
+    int ret;
+    /* fully initialised: the whole structure is handed to the driver */
+    struct kvm_dev_virtcca_mig_attr virtcca_mig_attr = {
+        .version = KVM_DEV_VIRTCCA_MIG_ATTR_VERSION,
+        .buf_list_pages = nr_pages,
+    };
+    struct kvm_device_attr attr = {
+        .group = KVM_DEV_VIRTCCA_MIG_ATTR,
+        .addr = (uint64_t)(uintptr_t)&virtcca_mig_attr,
+        .attr = sizeof(struct kvm_dev_virtcca_mig_attr),
+    };
+    size_t map_size;
+    off_t map_offset;
+
+    ret = virtcca_mig_stream_create(stream);
+    if (ret) {
+        return ret;
+    }
+
+    /*
+     * Tell the virtCCA_mig driver the number of pages to add to buffer list for
+     * private page export/import.
+     */
+    if (kvm_device_ioctl(stream->fd, KVM_SET_DEVICE_ATTR, &attr) < 0) {
+        ret = -EIO;
+        goto err;
+    }
+
+    /* check the set is ok */
+    if (virtcca_mig_stream_get_attr(stream, &virtcca_mig_attr) < 0) {
+        virtcca_mig_attr.buf_list_pages = 0;
+        ret = -EIO;
+        goto err;
+    }
+
+    /* four metadata map offset and size setup */
+    map_offset = VIRTCCA_MIG_STREAM_MBMD_MAP_OFFSET;
+    map_size = VIRTCCA_MIG_MBMD_MAP_BYTES;
+    stream->mbmd = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                        stream->fd, map_offset);
+    if (stream->mbmd == MAP_FAILED) {
+        ret = -errno;
+        error_report("Failed to map mbmd due to %s", strerror(-ret));
+        goto err;
+    }
+
+    map_offset = VIRTCCA_MIG_STREAM_GPA_LIST_MAP_OFFSET * TARGET_PAGE_SIZE;
+    map_size = VIRTCCA_MIG_GPA_LIST_MAP_BYTES;
+    stream->gpa_list = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                            stream->fd, map_offset);
+    if (stream->gpa_list == MAP_FAILED) {
+        ret = -errno;
+        error_report("Failed to map gpa list due to %s", strerror(-ret));
+        goto err;
+    }
+
+    map_offset = VIRTCCA_MIG_STREAM_MAC_LIST_MAP_OFFSET * TARGET_PAGE_SIZE;
+    map_size = VIRTCCA_MIG_MAC_LIST_MAP_BYTES;
+    stream->mac_list = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                            stream->fd, map_offset);
+    if (stream->mac_list == MAP_FAILED) {
+        ret = -errno;
+        error_report("Failed to map mac list due to %s", strerror(-ret));
+        goto err;
+    }
+
+    map_offset = VIRTCCA_MIG_STREAM_BUF_LIST_MAP_OFFSET * TARGET_PAGE_SIZE;
+    map_size = virtcca_mig_attr.buf_list_pages * TARGET_PAGE_SIZE;
+    stream->buf_list = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                            stream->fd, map_offset);
+    if (stream->buf_list == MAP_FAILED) {
+        ret = -errno;
+        error_report("Failed to map buf list due to %s", strerror(-ret));
+        goto err;
+    }
+
+    return 0;
+
+err:
+    /* the caller does not count a failed stream, so release it here */
+    virtcca_mig_stream_unmap(stream, virtcca_mig_attr.buf_list_pages);
+    close(stream->fd);
+    stream->fd = -1;
+    return ret;
+}
+
+/* after the cgs savevm setup, enter the virtcca stream setup procedure */
+static int virtcca_mig_stream_setup(uint32_t nr_channels, uint32_t nr_pages)
+{
+    info_report("Entering and calling virtcca_mig_stream_setup");
+    virtCCAMigStream *stream;
+    uint32_t i;
+    int ret;
+
+    virtCCA_mig.streams = g_new0(virtCCAMigStream, nr_channels);
+
+    for (i = 0; i < nr_channels; i++) {
+        stream = &virtCCA_mig.streams[i];
+        ret = virtcca_mig_do_stream_setup(stream, nr_pages);
+        if (ret) {
+            return ret;
+        }
+        /* only fully set up streams are counted and later cleaned up */
+        virtCCA_mig.nr_streams++;
+    }
+
+    /* a failure is reported by the callee and does not fail the setup */
+    virtcca_get_mig_info();
+    return 0;
+}
+
+/*
+ * Unmap the regions of @stream and close its device fd.  If the buffer
+ * list size cannot be queried, the buffer list mapping is left in place
+ * but every other resource is still released.
+ */
+static void virtcca_mig_stream_cleanup(virtCCAMigStream *stream)
+{
+    info_report("Entering and calling virtcca_mig_stream_cleanup");
+    struct kvm_dev_virtcca_mig_attr virtcca_mig_attr;
+    int ret;
+
+    ret = virtcca_mig_stream_get_attr(stream, &virtcca_mig_attr);
+    if (ret < 0) {
+        error_report("virtcca mig cleanup failed: %s", strerror(-ret));
+        virtcca_mig_attr.buf_list_pages = 0;
+    }
+
+    virtcca_mig_stream_unmap(stream, virtcca_mig_attr.buf_list_pages);
+    close(stream->fd);
+    stream->fd = -1;
+}
+
+/* Tear down every counted stream and free the stream array */
+static void virtcca_mig_cleanup(void)
+{
+    int i;
+
+    for (i = 0; i < virtCCA_mig.nr_streams; i++) {
+        virtcca_mig_stream_cleanup(&virtCCA_mig.streams[i]);
+    }
+
+    virtCCA_mig.nr_streams = 0;
+
+    g_free(virtCCA_mig.streams);
+    virtCCA_mig.streams = NULL;
+}
+
+/* Signal the end of the import on stream 0, then release all streams */
+static void virtcca_mig_loadvm_state_cleanup(void)
+{
+    /* nothing to signal if the stream array was never allocated */
+    if (virtCCA_mig.streams) {
+        virtcca_mig_stream_ioctl(&virtCCA_mig.streams[0],
+                                 KVM_CVM_MIG_IMPORT_END, 0, 0);
+    }
+    virtcca_mig_cleanup();
+}
+
+/* Ask KVM to abort the export; returns 0 or the ioctl error */
+static int virtcca_mig_savevm_state_abort(void)
+{
+    struct kvm_virtcca_mig_cmd cmd = {
+        .id = KVM_CVM_MIG_EXPORT_ABORT,
+    };
+    int ret;
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_CVM_MIG_IOCTL, &cmd);
+    if (ret) {
+        error_report("%s: failed: failed to abort %d", __func__, ret);
+    }
+
+    return ret;
+}
+
+
+/*
+ * Read records from @f into the buffers of stream @channel_id and import
+ * each one with the ioctl matching its MBMD type.  Keeps going while the
+ * record header carries CVM_MIG_F_CONTINUE.
+ *
+ * Every length comes from the migration stream, so each is checked against
+ * the size of the mapping it is copied into.  Returns 0 on success, -1 on
+ * a stream error, an oversized record, an unknown type or a failed import.
+ */
+static int virtcca_mig_loadvm_state(QEMUFile *f, uint32_t channel_id)
+{
+    virtCCAMigStream *stream = &virtCCA_mig.streams[channel_id];
+    struct kvm_dev_virtcca_mig_attr mig_attr;
+    uint64_t mbmd_bytes, buf_list_bytes, mac_list_bytes, gpa_list_bytes;
+    uint64_t buf_list_num = 0;
+    bool should_continue = true;
+    uint8_t mbmd_type;
+    int ret, cmd_id;
+    virtCCAMigHdr hdr;
+
+    /* buf_list_pages bounds how many pages one record may carry */
+    if (virtcca_mig_stream_get_attr(stream, &mig_attr) < 0) {
+        error_report("%s: failed to query the buffer list size", __func__);
+        return -1;
+    }
+
+    while (should_continue) {
+        if (qemu_peek_le16(f, sizeof(hdr)) == 0) {
+            /* stop polling once the stream itself has failed */
+            ret = qemu_file_get_error(f);
+            if (ret) {
+                error_report("%s: migration stream error %d", __func__, ret);
+                return -1;
+            }
+            continue;
+        }
+        qemu_get_buffer(f, (uint8_t *)&hdr, sizeof(hdr));
+        mbmd_bytes = qemu_peek_le16(f, 0);
+        if (mbmd_bytes > VIRTCCA_MIG_MBMD_MAP_BYTES) {
+            error_report("%s: mbmd size %" PRIu64 " exceeds the mbmd buffer",
+                         __func__, mbmd_bytes);
+            return -1;
+        }
+        qemu_get_buffer(f, (uint8_t *)stream->mbmd, mbmd_bytes);
+        mbmd_type = virtcca_mig_stream_get_mbmd_type(stream);
+
+        buf_list_num = hdr.buf_list_num;
+        if (buf_list_num > mig_attr.buf_list_pages) {
+            error_report("%s: %" PRIu64 " pages exceed the buffer list",
+                         __func__, buf_list_num);
+            return -1;
+        }
+        buf_list_bytes = buf_list_num * TARGET_PAGE_SIZE;
+        if (buf_list_num) {
+            qemu_get_buffer(f, (uint8_t *)stream->buf_list, buf_list_bytes);
+        }
+
+        switch (mbmd_type) {
+        case KVM_VIRTCCA_MIG_MBMD_TYPE_IMMUTABLE_STATE:
+            cmd_id = KVM_CVM_MIG_IMPORT_STATE_IMMUTABLE;
+            break;
+        case KVM_VIRTCCA_MIG_MBMD_TYPE_MEMORY_STATE:
+            cmd_id = KVM_CVM_MIG_IMPORT_MEM;
+            mac_list_bytes = buf_list_num * sizeof(Int128);
+            gpa_list_bytes = buf_list_num * sizeof(GpaListEntry);
+            if (gpa_list_bytes > VIRTCCA_MIG_GPA_LIST_MAP_BYTES ||
+                mac_list_bytes > VIRTCCA_MIG_MAC_LIST_MAP_BYTES) {
+                error_report("%s: %" PRIu64 " entries exceed the gpa/mac list",
+                             __func__, buf_list_num);
+                return -1;
+            }
+            qemu_get_buffer(f, (uint8_t *)stream->gpa_list, gpa_list_bytes);
+            qemu_get_buffer(f, (uint8_t *)stream->mac_list, mac_list_bytes);
+            break;
+        case KVM_VIRTCCA_MIG_MBMD_TYPE_EPOCH_TOKEN:
+            cmd_id = KVM_CVM_MIG_IMPORT_TRACK;
+            break;
+        case KVM_VIRTCCA_MIG_MBMD_TYPE_VCPU_STATE:
+            cmd_id = KVM_CVM_MIG_IMPORT_STATE_TEC;
+            break;
+        default:
+            error_report("%s: unsupported mb_type %d", __func__, mbmd_type);
+            return -1;
+        }
+
+        ret = virtcca_mig_stream_ioctl(stream, cmd_id, 0, &buf_list_num);
+        if (ret) {
+            if (buf_list_num != 0) {
+                error_report("%s: buf_list_num=%" PRIx64, __func__, buf_list_num);
+            }
+            return -1;
+        }
+
+        if (cmd_id == KVM_CVM_MIG_IMPORT_STATE_IMMUTABLE) {
+            virtcca_get_mig_info();
+        }
+
+        should_continue = hdr.flags & CVM_MIG_F_CONTINUE;
+    }
+
+    return 0;
+}
+
+static int virtcca_mig_create_tec(QEMUFile *f)
+{
+    return tmm_create_tec();
+}
+
+void vircca_mig_init(CgsMig *cgs_mig)
+{
+    cgs_mig->is_ready = virtcca_mig_is_ready;
+    cgs_mig->savevm_state_setup = 
virtcca_mig_stream_setup;
+    cgs_mig->savevm_state_start = virtcca_mig_savevm_state_start;
+    cgs_mig->savevm_state_ram_start_epoch =
+        virtcca_mig_savevm_state_ram_start_epoch;
+    cgs_mig->savevm_state_ram = virtcca_mig_savevm_state_ram;
+    cgs_mig->savevm_state_pause = virtcca_mig_savevm_state_pause;
+    cgs_mig->savevm_state_end = virtcca_mig_savevm_state_end;
+    cgs_mig->savevm_state_cleanup = virtcca_mig_cleanup;
+    cgs_mig->savevm_state_abort = virtcca_mig_savevm_state_abort;
+    cgs_mig->loadvm_state_setup = virtcca_mig_stream_setup;
+    cgs_mig->loadvm_state = virtcca_mig_loadvm_state;
+    cgs_mig->loadvm_create_tec = virtcca_mig_create_tec;
+    cgs_mig->loadvm_state_cleanup = virtcca_mig_loadvm_state_cleanup;
+}
diff --git a/migration/cgs.c b/migration/cgs.c
new file mode 100644
index 0000000000000000000000000000000000000000..937cc15f07d5368415620b66c59f1ee24f0d648f
--- /dev/null
+++ b/migration/cgs.c
@@ -0,0 +1,300 @@
+/*
+ * QEMU Migration for Confidential Guest Support
+ *
+ * Copyright (C) 2022 Intel Corp.
+ *
+ * Authors:
+ *      Wei Wang
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu-file.h"
+#include "sysemu/kvm.h"
+#include "savevm.h"
+#include "ram.h"
+#include "cgs.h"
+#include "options.h"
+
+static CgsMig cgs_mig;
+
+/*
+ * If @ret is negative: log it, latch it on @f and return it from the calling
+ * function (note the hidden "return").  Does nothing for @ret >= 0.
+ * @ret is a negative errno value, hence the negation for strerror().
+ */
+#define cgs_check_error(f, ret)                                     \
+do {                                                                \
+    if ((ret) < 0) {                                                \
+        error_report("%s: failed: %s", __func__, strerror(-(ret))); \
+        qemu_file_set_error((f), (ret));                            \
+        return (ret);                                               \
+    }                                                               \
+} while (0)
+
+/* Ask the vendor hook, if any, whether CGS migration can start */
+bool cgs_mig_is_ready(bool is_src, const char *dst_ip, uint16_t dst_port)
+{
+    /*
+     * For the legacy VM migration and some vendor specific implementations
+     * that don't require the check, return true to have the migration flow
+     * continue.
+     */
+    if (!cgs_mig.is_ready) {
+        return true;
+    }
+
+    return cgs_mig.is_ready(is_src, dst_ip, dst_port);
+}
+
+/* Save side: pick channel/page counts and run the vendor setup hook */
+int cgs_mig_savevm_state_setup(QEMUFile *f)
+{
+    int ret;
+    uint32_t nr_channels = 1, nr_pages = 1;
+
+    if (!cgs_mig.savevm_state_setup) {
+        return 0;
+    }
+
+    if (migrate_multifd()) {
+        nr_channels = migrate_multifd_channels();
+        nr_pages = MULTIFD_PACKET_SIZE / TARGET_PAGE_SIZE;
+    } else if (migrate_postcopy_preempt()) {
+        nr_channels = RAM_CHANNEL_MAX;
+    }
+
+    ret = cgs_mig.savevm_state_setup(nr_channels, nr_pages);
+    cgs_check_error(f, ret);
+
+    return ret;
+}
+
+/* Save side: emit QEMU_VM_SECTION_CGS_START, then the vendor start data */
+int cgs_mig_savevm_state_start(QEMUFile *f)
+{
+    int ret;
+
+    if (!cgs_mig.savevm_state_start) {
+        return 0;
+    }
+
+    qemu_put_byte(f, QEMU_VM_SECTION_CGS_START);
+    ret = cgs_mig.savevm_state_start(f);
+    cgs_check_error(f, ret);
+    /*
+     * Flush the initial message (i.e. QEMU_VM_SECTION_CGS_START + vendor
+     * specific data if there is) immediately to have the destination side
+     * kick off the process as soon as possible.
+     */
+    if (!ret) {
+        qemu_fflush(f);
+    }
+
+    return ret;
+}
+
+/* Return number of bytes sent or the error value (< 0) */
+long cgs_ram_save_start_epoch(QEMUFile *f)
+{
+    long ret;
+
+    if (!cgs_mig.savevm_state_ram_start_epoch) {
+        return 0;
+    }
+
+    if (migrate_multifd() && !migration_in_postcopy()) {
+        ret = multifd_send_sync_main();
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    ram_save_cgs_epoch_header(f);
+    ret = cgs_mig.savevm_state_ram_start_epoch(f);
+    cgs_check_error(f, ret);
+
+    /* 8 bytes for the cgs header */
+    return ret + 8;
+}
+
+/* Return number of bytes sent or the error value (< 0) */
+long cgs_mig_savevm_state_ram(QEMUFile *f,
+                              RAMBlock *block, ram_addr_t offset, hwaddr gpa)
+{
+    long hdr_bytes, ret;
+    uint32_t channel_id;
+
+    if (!cgs_mig.savevm_state_ram) {
+        return 0;
+    }
+
+    if (migrate_postcopy_preempt() && migration_in_postcopy()) {
+        channel_id = RAM_CHANNEL_POSTCOPY;
+    } else {
+        channel_id = 0;
+    }
+
+    hdr_bytes = ram_save_cgs_ram_header(f, block, offset, false);
+    ret = cgs_mig.savevm_state_ram(f, channel_id, gpa);
+    /*
+     * Returning 0 isn't expected. Either succeed with returning bytes of data
+     * written to the file or error with a negative error code returned.
+     */
+    assert(ret);
+    cgs_check_error(f, ret);
+
+    return hdr_bytes + ret;
+}
+
+/* Save side: run the vendor pause hook; 0 when none is installed */
+int cgs_mig_savevm_state_pause(void)
+{
+    int ret;
+
+    if (!cgs_mig.savevm_state_pause) {
+        return 0;
+    }
+
+    ret = cgs_mig.savevm_state_pause();
+    return ret;
+}
+
+/* Save side: emit QEMU_VM_SECTION_CGS_END, then the vendor end data */
+int cgs_mig_savevm_state_end(QEMUFile *f)
+{
+    int ret;
+
+    if (!cgs_mig.savevm_state_end) {
+        return 0;
+    }
+
+    qemu_put_byte(f, QEMU_VM_SECTION_CGS_END);
+    ret = cgs_mig.savevm_state_end(f);
+    cgs_check_error(f, ret);
+
+    return ret;
+}
+
+/* Save side: run the vendor abort hook and log its failure, if any */
+int cgs_mig_savevm_state_abort(void)
+{
+    int ret;
+
+    if (!cgs_mig.savevm_state_abort) {
+        return 0;
+    }
+
+    ret = cgs_mig.savevm_state_abort();
+    if (ret) {
+        error_report("%s: failed: %s", __func__, strerror(abs(ret)));
+    }
+
+    return ret;
+}
+
+/* True when the vendor provides a savevm_state_ram_cancel hook */
+bool cgs_mig_savevm_state_need_ram_cancel(void)
+{
+    return !!cgs_mig.savevm_state_ram_cancel;
+}
+
+/* Return number of bytes sent or the error value (< 0) */
+long cgs_mig_savevm_state_ram_cancel(QEMUFile *f, RAMBlock *block,
+                                     ram_addr_t offset, hwaddr gpa)
+{
+    long hdr_bytes, ret;
+
+    if (!cgs_mig.savevm_state_ram_cancel) {
+        return 0;
+    }
+
+    hdr_bytes = ram_save_cgs_ram_header(f, block, offset, true);
+    ret = cgs_mig.savevm_state_ram_cancel(f, gpa);
+    cgs_check_error(f, ret);
+
+    return hdr_bytes + ret;
+}
+
+/* Save side: run the vendor cleanup hook, if any */
+void cgs_mig_savevm_state_cleanup(void)
+{
+    if (cgs_mig.savevm_state_cleanup) {
+        cgs_mig.savevm_state_cleanup();
+    }
+}
+
+/* Load side: pick channel/page counts and run the vendor setup hook */
+int cgs_mig_loadvm_state_setup(QEMUFile *f)
+{
+    int ret;
+    uint32_t nr_channels = 1, nr_pages = 1;
+
+    if (!cgs_mig.loadvm_state_setup) {
+        return 0;
+    }
+
+    if (migrate_multifd()) {
+        nr_channels = migrate_multifd_channels();
+        nr_pages = MULTIFD_PACKET_SIZE / TARGET_PAGE_SIZE;
+    } else if (migrate_postcopy_preempt()) {
+        nr_channels = RAM_CHANNEL_MAX;
+    }
+
+    ret = cgs_mig.loadvm_state_setup(nr_channels, nr_pages);
+    cgs_check_error(f, ret);
+
+    return ret;
+}
+
+/* Load side: let the vendor hook consume CGS data for @channel_id */
+int cgs_mig_loadvm_state(QEMUFile *f, uint32_t channel_id)
+{
+    int ret;
+
+    if (!cgs_mig.loadvm_state) {
+        return 0;
+    }
+
+    ret = cgs_mig.loadvm_state(f, channel_id);
+    cgs_check_error(f, ret);
+
+    return ret;
+}
+
+/* Load side: run the vendor loadvm_create_tec hook, if any */
+int cgs_mig_loadvm_create_tec(QEMUFile *f)
+{
+    int ret;
+
+    info_report("calling cgs_mig_loadvm_create_tec");
+    if (!cgs_mig.loadvm_create_tec) {
+        return 0;
+    }
+
+    ret = cgs_mig.loadvm_create_tec(f);
+    cgs_check_error(f, ret);
+
+    return ret;
+}
+
+/* Load side: run the vendor cleanup hook, if any */
+void cgs_mig_loadvm_state_cleanup(void)
+{
+    if (cgs_mig.loadvm_state_cleanup) {
+        cgs_mig.loadvm_state_cleanup();
+    }
+}
+
+/* Install the virtCCA hooks into cgs_mig when virtcca_cvm_allowed is set */
+void cgs_mig_init(void)
+{
+    if (virtcca_cvm_allowed) {
+        info_report("INFO: cgs_mig_init is setup as the virtcca mode");
+        vircca_mig_init(&cgs_mig);
+    }
+}
diff --git a/migration/cgs.h b/migration/cgs.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d76f530f75151e223bd2875dd83264de5b7b0be
--- /dev/null
+++ b/migration/cgs.h
@@ -0,0 +1,104 @@
+/*
+ * QEMU Migration for Confidential Guest Support
+ *
+ * Copyright (C) 2022 Intel Corp.
+ *
+ * Authors:
+ *      Wei Wang
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_MIGRATION_CGS_H
+#define QEMU_MIGRATION_CGS_H
+#include
+#include "qemu/osdep.h"
+#include "migration.h"
+#include "multifd.h"
+
+#define CGS_PRIVATE_GPA_INVALID (~0ULL)
+#define UEFI_MAX_SIZE 0x8000000
+
+typedef struct CgsMig {
+    bool (*is_ready)(bool is_src, const char *dst_ip, uint16_t dst_port);
+    int (*savevm_state_setup)(uint32_t nr_channels, uint32_t nr_pages);
+    int (*savevm_state_start)(QEMUFile *f);
+    long (*savevm_state_ram_start_epoch)(QEMUFile *f);
+    long (*savevm_state_ram)(QEMUFile *f, uint32_t channel_id, hwaddr gpa);
+    int (*savevm_state_pause)(void);
+    int (*savevm_state_end)(QEMUFile *f);
+    int (*savevm_state_abort)(void);
+    long (*savevm_state_ram_cancel)(QEMUFile *f, hwaddr gpa);
+    void (*savevm_state_cleanup)(void);
+    int (*loadvm_state_setup)(uint32_t nr_channels, uint32_t nr_pages);
+    int (*loadvm_state)(QEMUFile *f, uint32_t channel_id);
+    int (*loadvm_create_tec)(QEMUFile *f);
+    void (*loadvm_state_cleanup)(void);
+    /* Multifd support */
+    uint32_t (*iov_num)(uint32_t page_batch_num);
+    int (*multifd_send_prepare)(MultiFDSendParams *p, Error **errp);
+    int (*multifd_recv_pages)(MultiFDRecvParams *p, Error **errp);
+} CgsMig;
+
+int virtcca_mig_crc(void);
+bool cgs_mig_is_ready(bool is_src, const char *dst_ip, uint16_t dst_port);
+int cgs_mig_savevm_state_setup(QEMUFile *f);
+int cgs_mig_savevm_state_start(QEMUFile *f);
+long cgs_ram_save_start_epoch(QEMUFile *f);
+long cgs_mig_savevm_state_ram(QEMUFile *f,
+                              RAMBlock *block, ram_addr_t offset, hwaddr gpa);
+bool cgs_mig_savevm_state_need_ram_cancel(void);
+long cgs_mig_savevm_state_ram_cancel(QEMUFile *f, RAMBlock *block,
+                                     ram_addr_t offset, hwaddr gpa);
+int cgs_mig_savevm_state_pause(void);
+int cgs_mig_savevm_state_end(QEMUFile *f);
+int cgs_mig_savevm_state_abort(void);
+void cgs_mig_savevm_state_cleanup(void);
+int cgs_mig_loadvm_state_setup(QEMUFile *f);
+int cgs_mig_loadvm_state(QEMUFile *f, uint32_t channel_id);
+int cgs_mig_loadvm_create_tec(QEMUFile *f);
+void cgs_mig_loadvm_state_cleanup(void);
+int cgs_mig_multifd_send_prepare(MultiFDSendParams *p, Error **errp);
+int cgs_mig_multifd_recv_pages(MultiFDRecvParams *p, Error **errp);
+uint32_t cgs_mig_iov_num(uint32_t page_batch_num);
+void cgs_mig_init(void);
+int virtcca_import_zero_page(uint32_t channel_id, void *host);
+void vircca_mig_init(CgsMig *cgs_mig);
+bool virtcca_is_swiotlb(void *host);
+int virtcca_check_mig_mem(bool is_src);
+int get_migration_result_from_sysfs(bool is_src);
+int write_numa_info_to_sysfs(struct kvm_numa_info *numa_info, bool is_src);
+int append_number_to_string(char *result, size_t *current_len, uint64_t number, uint64_t node_mask);
+
+bool virtcca_is_zero_page(uint32_t channel_id, hwaddr cgs_private_gpa, size_t len);
+
+typedef struct virtCCAMigInfo {
+    uint64_t swiotlb_start;
+    uint64_t swiotlb_end;
+} virtCCAMigInfo;
+
+typedef struct virtCCAMigStream {
+    int fd;
+    void *mbmd;
+    void *buf_list;
+    void *mac_list;
+    void *gpa_list;
+} virtCCAMigStream;
+
+typedef struct virtCCAMigState {
+    uint32_t nr_streams;
+    virtCCAMigStream *streams;
+    uint64_t swiotlb_start;
+    uint64_t swiotlb_end;
+} virtCCAMigState;
+
+extern virtCCAMigState virtCCA_mig;
+
+struct mig_cvm {
+    /* used by guest cvm */
+    uint8_t version; /* kvm version of migcvm */
+    uint64_t migvm_cid; /* hash of migcvm, from and used by guest cvm */
+};
+#endif
diff --git a/migration/meson.build b/migration/meson.build
index ec8cb1a7e914a3db9fb8ae1f9d1107f078f5a8b2..be45a1e4f3fb0f8809fa6433a76185df1ba7d10b 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -43,6 +43,11 @@ if get_option('replication').allowed()
   system_ss.add(files('colo-failover.c', 'colo.c'))
 endif
 
+if have_virtcca_migration
+  specific_ss.add(files('cgs.c',
+                        'cgs-virtcca.c'))
+endif
+
 system_ss.add(when: rdma, if_true: files('rdma.c'))
 if get_option('live_block_migration').allowed()
   system_ss.add(files('block.c'))
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 9abbb38690862e35b4f9660b18cbc25a49c5597e..91c3ef7093e335473c1be4cd42774278e2c3fef1 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -148,6 +148,17 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) "Zero-copy-send fallbacks happened: %" PRIu64 " times\n", info->ram->dirty_sync_missed_zero_copy); } + if (virtcca_cvm_enabled()) { + /* Add cgs info */ + if (info->ram->cgs_epochs) { + monitor_printf(mon, "cgs epochs: %" PRIu64 "\n", + info->ram->cgs_epochs); + } + if (info->ram->cgs_private_pages) { + monitor_printf(mon, "cgs private-pages: %" PRIu64 "\n", + info->ram->cgs_private_pages); + } + } } if (info->disk) { diff --git a/migration/migration-stats.h b/migration/migration-stats.h index 24911ce067d757bf03f28d8fbf38463f99784dfa..e03218f8563e12b799504381a7ebe7002226d74a 100644 --- a/migration/migration-stats.h +++ b/migration/migration-stats.h @@ -103,8 +103,15 @@ typedef struct { Stat64 urma_bytes; /* * Number of pages transferred that were full of zeros. 
+ * the old one is ram_counters.duplicate */ Stat64 zero_pages; + + /* count the cgs pages */ + Stat64 cgs_private_pages; + + /* count the cgs epochs */ + Stat64 cgs_epochs; } MigrationAtomicStats; extern MigrationAtomicStats mig_stats; diff --git a/migration/migration.c b/migration/migration.c index 38ef47b2442adb9dd02a24a94ee3fb6df0ad2861..9973880fe736bfb76e672b0996d9b9446e73846d 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -22,6 +22,7 @@ #include "fd.h" #include "file.h" #include "socket.h" +#include "sysemu/kvm.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "sysemu/cpu-throttle.h" @@ -74,6 +75,9 @@ #ifdef CONFIG_HAM_MIGRATION #include "ham.h" #endif +#ifdef CONFIG_VIRTCCA_MIGRATION +#include "cgs.h" +#endif #define DEFAULT_FD_MAX 4096 static NotifierList migration_state_notifiers = @@ -213,6 +217,12 @@ void migration_object_init(void) blk_mig_init(); ram_mig_init(); dirty_bitmap_mig_init(); +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + /* init the virtcca cgs mig */ + cgs_mig_init(); + } +#endif } void migration_cancel(const Error *error) @@ -1142,6 +1152,13 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) stat64_get(&mig_stats.dirty_sync_count); info->ram->dirty_sync_missed_zero_copy = stat64_get(&mig_stats.dirty_sync_missed_zero_copy); +#ifdef CONFIG_VIRTCCA_MIGRATION + /* init the cgs epoch and private pages value */ + if (virtcca_cvm_enabled()) { + info->ram->cgs_epochs = stat64_get(&mig_stats.cgs_epochs); + info->ram->cgs_private_pages = stat64_get(&mig_stats.cgs_private_pages); + } +#endif info->ram->postcopy_requests = stat64_get(&mig_stats.postcopy_requests); info->ram->page_size = page_size; @@ -1933,10 +1950,16 @@ bool migration_is_blocked(Error **errp) /* Returns true if continue to migrate, or false if error detected */ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - bool resume, Error **errp) + bool resume, const char *dst_ip, uint16_t 
dst_port, Error **errp) { Error *local_err = NULL; +#ifdef CONFIG_VIRTCCA_MIGRATION + if (!cgs_mig_is_ready(true, dst_ip, dst_port) && virtcca_cvm_enabled()) { + error_setg(errp, "cgs mig required, but not ready"); + return false; + } +#endif if (blk_inc) { warn_report("parameter 'inc' is deprecated;" " use blockdev-mirror with NBD instead"); @@ -2034,7 +2057,7 @@ void qmp_migrate(const char *uri, bool has_channels, MigrationState *s = migrate_get_current(); g_autoptr(MigrationChannel) channel = NULL; MigrationAddress *addr = NULL; - + const char *dst_ip = ""; /* * Having preliminary checks for uri and channel */ @@ -2066,10 +2089,17 @@ void qmp_migrate(const char *uri, bool has_channels, if (!migration_channels_and_transport_compatible(addr, errp)) { return; } - +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled() && addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { + SocketAddress *saddr = &addr->u.socket; + if (saddr->type == SOCKET_ADDRESS_TYPE_INET) { + dst_ip = saddr->u.inet.host; + } + } +#endif resume_requested = has_resume && resume; if (!migrate_prepare(s, has_blk && blk, has_inc && inc, - resume_requested, errp)) { + resume_requested, dst_ip, 0, errp)) { /* Error detected, put into errp */ return; } @@ -2677,6 +2707,7 @@ static int migration_maybe_pause(MigrationState *s, int *current_active_state, int new_state) { + if (!migrate_pause_before_switchover()) { return 0; } @@ -2709,6 +2740,7 @@ static int migration_maybe_pause(MigrationState *s, } return s->state == new_state ? 
0 : -EINVAL; + } static int migration_completion_precopy(MigrationState *s, @@ -2734,7 +2766,14 @@ static int migration_completion_precopy(MigrationState *s, if (ret < 0) { goto out_unlock; } - +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + ret = cgs_mig_savevm_state_pause(); + if (ret) { + goto out_unlock; + } + } +#endif /* * Inactivate disks except in COLO, and track that we have done so in order * to remember to reactivate them if migration fails or is cancelled. @@ -2743,6 +2782,11 @@ static int migration_completion_precopy(MigrationState *s, migration_rate_set(RATE_LIMIT_DISABLED); ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, s->block_inactive); +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + virtcca_mig_crc(); + } +#endif out_unlock: qemu_mutex_unlock_iothread(); return ret; @@ -3256,6 +3300,13 @@ static void migration_iteration_finish(MigrationState *s) case MIGRATION_STATUS_FAILED: case MIGRATION_STATUS_CANCELLED: case MIGRATION_STATUS_CANCELLING: +#ifdef CONFIG_VIRTCCA_MIGRATION + /* Cancel the current cgs migration */ + if (virtcca_cvm_enabled()) { + info_report("live migrate failed, goto abort data"); + cgs_mig_savevm_state_abort(); + } +#endif if (s->vm_old_state == RUN_STATE_RUNNING) { if (!runstate_check(RUN_STATE_SHUTDOWN)) { vm_start(); diff --git a/migration/multifd.c b/migration/multifd.c index 4c310deb61b080bf91a7eee6321bcfc1d2ac1c34..56d0e065b5d81cec2bce0a1fac47ec1625c49dc0 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -1268,7 +1268,7 @@ void multifd_recv_cleanup(void) multifd_recv_cleanup_state(); } -void multifd_recv_sync_main(void) +void multifd_recv_barrier(void) { int thread_count = migrate_multifd_channels(); int i; @@ -1286,6 +1286,16 @@ void multifd_recv_sync_main(void) trace_multifd_recv_sync_main_wait(i); qemu_sem_wait(&multifd_recv_state->sem_sync); } +} + +void multifd_recv_unbarrier(void) +{ + int thread_count = migrate_multifd_channels(); + int i; + + if 
(!migrate_multifd() || !multifd_use_packets()) { + return; + } /* * Sync done. Release the channels for the next iteration. @@ -1304,6 +1314,12 @@ void multifd_recv_sync_main(void) trace_multifd_recv_sync_main(multifd_recv_state->packet_num); } +void multifd_recv_sync_main(void) +{ + multifd_recv_barrier(); + multifd_recv_unbarrier(); +} + static void *multifd_recv_thread(void *opaque) { MultiFDRecvParams *p = opaque; diff --git a/migration/multifd.h b/migration/multifd.h index 57c13347881eb557f1dcb032a6674cb3ca0e8036..23f4595781d3875df77697875dd2e6cc988fd661 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -22,6 +22,8 @@ void multifd_recv_cleanup(void); void multifd_recv_shutdown(void); bool multifd_recv_all_channels_created(void); void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); +void multifd_recv_barrier(void); +void multifd_recv_unbarrier(void); void multifd_recv_sync_main(void); int multifd_send_sync_main(void); bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); diff --git a/migration/qemu-file.c b/migration/qemu-file.c index bd1dbc3db175ff75e08ef65fac7978ac802dafb7..63c8c77a3a6c814f5f960620dff41126aac443e9 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -600,6 +600,16 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset) return f->buf[index]; } +/* Peeks 2 bytes in little endian from the buffer */ +int qemu_peek_le16(QEMUFile *f, int offset) +{ + int v; + v = qemu_peek_byte(f, offset + 1) << 8; + v |= qemu_peek_byte(f, offset); + + return v; +} + int coroutine_mixed_fn qemu_get_byte(QEMUFile *f) { int result; diff --git a/migration/qemu-file.h b/migration/qemu-file.h index 8afa95732bc8b4ba59792c599c699d95585c9448..ca5a116295569c368f47c33053354c757e2a97a1 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -65,6 +65,7 @@ bool qemu_file_buffer_empty(QEMUFile *file); * previously peeked +n-1. 
*/ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset); +int qemu_peek_le16(QEMUFile *f, int offset); void qemu_file_skip(QEMUFile *f, int size); int qemu_file_get_error_obj_any(QEMUFile *f1, QEMUFile *f2, Error **errp); void qemu_file_set_error_obj(QEMUFile *f, int ret, Error *err); diff --git a/migration/ram.c b/migration/ram.c index 6422b3e4b6c37b6069fe96d75c7aa74cc00c49b5..c05802f9ff08a395652ae4e9c3547361d116ffb5 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -68,6 +68,9 @@ #ifdef CONFIG_HAM_MIGRATION #include "ham.h" #endif +#ifdef CONFIG_VIRTCCA_MIGRATION +#include "cgs.h" +#endif /* Defines RAM_SAVE_ENCRYPTED_PAGE and RAM_SAVE_SHARED_REGION_LIST */ #include "target/i386/sev.h" #include "target/i386/csv.h" @@ -107,6 +110,14 @@ #define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 #define RAM_SAVE_FLAG_ENCRYPTED_DATA 0x400 +/* Current migration flag conflicts with the nvm. + * To switch between nvm and cvm migrations and avoid conflicts, + * you can use the flag commented below. 
+ */ +#define RAM_SAVE_FLAG_CGS_EPOCH 0x100 +#define RAM_SAVE_FLAG_CGS_STATE 0x400 +#define RAM_SAVE_FLAG_CGS_STATE_CANCEL 0x800 + bool memcrypt_enabled(void) { MachineState *ms = MACHINE(qdev_get_machine()); @@ -130,6 +141,10 @@ struct PageSearchStatus { RAMBlock *block; /* Current page to search from */ unsigned long page; +#ifdef CONFIG_VIRTCCA_MIGRATION + /* Guest-physical address of the current page if it is private */ + hwaddr cgs_private_gpa; +#endif /* Set once we wrap around */ bool complete_round; /* Whether we're sending a host page */ @@ -395,6 +410,8 @@ struct RAMState { bool xbzrle_started; /* Are we on the last stage of migration */ bool last_stage; + /* Used by cgs migration and set to request for the start of a new epoch */ + bool cgs_start_epoch; /* total handled target pages at the beginning of period */ uint64_t target_page_count_prev; @@ -491,6 +508,11 @@ static void pss_init(PageSearchStatus *pss, RAMBlock *rb, ram_addr_t page) pss->block = rb; pss->page = page; pss->complete_round = false; +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + pss->cgs_private_gpa = CGS_PRIVATE_GPA_INVALID; + } +#endif } /* @@ -708,6 +730,42 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss, return 1; } +#ifdef CONFIG_VIRTCCA_MIGRATION +static hwaddr virtcca_ram_get_private_gpa(RAMBlock *rb, unsigned long page) +{ + int ret; + ram_addr_t offset = ((ram_addr_t)page) << TARGET_PAGE_BITS; + hwaddr gpa; + + /* ROM devices contain unencrypted data */ + if (migrate_ram_is_ignored(rb) || + memory_region_is_romd(rb->mr) || + memory_region_is_rom(rb->mr) || + !memory_region_is_ram(rb->mr)) { + return CGS_PRIVATE_GPA_INVALID; + } + + if (offset >= rb->used_length) { + return CGS_PRIVATE_GPA_INVALID; + } + + ret = kvm_physical_memory_addr_from_host(kvm_state, rb->host + offset, + &gpa); + if (!ret) { + error_report("failed to finf gpa, page=%lx", page); + return CGS_PRIVATE_GPA_INVALID; + } + + /* Check if the GPA is within the range of the 
CVM RAM */ + if ((gpa >= UEFI_MAX_SIZE && gpa < virtcca_cvm_gpa_start) || + gpa >= virtcca_cvm_gpa_start + virtcca_cvm_ram_size) { + return CGS_PRIVATE_GPA_INVALID; + } + + return gpa; +} +#endif + /** * pss_find_next_dirty: find the next dirty page of current ramblock * @@ -741,6 +799,12 @@ static void pss_find_next_dirty(PageSearchStatus *pss) } pss->page = find_next_bit(bitmap, size, pss->page); + +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + pss->cgs_private_gpa = virtcca_ram_get_private_gpa(pss->block, pss->page); + } +#endif } static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb, @@ -1316,7 +1380,7 @@ static int ram_save_shared_region_list(RAMState *rs, QEMUFile *f) ConfidentialGuestSupportClass *cgs_class = (ConfidentialGuestSupportClass *) object_get_class(OBJECT(ms->cgs)); struct ConfidentialGuestMemoryEncryptionOps *ops = - cgs_class->memory_encryption_ops; + cgs_class->memory_encryption_ops; ram_transferred_add(save_page_header(pss, f, pss->last_sent_block, @@ -1520,6 +1584,11 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) { /* Didn't find anything in this RAM Block */ pss->page = 0; +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + pss->cgs_private_gpa = virtcca_ram_get_private_gpa(pss->block, pss->page); + } +#endif pss->block = QLIST_NEXT_RCU(pss->block, next); if (!pss->block) { if (migrate_multifd() && @@ -1547,6 +1616,11 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) pss->block = QLIST_FIRST_RCU(&ram_list.blocks); /* Flag that we've looped */ pss->complete_round = true; +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + rs->cgs_start_epoch = true; + } +#endif /* After the first round, enable XBZRLE. 
*/ if (migrate_xbzrle()) { rs->xbzrle_started = true; @@ -2042,7 +2116,11 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) */ pss->block = block; pss->page = offset >> TARGET_PAGE_BITS; - +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + pss->cgs_private_gpa = virtcca_ram_get_private_gpa(pss->block, pss->page); + } +#endif /* * This unqueued page would break the "one round" check, even is * really rare. @@ -2273,6 +2351,124 @@ static bool encrypted_test_list(RAMState *rs, RAMBlock *block, return ops->is_gfn_in_unshared_region(gfn); } +#ifdef CONFIG_VIRTCCA_MIGRATION +static int ram_save_cgs_private_page(RAMState *rs, + PageSearchStatus *pss, bool cancel) +{ + RAMBlock *block = pss->block; + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; + long res; + + if (cancel) { + res = cgs_mig_savevm_state_ram_cancel(pss->pss_channel, block, offset, + pss->cgs_private_gpa); + } else { + res = cgs_mig_savevm_state_ram(pss->pss_channel, + block, offset, pss->cgs_private_gpa); + } + if (res > 0) { + stat64_add(&mig_stats.qemu_file_transferred, res); + stat64_add(&mig_stats.cgs_private_pages, 1); + } else { + /* Return the negative error code */ + return res; + } + + /* Return the number of pages (i.e. 
1) succeeded to be saved/cancelled */ + return 1; +} + +static int virtcca_save_zero_page(RAMState *rs, PageSearchStatus *pss, + ram_addr_t offset) +{ + QEMUFile *file = pss->pss_channel; + int len = 0; + + if (!virtcca_is_zero_page(0, pss->cgs_private_gpa, TARGET_PAGE_SIZE)) { + return 0; + } + + len += save_page_header(pss, file, pss->block, offset | RAM_SAVE_FLAG_ZERO); + qemu_put_byte(file, 0); + len += 1; + ram_release_page(pss->block->idstr, offset); + + stat64_add(&mig_stats.zero_pages, 1); + ram_transferred_add(len); + + return len; +} + + +static int virtcca_save_target_page(ram_addr_t offset, RAMState *rs, PageSearchStatus *pss) +{ + if ((pss->cgs_private_gpa < virtCCA_mig.swiotlb_start || pss->cgs_private_gpa >= virtCCA_mig.swiotlb_end) && pss->cgs_private_gpa != CGS_PRIVATE_GPA_INVALID) { + if (virtcca_save_zero_page(rs, pss, offset)) { + return 1; + } + return ram_save_cgs_private_page(rs, pss, false); + } + + if (pss->cgs_private_gpa >= virtCCA_mig.swiotlb_start && pss->cgs_private_gpa < virtCCA_mig.swiotlb_end) { + if (save_zero_page(rs, pss, offset)) { + return 1; + } + return ram_save_page(rs, pss); + } + + return 1; +} + +bool virtcca_is_swiotlb(void *host) +{ + hwaddr cgs_private_gpa; + + if (!kvm_physical_memory_addr_from_host(kvm_state, host, + &cgs_private_gpa)) + return false; + + if (cgs_private_gpa >= virtCCA_mig.swiotlb_start && cgs_private_gpa < virtCCA_mig.swiotlb_end) { + return true; + } + + return false; +} + + +/* enable the CGS state in the ram header and the CGS epoch in the ram header */ +size_t ram_save_cgs_ram_header(QEMUFile *f, RAMBlock *block, + ram_addr_t offset, bool cancel) +{ + uint64_t flags = cancel ? 
RAM_SAVE_FLAG_CGS_STATE_CANCEL : + RAM_SAVE_FLAG_CGS_STATE; + + return save_page_header(ram_state->pss, f, block, offset | flags); +} + +/* start the CGS epoch */ +void ram_save_cgs_epoch_header(QEMUFile *f) +{ + qemu_put_be64(f, RAM_SAVE_FLAG_CGS_EPOCH); +} + +static int ram_save_cgs_start_epoch(RAMState *rs, PageSearchStatus *pss) +{ + long res; + QEMUFile *f = pss->pss_channel; + + res = cgs_ram_save_start_epoch(f); + if (res < 0) { + return (int)res; + } else if (res > 0) { + stat64_add(&mig_stats.qemu_file_transferred, res); + stat64_add(&mig_stats.cgs_epochs, 1); + rs->cgs_start_epoch = false; + } + + return 0; +} +#endif + /** * ram_save_target_page_legacy: save one target page * @@ -2286,6 +2482,12 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; int res; +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + return virtcca_save_target_page(offset, rs, pss); + } +#endif + if (control_save_page(pss, offset, &res)) { return res; } @@ -2710,6 +2912,19 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) goto completed; } #endif + +#ifdef CONFIG_VIRTCCA_MIGRATION + /* + * If the migration is not in postcopy mode, prepare the epoch of cgs pages + * to be sent. 
+ */ + if (virtcca_cvm_enabled()) { + if (!migration_in_postcopy() && rs->cgs_start_epoch) { + ram_save_cgs_start_epoch(rs, pss); + } + } +#endif + do { page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page); @@ -2792,6 +3007,7 @@ static int ram_find_and_save_block(RAMState *rs) rs->last_page = 0; } + /* init the cgs_private_gpa of pss */ pss_init(pss, rs->last_seen_block, rs->last_page); while (true){ @@ -2936,6 +3152,11 @@ static void ram_state_reset(RAMState *rs) rs->last_page = 0; rs->last_version = ram_list.version; rs->xbzrle_started = false; +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + rs->cgs_start_epoch = true; + } +#endif } #define MAX_WAIT 50 /* ms, half buffered_file limit */ @@ -3237,6 +3458,7 @@ static int ram_state_init(RAMState **rsp) qemu_mutex_init(&(*rsp)->src_page_req_mutex); QSIMPLEQ_INIT(&(*rsp)->src_page_requests); (*rsp)->ram_bytes_total = ram_bytes_total(); + info_report("Total RAM: %lx bytes", (*rsp)->ram_bytes_total); /* * Count the total number of pages used by ram blocks not including any @@ -4196,7 +4418,7 @@ int ram_load_postcopy(QEMUFile *f, int channel) trace_ram_load_postcopy_loop(channel, (uint64_t)addr, flags); if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | - RAM_SAVE_FLAG_COMPRESS_PAGE)) { + RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_CGS_STATE)) { block = ram_block_from_stream(mis, f, flags, channel); if (!block) { ret = -EINVAL; @@ -4487,7 +4709,6 @@ static int parse_ramblocks(QEMUFile *f, ram_addr_t total_ram_bytes) * * Called in precopy mode by ram_load(). * rcu_read_lock is taken prior to this being called. 
- * * @f: QEMUFile where to send the data */ static int ram_load_precopy(QEMUFile *f) @@ -4582,6 +4803,7 @@ static int ram_load_precopy(QEMUFile *f) ret = -EINVAL; break; } + ram_handle_zero(host, TARGET_PAGE_SIZE); break; @@ -4607,6 +4829,7 @@ static int ram_load_precopy(QEMUFile *f) break; } break; + case RAM_SAVE_FLAG_MULTIFD_FLUSH: multifd_recv_sync_main(); break; @@ -4645,6 +4868,134 @@ static int ram_load_precopy(QEMUFile *f) return ret; } +#ifdef CONFIG_VIRTCCA_MIGRATION +/** + * virtcca_ram_load_precopy: load pages in precopy case + * + * Returns 0 for success or -errno in case of error + * + * Called in precopy mode by ram_load(). + * rcu_read_lock is taken prior to this being called. + * @f: QEMUFile to read the data from + */ +static int virtcca_ram_load_precopy(QEMUFile *f) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); + int flags = 0, ret = 0, i = 0; + + while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { + ram_addr_t addr; + void *host = NULL, *host_bak = NULL; + bool need_sync = false; + uint8_t ch; + + /* + * Yield periodically to let main loop run, but an iteration of + * the main loop is expensive, so do it each some iterations + */ + if ((i & 32767) == 0 && qemu_in_coroutine()) { + aio_co_schedule(qemu_get_current_aio_context(), + qemu_coroutine_self()); + qemu_coroutine_yield(); + } + i++; + + addr = qemu_get_be64(f); + flags = addr & ~TARGET_PAGE_MASK; + addr &= TARGET_PAGE_MASK; + + if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE + | RAM_SAVE_FLAG_CGS_STATE)) { + RAMBlock *block = ram_block_from_stream(mis, f, flags, + RAM_CHANNEL_PRECOPY); + + host = host_from_ram_block_offset(block, addr); + + if (!host) { + error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); + ret = -EINVAL; + break; + } + ramblock_recv_bitmap_set(block, host); + + trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); + } + + switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { + case RAM_SAVE_FLAG_MEM_SIZE: + ret = parse_ramblocks(f, addr); +
break; + + case RAM_SAVE_FLAG_ZERO: + ch = qemu_get_byte(f); + if (ch != 0) { + error_report("Found a zero page with value %d", ch); + ret = -EINVAL; + break; + } + if (!virtcca_is_swiotlb(host)) { + virtcca_import_zero_page(0, host); + break; + } + + ram_handle_zero(host, TARGET_PAGE_SIZE); + break; + + case RAM_SAVE_FLAG_PAGE: + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + break; + + case RAM_SAVE_FLAG_CGS_EPOCH: + need_sync = true; + QEMU_FALLTHROUGH; + case RAM_SAVE_FLAG_CGS_STATE: + case RAM_SAVE_FLAG_CGS_STATE_CANCEL: + if (need_sync) { + multifd_recv_barrier(); + } + + if (cgs_mig_loadvm_state(f, 0) < 0) { + error_report(" Failed to load cgs state"); + ret = -EINVAL; + } + + if (need_sync) { + multifd_recv_unbarrier(); + } + break; + case RAM_SAVE_FLAG_MULTIFD_FLUSH: + multifd_recv_sync_main(); + break; + case RAM_SAVE_FLAG_EOS: + /* normal exit */ + if (migrate_multifd() && + migrate_multifd_flush_after_each_section()) { + multifd_recv_sync_main(); + } + break; + case RAM_SAVE_FLAG_HOOK: + ret = rdma_registration_handle(f); + if (ret < 0) { + qemu_file_set_error(f, ret); + } + break; + default: + error_report("Unknown combination of migration flags: 0x%x", flags); + ret = -EINVAL; + } + if (!ret) { + ret = qemu_file_get_error(f); + } + if (!ret && host_bak) { + memcpy(host_bak, host, TARGET_PAGE_SIZE); + } + } + + ret |= wait_for_decompress_done(); + return ret; +} +#endif + static int ram_load(QEMUFile *f, void *opaque, int version_id) { int ret = 0; @@ -4676,7 +5027,22 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) */ ret = ram_load_postcopy(f, RAM_CHANNEL_PRECOPY); } else { - ret = ram_load_precopy(f); + /* + * The RAM_SAVE_FLAG_CGS* flag are only supported in Kunpeng TEE + * virtcca, which are conflict with RAM_SAVE_FLAG_COMPRESS_PAGE + * and RAM_SAVE_FLAG_ENCRYPTED_DATA. + * So, we need to handle them separately. If Kunpeng TEE virtcca + * is not enabled, the packaged flag_switch function should be + * used for normal migration. 
Otherwise, use flag_switch_cgs for it. + */ + if (!virtcca_cvm_enabled()) { + ret = ram_load_precopy(f); + } +#ifdef CONFIG_VIRTCCA_MIGRATION + else { + ret = virtcca_ram_load_precopy(f); + } +#endif } } trace_ram_load_complete(ret, seq_iter); diff --git a/migration/ram.h b/migration/ram.h index cd263df02662d0d69180c22e0c6950f27f86abea..a177d8502b32b0af4e54f39f4ca915e50fa24d09 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -77,6 +77,12 @@ bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start); void postcopy_preempt_shutdown_file(MigrationState *s); void *postcopy_preempt_thread(void *opaque); +#ifdef CONFIG_VIRTCCA_MIGRATION +/* cgs handlers */ +void ram_save_cgs_epoch_header(QEMUFile *f); +size_t ram_save_cgs_ram_header(QEMUFile *f, RAMBlock *block, + ram_addr_t offset, bool cancel); +#endif /* ram cache */ int colo_init_ram_cache(void); void colo_flush_ram_cache(void); diff --git a/migration/savevm.c b/migration/savevm.c index 4b847060d197c8a91360a11233fd8a485c15bc5a..551574641253c384f53f51c71e14d10a099b3274 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -50,6 +50,7 @@ #include "sysemu/cpus.h" #include "exec/memory.h" #include "exec/target_page.h" +#include "exec/confidential-guest-support.h" #include "trace.h" #include "qemu/iov.h" #include "qemu/job.h" @@ -73,6 +74,9 @@ #ifdef CONFIG_HAM_MIGRATION #include "ham.h" #endif +#ifdef CONFIG_VIRTCCA_MIGRATION +#include "cgs.h" +#endif const unsigned int postcopy_ram_discard_version; @@ -95,6 +99,7 @@ enum qemu_vm_cmd { MIG_CMD_ENABLE_COLO, /* Enable COLO */ MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */ MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */ + MIG_CMD_CGS_MIG_CREATE_TEC, /* Create TEC */ MIG_CMD_MAX }; @@ -1205,6 +1210,18 @@ void qemu_savevm_send_postcopy_run(QEMUFile *f) qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL); } +#ifdef CONFIG_VIRTCCA_MIGRATION +/* Kick the destination to create tec */ +void qemu_savevm_send_create_tec(QEMUFile *f) +{ + if 
(virtcca_cvm_enabled()) { + info_report("qemu_savevm_send_create_tec send cmd"); + trace_savevm_send_postcopy_run(); + qemu_savevm_command_send(f, MIG_CMD_CGS_MIG_CREATE_TEC, 0, NULL); + } +} +#endif + void qemu_savevm_send_postcopy_resume(QEMUFile *f) { trace_savevm_send_postcopy_resume(); @@ -1315,6 +1332,7 @@ int qemu_savevm_state_prepare(Error **errp) return 0; } +/* for cgs mig, this savevm state func should be cut some func */ void qemu_savevm_state_setup(QEMUFile *f) { MigrationState *ms = migrate_get_current(); @@ -1324,7 +1342,17 @@ void qemu_savevm_state_setup(QEMUFile *f) json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size()); json_writer_start_array(ms->vmdesc, "devices"); +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + if (cgs_mig_savevm_state_setup(f)) { + return; + } + if (cgs_mig_savevm_state_start(f)) { + return; + } + } +#endif trace_savevm_state_setup(); QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (se->vmsd && se->vmsd->early_setup) { @@ -1613,6 +1641,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, bool inactivate_disks) { + info_report("debug : calling qemu_savevm_state_complete_precopy"); int ret; Error *local_err = NULL; bool in_postcopy = migration_in_postcopy(); @@ -1632,6 +1661,17 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, } } +#ifdef CONFIG_VIRTCCA_MIGRATION + /* save the vcpu states */ + if (virtcca_cvm_enabled()) { + qemu_savevm_send_create_tec(f); + ret = cgs_mig_savevm_state_end(f); + if (ret) { + return ret; + } + } +#endif + if (iterable_only) { goto flush; } @@ -1702,6 +1742,9 @@ void qemu_savevm_state_cleanup(void) } trace_savevm_state_cleanup(); +#ifdef CONFIG_VIRTCCA_MIGRATION + cgs_mig_savevm_state_cleanup(); +#endif QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (se->ops && se->ops->save_cleanup) { se->ops->save_cleanup(se->opaque); @@ -1767,7 +1810,9 @@ 
static int qemu_savevm_state(QEMUFile *f, Error **errp) void qemu_savevm_live_state(QEMUFile *f) { /* save QEMU_VM_SECTION_END section */ +#ifdef CONFIG_VIRTCCA_MIGRATION qemu_savevm_state_complete_precopy(f, true, false); +#endif qemu_put_byte(f, QEMU_VM_EOF); } @@ -2504,7 +2549,10 @@ static int loadvm_process_command(QEMUFile *f) case MIG_CMD_POSTCOPY_RUN: return loadvm_postcopy_handle_run(mis); - +#ifdef CONFIG_VIRTCCA_MIGRATION + case MIG_CMD_CGS_MIG_CREATE_TEC: + return cgs_mig_loadvm_create_tec(f); +#endif case MIG_CMD_POSTCOPY_RAM_DISCARD: return loadvm_postcopy_ram_handle_discard(mis, len); @@ -2757,6 +2805,14 @@ static int qemu_loadvm_state_setup(QEMUFile *f) int ret; trace_loadvm_state_setup(); +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + ret = cgs_mig_loadvm_state_setup(f); /* virtcca mig embed point */ + if (ret) { + return ret; + } + } +#endif QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || !se->ops->load_setup) { continue; @@ -2787,6 +2843,9 @@ void qemu_loadvm_state_cleanup(void) SaveStateEntry *se; trace_loadvm_state_cleanup(); +#ifdef CONFIG_VIRTCCA_MIGRATION + cgs_mig_loadvm_state_cleanup(); +#endif QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (se->ops && se->ops->load_cleanup) { se->ops->load_cleanup(se->opaque); @@ -2870,9 +2929,11 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) uint8_t section_type; int ret = 0; - if (qemu_mutex_iothread_locked()) { +#ifdef CONFIG_VIRTCCA_MIGRATION + if (!virtcca_cvm_enabled() && qemu_mutex_iothread_locked()) { memory_region_transaction_begin(); } +#endif retry: while (true) { @@ -2899,6 +2960,14 @@ retry: goto out; } break; +#ifdef CONFIG_VIRTCCA_MIGRATION + case QEMU_VM_SECTION_CGS_START: + case QEMU_VM_SECTION_CGS_END: + if (virtcca_cvm_enabled()) { + ret = cgs_mig_loadvm_state(f, 0); + } + break; +#endif case QEMU_VM_COMMAND: ret = loadvm_process_command(f); trace_qemu_loadvm_state_section_command(ret); @@ -2907,6 +2976,11 @@ retry: 
} break; case QEMU_VM_EOF: +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + virtcca_mig_crc(); + } +#endif /* This is the end of migration */ goto out; default: @@ -2917,9 +2991,11 @@ retry: } out: - if (qemu_mutex_iothread_locked()) { +#ifdef CONFIG_VIRTCCA_MIGRATION + if (!virtcca_cvm_enabled() && qemu_mutex_iothread_locked()) { memory_region_transaction_commit(); } +#endif if (ret < 0) { qemu_file_set_error(f, ret); @@ -3067,11 +3143,6 @@ int qemu_loadvm_approve_switchover(void) bool save_snapshot(const char *name, bool overwrite, const char *vmstate, bool has_devices, strList *devices, Error **errp) { - if (virtcca_cvm_enabled()) { - error_setg(errp, "The savevm command is temporarily unsupported in cvm."); - return false; - } - BlockDriverState *bs; QEMUSnapshotInfo sn1, *sn = &sn1; int ret = -1, ret2; @@ -3083,6 +3154,13 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, GLOBAL_STATE_CODE(); +#ifdef CONFIG_VIRTCCA_MIGRATION + if (virtcca_cvm_enabled()) { + error_setg(errp, "The savevm command is temporarily unsupported in cvm."); + return false; + } +#endif + if (migration_is_blocked(errp)) { return false; } diff --git a/migration/savevm.h b/migration/savevm.h index 74669733dd63a080b765866c703234a5c4939223..4487034bd22fd2c25f7bbee6c0730ae2959d3fe7 100644 --- a/migration/savevm.h +++ b/migration/savevm.h @@ -27,6 +27,8 @@ #define QEMU_VM_VMDESCRIPTION 0x06 #define QEMU_VM_CONFIGURATION 0x07 #define QEMU_VM_COMMAND 0x08 +#define QEMU_VM_SECTION_CGS_START 0x09 +#define QEMU_VM_SECTION_CGS_END 0x0a #define QEMU_VM_SECTION_FOOTER 0x7e bool qemu_savevm_state_blocked(Error **errp); @@ -39,6 +41,7 @@ void qemu_savevm_state_header(QEMUFile *f); int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy); void qemu_savevm_state_cleanup(void); void qemu_savevm_state_complete_postcopy(QEMUFile *f); +/* add cgs param into qemu_savevm_state_complete_precopy */ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, 
bool inactivate_disks); void qemu_savevm_state_pending_exact(uint64_t *must_precopy, @@ -51,6 +54,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len); void qemu_savevm_send_postcopy_advise(QEMUFile *f); void qemu_savevm_send_postcopy_listen(QEMUFile *f); void qemu_savevm_send_postcopy_run(QEMUFile *f); +void qemu_savevm_send_create_tec(QEMUFile *f); void qemu_savevm_send_postcopy_resume(QEMUFile *f); void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name); diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c index c0b66f11bf88ab61a8a195ead686acb94a9e1548..5592ed1096d3368a2558d1c00ee4c481026c00a0 100644 --- a/monitor/qmp-cmds.c +++ b/monitor/qmp-cmds.c @@ -51,11 +51,6 @@ void qmp_quit(Error **errp) void qmp_stop(Error **errp) { - if (virtcca_cvm_enabled()) { - error_setg(errp, "The stop command is temporarily unsupported in cvm."); - return; - } - /* if there is a dump in background, we should wait until the dump * finished */ if (qemu_system_dump_in_progress()) { diff --git a/qapi/migration.json b/qapi/migration.json index 6a435f4c745e9658d60b72330694295eda2eb651..b5c66f22b98da0c032863e387b1413969960b747 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -81,7 +81,8 @@ 'multifd-bytes': 'uint64', 'pages-per-second': 'uint64', 'precopy-bytes': 'uint64', 'downtime-bytes': 'uint64', 'postcopy-bytes': 'uint64', - 'dirty-sync-missed-zero-copy': 'uint64' } } + 'dirty-sync-missed-zero-copy': 'uint64', + 'cgs-epochs' : 'uint64', 'cgs-private-pages' : 'uint64'} } ## # @XBZRLECacheStats: diff --git a/qapi/qom.json b/qapi/qom.json index d1fbe2b0a28d8313aa668e3b30e005b0dbe19eed..aa6016b4b823144c65d36079f4b64303bff50279 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -938,6 +938,16 @@ { 'enum': 'TmmGuestMeasurementAlgo', 'data': ['default', 'sha256', 'sha512'] } +## +# @TmmMigVmCap: +# +# MigCVM configuration for virtcca migration +# +# Since: FIXME +## +{ 'enum': 'TmmMigVmCap', +'data': ['default', 'migvm'] } + ## #
@TmmGuestProperties: # @@ -951,6 +961,8 @@ 'data': { '*sve-vector-length': 'uint32', '*num-pmu-counters': 'uint32', '*kae': 'uint32', + '*virtcca-migration-cap': 'uint32', + '*migvm-cap' : 'TmmMigVmCap', '*measurement-algo': 'TmmGuestMeasurementAlgo' } } ## diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index fd3eecbb5e1a4fdf0abd2fe707ffd3c422ffb63f..b2a92ed18886646393a1d952b26d7f652ed56ee4 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -528,6 +528,8 @@ _meson_option_parse() { --disable-ham-migration) printf "%s" -Dham_migration=disabled ;; --enable-urma-migration) printf "%s" -Durma_migration=enabled ;; --disable-urma-migration) printf "%s" -Durma_migration=disabled ;; + --enable-virtcca-migration) printf "%s" -Dvirtcca_migration=enabled ;; + --disable-virtcca-migration) printf "%s" -Dvirtcca_migration=disabled ;; --enable-usb-redir) printf "%s" -Dusb_redir=enabled ;; --disable-usb-redir) printf "%s" -Dusb_redir=disabled ;; --enable-vde) printf "%s" -Dvde=enabled ;; diff --git a/target/arm/kvm-tmm.c b/target/arm/kvm-tmm.c index d6dc8342c4deafb630024a4fce7a11c8808b3461..cb3b5c2083c31d884f69c1039c81eb98349a40dc 100644 --- a/target/arm/kvm-tmm.c +++ b/target/arm/kvm-tmm.c @@ -16,19 +16,23 @@ #include "migration/blocker.h" #include "qapi/error.h" #include "qapi/qapi-commands-misc-target.h" +#include "qemu/osdep.h" +#include "qom/object.h" #include "qom/object_interfaces.h" #include "sysemu/kvm.h" #include "sysemu/runstate.h" #include "hw/loader.h" #include "linux-headers/asm-arm64/kvm.h" #include - +#ifdef CONFIG_VIRTCCA_MIGRATION +#include "migration/cgs.h" +#endif #define TYPE_TMM_GUEST "tmm-guest" OBJECT_DECLARE_SIMPLE_TYPE(TmmGuest, TMM_GUEST) #define TMM_PAGE_SIZE qemu_real_host_page_size() #define TMM_MAX_PMU_CTRS 0x20 -#define TMM_MAX_CFG 6 +#define TMM_MAX_CFG 8 #define TMM_MEMORY_INFO_SYSFS "/sys/kernel/tmm/memory_info" typedef struct { @@ -37,6 +41,11 @@ typedef struct { hwaddr 
hpre_addr[KVM_ARM_TMM_MAX_KAE_VF_NUM]; } KaeDeviceInfo; +/* add the migration cap */ +typedef struct { + uint32_t mig_enable; +} MigrationCap; + struct TmmGuest { ConfidentialGuestSupport parent_obj; GSList *ram_regions; @@ -44,6 +53,8 @@ struct TmmGuest { uint32_t sve_vl; uint32_t num_pmu_cntrs; KaeDeviceInfo kae_device_info; + MigrationCap migration_cap; + TmmMigVmCap migvm_cap; }; typedef struct { @@ -55,12 +66,13 @@ typedef struct { } TmmRamRegion; static TmmGuest *tmm_guest; - +bool virtcca_mig_migcvm_allowed = false; + bool kvm_arm_tmm_enabled(void) { return !!tmm_guest; } - + static int tmm_configure_one(TmmGuest *guest, uint32_t cfg, Error **errp) { int ret = 1; @@ -96,6 +108,40 @@ static int tmm_configure_one(TmmGuest *guest, uint32_t cfg, Error **errp) break; case KVM_CAP_ARM_TMM_CFG_DBG: return 0; + case KVM_CAP_ARM_TMM_CFG_MIG: + if (!guest->migration_cap.mig_enable) { + info_report("\n Qemu-KVM:\n\tMigration disabled\n\n"); + return 0; + } + args.mig_src = !runstate_check(RUN_STATE_INMIGRATE); + if (args.mig_src) { + info_report("\n Migration Version: Dev(Live).\n \ + WARNING: you are using Live Migration Version of virtCCA, this is src.\n\n"); + } else { + info_report("\n Migration Version: Dev(Live).\n \ + WARNING: you are using Live Migration Version of virtCCA, this is dest.\n\n"); + } + args.mig_enable = guest->migration_cap.mig_enable ? 
1 : 0; + cfg_str = "Migration"; + break; + case KVM_CAP_ARM_TMM_CFG_MIG_CVM: + if (!guest->migvm_cap) { + return 0; + } + switch (guest->migvm_cap) { + case KVM_CAP_ARM_TMM_MIGVM_DEFAULT: + args.migration_migvm_cap = KVM_CAP_ARM_TMM_MIGVM_DEFAULT; + break; + case KVM_CAP_ARM_TMM_MIGVM_ENABLE: + args.migration_migvm_cap = KVM_CAP_ARM_TMM_MIGVM_ENABLE; + info_report("Migration Version: the migvm is enabled \n\n"); + virtcca_mig_migcvm_allowed = true; + break; + default: + g_assert_not_reached(); + } + cfg_str = "migvm enabled"; + break; case KVM_CAP_ARM_TMM_CFG_PMU: if (!guest->num_pmu_cntrs) { return 0; @@ -123,7 +169,7 @@ static int tmm_configure_one(TmmGuest *guest, uint32_t cfg, Error **errp) if (ret) { error_setg_errno(errp, -ret, "TMM: failed to configure %s", cfg_str); } - + return ret; } @@ -187,10 +233,156 @@ static int tmm_create_rd(Error **errp) return ret; } +int tmm_create_tec(void) +{ + CPUState *cs; + int ret = 0; + + CPU_FOREACH(cs) { + ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_REC); + if (ret) { + error_report("TMM: failed to finalize vCPU: %s", strerror(-ret)); + return ret; + } + } + return ret; +} + +#ifdef CONFIG_VIRTCCA_MIGRATION +static int virtcca_save_migvm_cid(uint64_t cid) +{ + info_report("calling virtcca_binding_with_migcvm_pid"); + struct kvm_virtcca_mig_cmd cmd; + struct mig_cvm guest_mig_cvm_info; + int ret; + + cmd.id = KVM_CVM_MIGCVM_SET_CID; + cmd.flags = 0; + guest_mig_cvm_info.version = KVM_CVM_MIGVM_VERSION; + guest_mig_cvm_info.migvm_cid = cid; /* vsock cid of migvm */ + cmd.data = (__u64)(unsigned long)&guest_mig_cvm_info; + + ret = kvm_vm_ioctl(kvm_state, KVM_CVM_MIG_IOCTL, &cmd); + if (ret) { + error_report("failed to bind migcvm: %d", ret); + } + + return ret; +} + +typedef struct search_cid_ctx { + const char *target_type; + Object *result; +} search_cid_ctx_t; + +static int recursive_search_cb(Object *obj, void *opaque) +{ + search_cid_ctx_t *ctx = (search_cid_ctx_t *)opaque; + const char *obj_type = 
object_get_typename(obj); + + if (!ctx->result && strcmp(obj_type, ctx->target_type) == 0) { + ctx->result = obj; + return 1; + } + object_child_foreach(obj, recursive_search_cb, ctx); + return 0; +} + +static Object *find_vsock_backend(Object *vsock_obj) +{ + Error *err = NULL; + Object *backend = object_property_get_link(vsock_obj, "vhost-vsock-device", &err); + + if (!err && backend) { + return backend; + } + error_free(err); + + search_cid_ctx_t ctx = { + .target_type = "vhost-vsock-device", + .result = NULL, + }; + object_child_foreach(vsock_obj, recursive_search_cb, &ctx); + return ctx.result; +} + +static Object *find_vsock_device(Object *root) +{ + const char *vsock_types[] = { + "vhost-vsock-pci", + "virtio-vsock-pci", + NULL + }; + + for (int i = 0; vsock_types[i]; i++) { + search_cid_ctx_t ctx = { + .target_type = vsock_types[i], + .result = NULL, + }; + + object_child_foreach(root, recursive_search_cb, &ctx); + + if (ctx.result) { + info_report("Found VSOCK device of type '%s'", vsock_types[i]); + return ctx.result; + } + } + + return NULL; +} + +static uint64_t parse_migcvm_cid(void) +{ + Error *err = NULL; + uint64_t cid = 0; + info_report("calling parse_migcvm_cid"); + + Object *machine = object_resolve_path("/machine", NULL); + if (!machine) { + error_report("Failed to find /machine object"); + return 0; + } + + Object *vsock_obj = find_vsock_device(machine); + if (!vsock_obj) { + error_report("No VSOCK PCI device found"); + return 0; + } + + Object *backend = find_vsock_backend(vsock_obj); + if (!backend) { + error_report("No vhost-vsock-device backend found"); + return 0; + } + + cid = object_property_get_uint(backend, "guest-cid", &err); + if (err) { + error_report_err(err); + return 0; + } + info_report("Detected guest-cid: %" PRIu64, cid); + return cid; +} + +void virtcca_migvm_save_cid(void) +{ + uint64_t cid = 0; + + cid = parse_migcvm_cid(); + if (!cid) { + error_report("Failed to parse migcvm cid"); + exit(1); + } + + if 
(virtcca_save_migvm_cid(cid)) { + error_report("Failed to save migcvm cid"); + exit(1); + } +} +#endif static void tmm_vm_state_change(void *opaque, bool running, RunState state) { int ret; - CPUState *cs; if (!running) { return; @@ -199,12 +391,8 @@ static void tmm_vm_state_change(void *opaque, bool running, RunState state) g_slist_foreach(tmm_guest->ram_regions, tmm_populate_region, NULL); g_slist_free_full(g_steal_pointer(&tmm_guest->ram_regions), g_free); - CPU_FOREACH(cs) { - ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_REC); - if (ret) { - error_report("TMM: failed to finalize vCPU: %s", strerror(-ret)); - exit(1); - } + if (tmm_create_tec()) { + exit(0); } ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, @@ -219,7 +407,7 @@ int kvm_arm_tmm_init(ConfidentialGuestSupport *cgs, Error **errp) { int ret; int cfg; - + if (!tmm_guest) { return -ENODEV; } @@ -235,13 +423,23 @@ int kvm_arm_tmm_init(ConfidentialGuestSupport *cgs, Error **errp) return ret; } } - + ret = tmm_create_rd(&error_abort); if (ret) { return ret; } - +#ifdef CONFIG_VIRTCCA_MIGRATION + if (runstate_check(RUN_STATE_INMIGRATE)) { + ret = !cgs_mig_is_ready(false, NULL, 0); + } +#endif + if (ret) { + error_setg(errp, "cgs mig required, but not ready"); + return ret; + } + qemu_add_vm_change_state_handler(tmm_vm_state_change, NULL); + info_report("TMM initialized"); return 0; } @@ -352,6 +550,45 @@ void tmm_set_hpre_addr(hwaddr base, int num) tmm_guest->kae_device_info.hpre_addr[num] = base; } +#ifdef CONFIG_VIRTCCA_MIGRATION +/* get the mig ability config */ +static void tmm_get_mig_cap(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + TmmGuest *guest = TMM_GUEST(obj); + + visit_type_uint32(v, name, &guest->migration_cap.mig_enable, errp); +} + +/* enable mig cap into qemu */ +static void tmm_set_mig_cap(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + TmmGuest *guest = TMM_GUEST(obj); + uint32_t value; + + if (!visit_type_uint32(v, 
name, &value, errp)) { + return; + } + + guest->migration_cap.mig_enable = value; +} + +static int tmm_get_migvm_algo(Object *obj, Error **errp G_GNUC_UNUSED) +{ + TmmGuest *guest = TMM_GUEST(obj); + + return guest->migvm_cap; +} + +static void tmm_set_migvm_algo(Object *obj, int algo, Error **errp G_GNUC_UNUSED) +{ + TmmGuest *guest = TMM_GUEST(obj); + + guest->migvm_cap = algo; +} +#endif + static void tmm_guest_class_init(ObjectClass *oc, void *data) { object_class_property_add_enum(oc, "measurement-algo", @@ -372,6 +609,17 @@ static void tmm_guest_class_init(ObjectClass *oc, void *data) tmm_set_sve_vl, NULL, NULL); object_class_property_set_description(oc, "sve-vector-length", "SVE vector length. 0 disables SVE (the default)"); +#ifdef CONFIG_VIRTCCA_MIGRATION + /* Add the migration enable func */ + object_class_property_add(oc, "virtcca-migration-cap", "uint32", tmm_get_mig_cap, + tmm_set_mig_cap, NULL, NULL); + object_class_property_set_description(oc, "virtcca-migration-cap", + "Config of virtcca migration. 0 disables mig (the default)"); + object_class_property_add_enum(oc, "migvm-cap", "TmmMigVmCap", &TmmMigVmCap_lookup, + tmm_get_migvm_algo, tmm_set_migvm_algo); + object_class_property_set_description(oc, "migvm-cap", + "Config of migCVM of virtcca migration. 
Options are ('default', 'migvm')"); +#endif object_class_property_add(oc, "num-pmu-counters", "uint32", tmm_get_num_pmu_cntrs, tmm_set_num_pmu_cntrs, NULL, NULL); diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 1bb6e332e7d87958e71237c1ceb8e81ab022b775..a9dd94e9a53ee3d75a07bbb367ab09ba92fbe1e4 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -303,6 +303,8 @@ void kvm_arm_add_vcpu_properties(Object *obj) if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) { cpu->kvm_adjvtime = true; + if (virtcca_cvm_enabled()) + cpu->kvm_adjvtime = false; object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, kvm_no_adjvtime_set); object_property_set_description(obj, "kvm-no-adjvtime", diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index 13509b7836d7311ed730924b9f63abae8e366cdf..bf84dde67372633a610f926df8b3c9e1cbd1f129 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -170,7 +170,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu); * shall be parked for use when ARM vCPUs are actually realized. */ void kvm_arm_create_host_vcpu(ARMCPU *cpu); - +/** + * tmm_create_tec: + * + * When performing live migration, create the tec structure. + */ +int tmm_create_tec(void); /** * kvm_arm_init_serror_injection: * @cs: CPUState @@ -419,7 +424,7 @@ void tmm_set_hpre_addr(hwaddr base, int num); int kvm_arm_tmm_init(ConfidentialGuestSupport *cgs, Error **errp); bool kvm_arm_tmm_enabled(void); - +void virtcca_migvm_save_cid(void); /** * kvm_arm_set_smccc_filter * @func: funcion