Update version for 8.0.5 release

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
tpm: fix crash when FD >= 1024 and unnecessary errors due to EINTR
2023-09-21 19:23:20 +03:00 · 2023-09-20 10:18:14 +03:00 · 2023-09-20 10:18:14 +03:00 · 2023-09-20 10:18:14 +03:00 · 2023-09-20 10:18:14 +03:00 · 2023-09-20 10:18:14 +03:00
302 changed files with 3414 additions and 1341 deletions
@@ -102,8 +102,8 @@ crash-test-debian:
    IMAGE: debian-amd64
  script:
    - cd build
-    - make check-venv
-    - tests/venv/bin/python3 scripts/device-crash-test -q ./qemu-system-i386
+    - make NINJA=":" check-venv
+    - tests/venv/bin/python3 scripts/device-crash-test -q --tcg-only ./qemu-system-i386

 build-system-fedora:
  extends:
@@ -145,7 +145,7 @@ crash-test-fedora:
    IMAGE: fedora
  script:
    - cd build
-    - make check-venv
+    - make NINJA=":" check-venv
    - tests/venv/bin/python3 scripts/device-crash-test -q ./qemu-system-ppc
    - tests/venv/bin/python3 scripts/device-crash-test -q ./qemu-system-riscv32

@@ -1 +1 @@
-8.0.0
+8.0.5
@@ -2382,7 +2382,7 @@ static int kvm_init(MachineState *ms)
    KVMState *s;
    const KVMCapabilityInfo *missing_cap;
    int ret;
-    int type = 0;
+    int type;
    uint64_t dirty_log_manual_caps;

    qemu_mutex_init(&kml_slots_lock);
@@ -2447,6 +2447,8 @@ static int kvm_init(MachineState *ms)
        type = mc->kvm_type(ms, kvm_type);
    } else if (mc->kvm_type) {
        type = mc->kvm_type(ms, NULL);
+    } else {
+        type = kvm_arch_get_default_type(ms);
    }

    do {
@@ -1830,7 +1830,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
    } else /* if (prot & PAGE_READ) */ {
        tlb_addr = tlbe->addr_read;
        if (!tlb_hit(tlb_addr, addr)) {
-            if (!VICTIM_TLB_HIT(addr_write, addr)) {
+            if (!VICTIM_TLB_HIT(addr_read, addr)) {
                tlb_fill(env_cpu(env), addr, size,
                         MMU_DATA_LOAD, mmu_idx, retaddr);
                index = tlb_index(env, mmu_idx, addr);
@@ -1093,6 +1093,9 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
    TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
 #endif /* TARGET_HAS_PRECISE_SMC */

+    /* Range may not cross a page. */
+    tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
+
    /*
     * We remove all the TBs in the range [start, last].
     * XXX: see if in some cases it could be faster to invalidate all the code
@@ -1183,15 +1186,17 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
    index_last = last >> TARGET_PAGE_BITS;
    for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
        PageDesc *pd = page_find(index);
-        tb_page_addr_t bound;
+        tb_page_addr_t page_start, page_last;

        if (pd == NULL) {
            continue;
        }
        assert_page_locked(pd);
-        bound = (index << TARGET_PAGE_BITS) | ~TARGET_PAGE_MASK;
-        bound = MIN(bound, last);
-        tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
+        page_start = index << TARGET_PAGE_BITS;
+        page_last = page_start | ~TARGET_PAGE_MASK;
+        page_last = MIN(page_last, last);
+        tb_invalidate_phys_page_range__locked(pages, pd,
+                                              page_start, page_last, 0);
    }
    page_collection_unlock(pages);
 }
@@ -191,6 +191,11 @@ static int cryptodev_backend_account(CryptoDevBackend *backend,
    if (algtype == QCRYPTODEV_BACKEND_ALG_ASYM) {
        CryptoDevBackendAsymOpInfo *asym_op_info = op_info->u.asym_op_info;
        len = asym_op_info->src_len;
+
+        if (unlikely(!backend->asym_stat)) {
+            error_report("cryptodev: Unexpected asym operation");
+            return -VIRTIO_CRYPTO_NOTSUPP;
+        }
        switch (op_info->op_code) {
        case VIRTIO_CRYPTO_AKCIPHER_ENCRYPT:
            CryptodevAsymStatIncEncrypt(backend, len);
@@ -210,6 +215,11 @@ static int cryptodev_backend_account(CryptoDevBackend *backend,
    } else if (algtype == QCRYPTODEV_BACKEND_ALG_SYM) {
        CryptoDevBackendSymOpInfo *sym_op_info = op_info->u.sym_op_info;
        len = sym_op_info->src_len;
+
+        if (unlikely(!backend->sym_stat)) {
+            error_report("cryptodev: Unexpected sym operation");
+            return -VIRTIO_CRYPTO_NOTSUPP;
+        }
        switch (op_info->op_code) {
        case VIRTIO_CRYPTO_CIPHER_ENCRYPT:
            CryptodevSymStatIncEncrypt(backend, len);
@@ -112,12 +112,8 @@ static int tpm_util_request(int fd,
                            void *response,
                            size_t responselen)
 {
-    fd_set readfds;
+    GPollFD fds[1] = { {.fd = fd, .events = G_IO_IN } };
    int n;
-    struct timeval tv = {
-        .tv_sec = 1,
-        .tv_usec = 0,
-    };

    n = write(fd, request, requestlen);
    if (n < 0) {
@@ -127,11 +123,8 @@ static int tpm_util_request(int fd,
        return -EFAULT;
    }

-    FD_ZERO(&readfds);
-    FD_SET(fd, &readfds);
-
    /* wait for a second */
-    n = select(fd + 1, &readfds, NULL, NULL, &tv);
+    n = RETRY_ON_EINTR(g_poll(fds, 1, 1000));
    if (n != 1) {
        return -errno;
    }
@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,

    ret = 0;
 out:
-    blk_unref(blk);
+    blk_co_unref(blk);
    return ret;
 }

@@ -2018,7 +2018,15 @@ void blk_activate(BlockBackend *blk, Error **errp)
        return;
    }

-    bdrv_activate(bs, errp);
+    /*
+     * Migration code can call this function in coroutine context, so leave
+     * coroutine context if necessary.
+     */
+    if (qemu_in_coroutine()) {
+        bdrv_co_activate(bs, errp);
+    } else {
+        bdrv_activate(bs, errp);
+    }
 }

 bool coroutine_fn blk_co_is_inserted(BlockBackend *blk)
@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
    ret = 0;
 cleanup:
    qcrypto_block_free(crypto);
-    blk_unref(blk);
+    blk_co_unref(blk);
    return ret;
 }

@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)

    ret = 0;
 fail:
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
    return ret;
 }

@@ -730,7 +730,7 @@ fail:
        bdrv_co_delete_file_noerr(bs);
    }

-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
    qapi_free_QCryptoBlockCreateOptions(create_opts);
    qobject_unref(cryptoopts);
    return ret;
@@ -192,7 +192,10 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
    return exp;

 fail:
-    blk_unref(blk);
+    if (blk) {
+        blk_set_dev_ops(blk, NULL, NULL);
+        blk_unref(blk);
+    }
    aio_context_release(ctx);
    if (exp) {
        g_free(exp->id);
@@ -219,6 +222,7 @@ static void blk_exp_delete_bh(void *opaque)
    assert(exp->refcount == 0);
    QLIST_REMOVE(exp, next);
    exp->drv->delete(exp);
+    blk_set_dev_ops(exp->blk, NULL, NULL);
    blk_unref(exp->blk);
    qapi_event_send_block_export_deleted(exp->id);
    g_free(exp->id);
@@ -346,7 +346,6 @@ static void vduse_blk_exp_delete(BlockExport *exp)

    blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                    vblk_exp);
-    blk_set_dev_ops(exp->blk, NULL, NULL);
    ret = vduse_dev_destroy(vblk_exp->dev);
    if (ret != -EBUSY) {
        unlink(vblk_exp->recon_file);
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
 /* Protects the list of aiocontext and orphaned_reader_count */
 static QemuMutex aio_context_list_lock;

+#if 0
 /* Written and read with atomic operations. */
 static int has_writer;
+#endif

 /*
 * A reader coroutine could move from an AioContext to another.
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
    g_free(ctx->bdrv_graph);
 }

+#if 0
 static uint32_t reader_count(void)
 {
    BdrvGraphRWlock *brdv_graph;
@@ -105,10 +108,17 @@ static uint32_t reader_count(void)
    assert((int32_t)rd >= 0);
    return rd;
 }
+#endif

 void bdrv_graph_wrlock(void)
 {
    GLOBAL_STATE_CODE();
+    /*
+     * TODO Some callers hold an AioContext lock when this is called, which
+     * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
+     * AioContext locks are gone).
+     */
+#if 0
    assert(!qatomic_read(&has_writer));

    /* Make sure that constantly arriving new I/O doesn't cause starvation */
@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void)
    } while (reader_count() >= 1);

    bdrv_drain_all_end();
+#endif
 }

 void bdrv_graph_wrunlock(void)
 {
    GLOBAL_STATE_CODE();
+#if 0
    QEMU_LOCK_GUARD(&aio_context_list_lock);
    assert(qatomic_read(&has_writer));

@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void)

    /* Wake up all coroutine that are waiting to read the graph */
    qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+#endif
 }

 void coroutine_fn bdrv_graph_co_rdlock(void)
 {
+    /* TODO Reenable when wrlock is reenabled */
+#if 0
    BdrvGraphRWlock *bdrv_graph;
    bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;

@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
            qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
        }
    }
+#endif
 }

 void coroutine_fn bdrv_graph_co_rdunlock(void)
 {
+#if 0
    BdrvGraphRWlock *bdrv_graph;
    bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;

@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
    if (qatomic_read(&has_writer)) {
        aio_wait_kick();
    }
+#endif
 }

 void bdrv_graph_rdlock_main_loop(void)
@@ -265,11 +283,20 @@ void bdrv_graph_rdunlock_main_loop(void)

 void assert_bdrv_graph_readable(void)
 {
+    /* reader_count() is slow due to aio_context_list_lock lock contention */
+    /* TODO Reenable when wrlock is reenabled */
+#if 0
+#ifdef CONFIG_DEBUG_GRAPH_LOCK
    assert(qemu_in_main_thread() || reader_count());
+#endif
+#endif
 }

 void assert_bdrv_graph_writable(void)
 {
    assert(qemu_in_main_thread());
+    /* TODO Reenable when wrlock is reenabled */
+#if 0
    assert(qatomic_read(&has_writer));
+#endif
 }
@@ -214,15 +214,17 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
            error_report("Device '%s' not found", device);
            return;
        }
-        if (!blk_is_available(blk)) {
-            error_report("Device '%s' has no medium", device);
-            return;
-        }

        bs = bdrv_skip_implicit_filters(blk_bs(blk));
        aio_context = bdrv_get_aio_context(bs);
        aio_context_acquire(aio_context);

+        if (!blk_is_available(blk)) {
+            error_report("Device '%s' has no medium", device);
+            aio_context_release(aio_context);
+            return;
+        }
+
        ret = bdrv_commit(bs);

        aio_context_release(aio_context);
@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,

    ret = 0;
 out:
-    blk_unref(blk);
-    bdrv_unref(bs);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs);
    return ret;

 exit:
@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename,

 done:
    qobject_unref(qdict);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
 }
@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
    g_free(tmp);
    ret = 0;
 exit:
-    blk_unref(qcow_blk);
-    bdrv_unref(bs);
+    blk_co_unref(qcow_blk);
+    bdrv_co_unref(bs);
    qcrypto_block_free(crypto);
    return ret;
 }
@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename,
 fail:
    g_free(backing_fmt);
    qobject_unref(qdict);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
 }
@@ -3705,7 +3705,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
        goto out;
    }

-    blk_unref(blk);
+    blk_co_unref(blk);
    blk = NULL;

    /*
@@ -3785,7 +3785,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
        }
    }

-    blk_unref(blk);
+    blk_co_unref(blk);
    blk = NULL;

    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
@@ -3810,9 +3810,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)

    ret = 0;
 out:
-    blk_unref(blk);
-    bdrv_unref(bs);
-    bdrv_unref(data_bs);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs);
+    bdrv_co_unref(data_bs);
    return ret;
 }

@@ -3943,8 +3943,8 @@ finish:
    }

    qobject_unref(qdict);
-    bdrv_unref(bs);
-    bdrv_unref(data_bs);
+    bdrv_co_unref(bs);
+    bdrv_co_unref(data_bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
 }
@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
    ret = 0; /* success */
 out:
    g_free(l1_table);
-    blk_unref(blk);
-    bdrv_unref(bs);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs);
    return ret;
 }

@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,

 fail:
    qobject_unref(qdict);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
 }
@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,

    ret = 0;
 exit:
-    blk_unref(blk);
-    bdrv_unref(bs_file);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs_file);
    g_free(bmap);
    return ret;
 }
@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename,
 done:
    qobject_unref(qdict);
    qapi_free_BlockdevCreateOptions(create_options);
-    bdrv_unref(bs_file);
+    bdrv_co_unref(bs_file);
    return ret;
 }

@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,

    ret = 0;
 delete_and_exit:
-    blk_unref(blk);
-    bdrv_unref(bs);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs);
    g_free(creator);
    return ret;
 }
@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename,

 fail:
    qobject_unref(qdict);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
 }
@@ -2306,7 +2306,7 @@ exit:
        if (pbb) {
            *pbb = blk;
        } else {
-            blk_unref(blk);
+            blk_co_unref(blk);
            blk = NULL;
        }
    }
@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size,
        if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
            error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
                       blk_bs(backing)->drv->format_name);
-            blk_unref(backing);
+            blk_co_unref(backing);
            ret = -EINVAL;
            goto exit;
        }
        ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
-        blk_unref(backing);
+        blk_co_unref(backing);
        if (ret) {
            error_setg(errp, "Failed to read parent CID");
            goto exit;
@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size,
                             blk_bs(extent_blk)->filename);
        created_size += cur_size;
        extent_idx++;
-        blk_unref(extent_blk);
+        blk_co_unref(extent_blk);
    }

    /* Check whether we got excess extents */
    extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
                           opaque, NULL);
    if (extent_blk) {
-        blk_unref(extent_blk);
+        blk_co_unref(extent_blk);
        error_setg(errp, "List of extents contains unused extents");
        ret = -EINVAL;
        goto exit;
@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size,
    ret = 0;
 exit:
    if (blk) {
-        blk_unref(blk);
+        blk_co_unref(blk);
    }
    g_free(desc);
    g_free(parent_desc_line);
@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
                           errp)) {
        goto exit;
    }
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
 exit:
    g_free(ext_filename);
    return blk;
@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
        return NULL;
    }
    blk_set_allow_write_beyond_eof(blk, true);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);

    if (size != -1) {
        ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
        if (ret) {
-            blk_unref(blk);
+            blk_co_unref(blk);
            blk = NULL;
        }
    }
@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
    }

 out:
-    blk_unref(blk);
-    bdrv_unref(bs);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs);
    return ret;
 }

@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename,

 fail:
    qobject_unref(qdict);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
 }
@@ -153,12 +153,22 @@ void blockdev_mark_auto_del(BlockBackend *blk)

    JOB_LOCK_GUARD();

-    for (job = block_job_next_locked(NULL); job;
-         job = block_job_next_locked(job)) {
-        if (block_job_has_bdrv(job, blk_bs(blk))) {
+    do {
+        job = block_job_next_locked(NULL);
+        while (job && (job->job.cancelled ||
+                       job->job.deferred_to_main_loop ||
+                       !block_job_has_bdrv(job, blk_bs(blk))))
+        {
+            job = block_job_next_locked(job);
+        }
+        if (job) {
+            /*
+             * This drops the job lock temporarily and polls, so we need to
+             * restart processing the list from the start after this.
+             */
            job_cancel_locked(&job->job, false);
        }
-    }
+    } while (job);

    dinfo->auto_del = 1;
 }
@@ -2430,7 +2440,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
        return;
    }

-    blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
    if (!blk) {
        return;
    }
@@ -2445,7 +2455,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,

    bdrv_co_lock(bs);
    bdrv_drained_end(bs);
-    blk_unref(blk);
+    blk_co_unref(blk);
    bdrv_co_unlock(bs);
 }

@@ -806,6 +806,7 @@ for opt do
  --enable-debug)
      # Enable debugging options that aren't excessively noisy
      debug_tcg="yes"
+      meson_option_parse --enable-debug-graph-lock ""
      meson_option_parse --enable-debug-mutex ""
      meson_option_add -Doptimization=0
      fortify_source="no"
@@ -111,6 +111,10 @@ Use ``-machine acpi=off`` instead.
 The HAXM project has been retired (see https://github.com/intel/haxm#status).
 Use "whpx" (on Windows) or "hvf" (on macOS) instead.

+``-async-teardown`` (since 8.1)
+'''''''''''''''''''''''''''''''
+
+Use ``-run-with async-teardown=on`` instead.

 QEMU Machine Protocol (QMP) commands
 ------------------------------------
@@ -219,8 +223,8 @@ Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead.
 System emulator machines
 ------------------------

-Arm ``virt`` machine ``dtb-kaslr-seed`` property
-''''''''''''''''''''''''''''''''''''''''''''''''
+Arm ``virt`` machine ``dtb-kaslr-seed`` property (since 7.1)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

 The ``dtb-kaslr-seed`` property on the ``virt`` board has been
 deprecated; use the new name ``dtb-randomness`` instead. The new name
@@ -99,7 +99,7 @@ depending on the guest architecture.
    - Yes
    - A configurable 32 bit soft core now owned by Cadence

-A number of features are are only available when running under
+A number of features are only available when running under
 emulation including :ref:`Record/Replay<replay>` and :ref:`TCG Plugins`.

 .. _Semihosting:
@@ -8,4 +8,4 @@ QEMU is a trademark of Fabrice Bellard.
 QEMU is released under the `GNU General Public
 License <https://www.gnu.org/licenses/gpl-2.0.txt>`__, version 2. Parts
 of QEMU have specific licenses, see file
-`LICENSE <https://git.qemu.org/?p=qemu.git;a=blob_plain;f=LICENSE>`__.
+`LICENSE <https://gitlab.com/qemu-project/qemu/-/raw/master/LICENSE>`__.
@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext:
 * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
 * LEGACY timer_new_ms() - create a timer
 * LEGACY qemu_bh_new() - create a BH
+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard
 * LEGACY qemu_aio_wait() - run an event loop iteration

 Since they implicitly work on the main loop they cannot be used in code that
@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h):
 * aio_set_event_notifier() - monitor an event notifier
 * aio_timer_new() - create a timer
 * aio_bh_new() - create a BH
+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard
 * aio_poll() - run an event loop iteration

+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard"
+argument, which is used to check for and prevent re-entrancy problems. For
+BHs associated with devices, the reentrancy-guard is contained in the
+corresponding DeviceState and named "mem_reentrancy_guard".
+
 The AioContext can be obtained from the IOThread using
 iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
 Code that takes an AioContext argument works both in IOThreads or the main
@@ -117,13 +117,13 @@ to support the multiple thread compression migration:
    {qemu} migrate_set_capability compress on

 3. Set the compression thread count on source:
-    {qemu} migrate_set_parameter compress_threads 12
+    {qemu} migrate_set_parameter compress-threads 12

 4. Set the compression level on the source:
-    {qemu} migrate_set_parameter compress_level 1
+    {qemu} migrate_set_parameter compress-level 1

 5. Set the decompression thread count on destination:
-    {qemu} migrate_set_parameter decompress_threads 3
+    {qemu} migrate_set_parameter decompress-threads 3

 6. Start outgoing migration:
    {qemu} migrate -d tcp:destination.host:4444
@@ -133,9 +133,9 @@ to support the multiple thread compression migration:

 The following are the default settings:
    compress: off
-    compress_threads: 8
-    decompress_threads: 2
-    compress_level: 1 (which means best speed)
+    compress-threads: 8
+    decompress-threads: 2
+    compress-level: 1 (which means best speed)

 So, only the first two steps are required to use the multiple
 thread compression in migration. You can do more if the default
@@ -89,7 +89,7 @@ RUNNING:
 First, set the migration speed to match your hardware's capabilities:

 QEMU Monitor Command:
-$ migrate_set_parameter max_bandwidth 40g # or whatever is the MAX of your RDMA device
+$ migrate_set_parameter max-bandwidth 40g # or whatever is the MAX of your RDMA device

 Next, on the destination machine, add the following to the QEMU command line:

@@ -4,7 +4,7 @@ Multi-process QEMU
 ==================

 This document describes how to configure and use multi-process qemu.
-For the design document refer to docs/devel/qemu-multiprocess.
+For the design document refer to docs/devel/multi-process.rst.

 1) Configuration
 ----------------
@@ -1293,8 +1293,8 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,

            memcpy(buf + addr % page_size, hbuf, n);
            addr += n;
-            if (addr % page_size == 0) {
-                /* we filled up the page */
+            if (addr % page_size == 0 || addr >= block->target_end) {
+                /* we filled up the page or the current block is finished */
                break;
            }
        } else {
@@ -5135,7 +5135,7 @@ float32 float32_exp2(float32 a, float_status *status)
    float64_unpack_canonical(&rp, float64_one, status);
    for (i = 0 ; i < 15 ; i++) {
        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
-        rp = *parts_muladd(&tp, &xp, &rp, 0, status);
+        rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
        xnp = *parts_mul(&xnp, &xp, status);
    }

@@ -26,6 +26,7 @@
 #include "qemu/xattr.h"
 #include "9p-iov-marshal.h"
 #include "hw/9pfs/9p-proxy.h"
+#include "hw/9pfs/9p-util.h"
 #include "fsdev/9p-iov-marshal.h"

 #define PROGNAME "virtfs-proxy-helper"
@@ -338,6 +339,28 @@ static void resetugid(int suid, int sgid)
    }
 }

+/*
+ * Open regular file or directory. Attempts to open any special file are
+ * rejected.
+ *
+ * returns file descriptor or -1 on error
+ */
+static int open_regular(const char *pathname, int flags, mode_t mode)
+{
+    int fd;
+
+    fd = open(pathname, flags, mode);
+    if (fd < 0) {
+        return fd;
+    }
+
+    if (close_if_special_file(fd) < 0) {
+        return -1;
+    }
+
+    return fd;
+}
+
 /*
 * send response in two parts
 * 1) ProxyHeader
@@ -682,7 +705,7 @@ static int do_create(struct iovec *iovec)
    if (ret < 0) {
        goto unmarshal_err_out;
    }
-    ret = open(path.data, flags, mode);
+    ret = open_regular(path.data, flags, mode);
    if (ret < 0) {
        ret = -errno;
    }
@@ -707,7 +730,7 @@ static int do_open(struct iovec *iovec)
    if (ret < 0) {
        goto err_out;
    }
-    ret = open(path.data, flags);
+    ret = open_regular(path.data, flags, 0);
    if (ret < 0) {
        ret = -errno;
    }
@@ -13,6 +13,8 @@
 #ifndef QEMU_9P_UTIL_H
 #define QEMU_9P_UTIL_H

+#include "qemu/error-report.h"
+
 #ifdef O_PATH
 #define O_PATH_9P_UTIL O_PATH
 #else
@@ -95,6 +97,7 @@ static inline int errno_to_dotl(int err) {
 #endif

 #define qemu_openat     openat
+#define qemu_fstat      fstat
 #define qemu_fstatat    fstatat
 #define qemu_mkdirat    mkdirat
 #define qemu_renameat   renameat
@@ -108,6 +111,38 @@ static inline void close_preserve_errno(int fd)
    errno = serrno;
 }

+/**
+ * close_if_special_file() - Close @fd if neither regular file nor directory.
+ *
+ * @fd: file descriptor of open file
+ * Return: 0 on regular file or directory, -1 otherwise
+ *
+ * CVE-2023-2861: Prohibit opening any special file directly on host
+ * (especially device files), as a compromised client could potentially gain
+ * access outside exported tree under certain, unsafe setups. We expect
+ * client to handle I/O on special files exclusively on guest side.
+ */
+static inline int close_if_special_file(int fd)
+{
+    struct stat stbuf;
+
+    if (qemu_fstat(fd, &stbuf) < 0) {
+        close_preserve_errno(fd);
+        return -1;
+    }
+    if (!S_ISREG(stbuf.st_mode) && !S_ISDIR(stbuf.st_mode)) {
+        error_report_once(
+            "9p: broken or compromised client detected; attempt to open "
+            "special file (i.e. neither regular file, nor directory)"
+        );
+        close(fd);
+        errno = ENXIO;
+        return -1;
+    }
+
+    return 0;
+}
+
 static inline int openat_dir(int dirfd, const char *name)
 {
    return qemu_openat(dirfd, name,
@@ -142,6 +177,10 @@ again:
        return -1;
    }

+    if (close_if_special_file(fd) < 0) {
+        return -1;
+    }
+
    serrno = errno;
    /* O_NONBLOCK was only needed to open the file. Let's drop it. We don't
     * do that with O_PATH since fcntl(F_SETFL) isn't supported, and openat()
@@ -48,3 +48,9 @@ v9fs_readlink(uint16_t tag, uint8_t id, int32_t fid) "tag %d id %d fid %d"
 v9fs_readlink_return(uint16_t tag, uint8_t id, char* target) "tag %d id %d name %s"
 v9fs_setattr(uint16_t tag, uint8_t id, int32_t fid, int32_t valid, int32_t mode, int32_t uid, int32_t gid, int64_t size, int64_t atime_sec, int64_t mtime_sec) "tag %u id %u fid %d iattr={valid %d mode %d uid %d gid %d size %"PRId64" atime=%"PRId64" mtime=%"PRId64" }"
 v9fs_setattr_return(uint16_t tag, uint8_t id) "tag %u id %u"
+
+# xen-9p-backend.c
+xen_9pfs_alloc(char *name) "name %s"
+xen_9pfs_connect(char *name) "name %s"
+xen_9pfs_disconnect(char *name) "name %s"
+xen_9pfs_free(char *name) "name %s"
@@ -25,6 +25,8 @@
 #include "qemu/iov.h"
 #include "fsdev/qemu-fsdev.h"

+#include "trace.h"
+
 #define VERSIONS "1"
 #define MAX_RINGS 8
 #define MAX_RING_ORDER 9
@@ -61,6 +63,7 @@ typedef struct Xen9pfsDev {

    int num_rings;
    Xen9pfsRing *rings;
+    MemReentrancyGuard mem_reentrancy_guard;
 } Xen9pfsDev;

 static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
@@ -336,6 +339,8 @@ static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev)
    Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
    int i;

+    trace_xen_9pfs_disconnect(xendev->name);
+
    for (i = 0; i < xen_9pdev->num_rings; i++) {
        if (xen_9pdev->rings[i].evtchndev != NULL) {
            qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
@@ -344,40 +349,41 @@ static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev)
                                   xen_9pdev->rings[i].local_port);
            xen_9pdev->rings[i].evtchndev = NULL;
        }
-    }
-}
-
-static int xen_9pfs_free(struct XenLegacyDevice *xendev)
-{
-    Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
-    int i;
-
-    if (xen_9pdev->rings[0].evtchndev != NULL) {
-        xen_9pfs_disconnect(xendev);
-    }
-
-    for (i = 0; i < xen_9pdev->num_rings; i++) {
        if (xen_9pdev->rings[i].data != NULL) {
            xen_be_unmap_grant_refs(&xen_9pdev->xendev,
                                    xen_9pdev->rings[i].data,
                                    xen_9pdev->rings[i].intf->ref,
                                    (1 << xen_9pdev->rings[i].ring_order));
+            xen_9pdev->rings[i].data = NULL;
        }
        if (xen_9pdev->rings[i].intf != NULL) {
            xen_be_unmap_grant_ref(&xen_9pdev->xendev,
                                   xen_9pdev->rings[i].intf,
                                   xen_9pdev->rings[i].ref);
+            xen_9pdev->rings[i].intf = NULL;
        }
        if (xen_9pdev->rings[i].bh != NULL) {
            qemu_bh_delete(xen_9pdev->rings[i].bh);
+            xen_9pdev->rings[i].bh = NULL;
        }
    }

    g_free(xen_9pdev->id);
+    xen_9pdev->id = NULL;
    g_free(xen_9pdev->tag);
+    xen_9pdev->tag = NULL;
    g_free(xen_9pdev->path);
+    xen_9pdev->path = NULL;
    g_free(xen_9pdev->security_model);
+    xen_9pdev->security_model = NULL;
    g_free(xen_9pdev->rings);
+    xen_9pdev->rings = NULL;
+}
+
+static int xen_9pfs_free(struct XenLegacyDevice *xendev)
+{
+    trace_xen_9pfs_free(xendev->name);
+
    return 0;
 }

@@ -389,6 +395,8 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
    V9fsState *s = &xen_9pdev->state;
    QemuOpts *fsdev;

+    trace_xen_9pfs_connect(xendev->name);
+
    if (xenstore_read_fe_int(&xen_9pdev->xendev, "num-rings",
                             &xen_9pdev->num_rings) == -1 ||
        xen_9pdev->num_rings > MAX_RINGS || xen_9pdev->num_rings < 1) {
@@ -443,7 +451,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
        xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
                                       XEN_FLEX_RING_SIZE(ring_order);

-        xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
+        xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh,
+                                                     &xen_9pdev->rings[i],
+                                                     &xen_9pdev->mem_reentrancy_guard);
        xen_9pdev->rings[i].out_cons = 0;
        xen_9pdev->rings[i].out_size = 0;
        xen_9pdev->rings[i].inprogress = false;
@@ -496,6 +506,8 @@ out:

 static void xen_9pfs_alloc(struct XenLegacyDevice *xendev)
 {
+    trace_xen_9pfs_alloc(xendev->name);
+
    xenstore_write_be_str(xendev, "versions", VERSIONS);
    xenstore_write_be_int(xendev, "max-rings", MAX_RINGS);
    xenstore_write_be_int(xendev, "max-ring-page-order", MAX_RING_ORDER);
@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
     * acpi_pcihp_eject_slot() when the operation is completed.
     */
    pdev->qdev.pending_deleted_event = true;
+    /* if unplug was requested before OSPM is initialized,
+     * linux kernel will clear GPE0.sts[] bits during boot, which effectively
+     * hides unplug event. And than followup qmp_device_del() calls remain
+     * blocked by above flag permanently.
+     * Unblock qmp_device_del() by setting expire limit, so user can
+     * repeat unplug request later when OSPM has been booted.
+     */
+    pdev->qdev.pending_deleted_expires_ms =
+        qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */
+
    s->acpi_pcihp_pci_status[bsel].down |= (1U << slot);
    acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
 }
@@ -200,33 +200,35 @@ struct AspeedMachineState {
 static void aspeed_write_smpboot(ARMCPU *cpu,
                                 const struct arm_boot_info *info)
 {
-    static const uint32_t poll_mailbox_ready[] = {
+    AddressSpace *as = arm_boot_address_space(cpu, info);
+    static const ARMInsnFixup poll_mailbox_ready[] = {
        /*
         * r2 = per-cpu go sign value
         * r1 = AST_SMP_MBOX_FIELD_ENTRY
         * r0 = AST_SMP_MBOX_FIELD_GOSIGN
         */
-        0xee100fb0,  /* mrc     p15, 0, r0, c0, c0, 5 */
-        0xe21000ff,  /* ands    r0, r0, #255          */
-        0xe59f201c,  /* ldr     r2, [pc, #28]         */
-        0xe1822000,  /* orr     r2, r2, r0            */
+        { 0xee100fb0 },  /* mrc     p15, 0, r0, c0, c0, 5 */
+        { 0xe21000ff },  /* ands    r0, r0, #255          */
+        { 0xe59f201c },  /* ldr     r2, [pc, #28]         */
+        { 0xe1822000 },  /* orr     r2, r2, r0            */

-        0xe59f1018,  /* ldr     r1, [pc, #24]         */
-        0xe59f0018,  /* ldr     r0, [pc, #24]         */
+        { 0xe59f1018 },  /* ldr     r1, [pc, #24]         */
+        { 0xe59f0018 },  /* ldr     r0, [pc, #24]         */

-        0xe320f002,  /* wfe                           */
-        0xe5904000,  /* ldr     r4, [r0]              */
-        0xe1520004,  /* cmp     r2, r4                */
-        0x1afffffb,  /* bne     <wfe>                 */
-        0xe591f000,  /* ldr     pc, [r1]              */
-        AST_SMP_MBOX_GOSIGN,
-        AST_SMP_MBOX_FIELD_ENTRY,
-        AST_SMP_MBOX_FIELD_GOSIGN,
+        { 0xe320f002 },  /* wfe                           */
+        { 0xe5904000 },  /* ldr     r4, [r0]              */
+        { 0xe1520004 },  /* cmp     r2, r4                */
+        { 0x1afffffb },  /* bne     <wfe>                 */
+        { 0xe591f000 },  /* ldr     pc, [r1]              */
+        { AST_SMP_MBOX_GOSIGN },
+        { AST_SMP_MBOX_FIELD_ENTRY },
+        { AST_SMP_MBOX_FIELD_GOSIGN },
+        { 0, FIXUP_TERMINATOR }
    };
+    static const uint32_t fixupcontext[FIXUP_MAX] = { 0 };

-    rom_add_blob_fixed("aspeed.smpboot", poll_mailbox_ready,
-                       sizeof(poll_mailbox_ready),
-                       info->smp_loader_start);
+    arm_write_bootloader("aspeed.smpboot", as, info->smp_loader_start,
+                         poll_mailbox_ready, fixupcontext);
 }

 static void aspeed_reset_secondary(ARMCPU *cpu,
@@ -60,26 +60,6 @@ AddressSpace *arm_boot_address_space(ARMCPU *cpu,
    return cpu_get_address_space(cs, asidx);
 }

-typedef enum {
-    FIXUP_NONE = 0,     /* do nothing */
-    FIXUP_TERMINATOR,   /* end of insns */
-    FIXUP_BOARDID,      /* overwrite with board ID number */
-    FIXUP_BOARD_SETUP,  /* overwrite with board specific setup code address */
-    FIXUP_ARGPTR_LO,    /* overwrite with pointer to kernel args */
-    FIXUP_ARGPTR_HI,    /* overwrite with pointer to kernel args (high half) */
-    FIXUP_ENTRYPOINT_LO, /* overwrite with kernel entry point */
-    FIXUP_ENTRYPOINT_HI, /* overwrite with kernel entry point (high half) */
-    FIXUP_GIC_CPU_IF,   /* overwrite with GIC CPU interface address */
-    FIXUP_BOOTREG,      /* overwrite with boot register address */
-    FIXUP_DSB,          /* overwrite with correct DSB insn for cpu */
-    FIXUP_MAX,
-} FixupType;
-
-typedef struct ARMInsnFixup {
-    uint32_t insn;
-    FixupType fixup;
-} ARMInsnFixup;
-
 static const ARMInsnFixup bootloader_aarch64[] = {
    { 0x580000c0 }, /* ldr x0, arg ; Load the lower 32-bits of DTB */
    { 0xaa1f03e1 }, /* mov x1, xzr */
@@ -150,9 +130,10 @@ static const ARMInsnFixup smpboot[] = {
    { 0, FIXUP_TERMINATOR }
 };

-static void write_bootloader(const char *name, hwaddr addr,
-                             const ARMInsnFixup *insns, uint32_t *fixupcontext,
-                             AddressSpace *as)
+void arm_write_bootloader(const char *name,
+                          AddressSpace *as, hwaddr addr,
+                          const ARMInsnFixup *insns,
+                          const uint32_t *fixupcontext)
 {
    /* Fix up the specified bootloader fragment and write it into
     * guest memory using rom_add_blob_fixed(). fixupcontext is
@@ -214,8 +195,8 @@ static void default_write_secondary(ARMCPU *cpu,
        fixupcontext[FIXUP_DSB] = CP15_DSB_INSN;
    }

-    write_bootloader("smpboot", info->smp_loader_start,
-                     smpboot, fixupcontext, as);
+    arm_write_bootloader("smpboot", as, info->smp_loader_start,
+                         smpboot, fixupcontext);
 }

 void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu,
@@ -1186,8 +1167,8 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
        fixupcontext[FIXUP_ENTRYPOINT_LO] = entry;
        fixupcontext[FIXUP_ENTRYPOINT_HI] = entry >> 32;

-        write_bootloader("bootloader", info->loader_start,
-                         primary_loader, fixupcontext, as);
+        arm_write_bootloader("bootloader", as, info->loader_start,
+                             primary_loader, fixupcontext);

        if (info->write_board_setup) {
            info->write_board_setup(cpu, info);
@@ -16,6 +16,7 @@
 #include "qemu/units.h"
 #include "qemu/cutils.h"
 #include "qapi/error.h"
+#include "hw/arm/boot.h"
 #include "hw/arm/bcm2836.h"
 #include "hw/registerfields.h"
 #include "qemu/error-report.h"
@@ -124,20 +125,22 @@ static const char *board_type(uint32_t board_rev)

 static void write_smpboot(ARMCPU *cpu, const struct arm_boot_info *info)
 {
-    static const uint32_t smpboot[] = {
-        0xe1a0e00f, /*    mov     lr, pc */
-        0xe3a0fe00 + (BOARDSETUP_ADDR >> 4), /* mov pc, BOARDSETUP_ADDR */
-        0xee100fb0, /*    mrc     p15, 0, r0, c0, c0, 5;get core ID */
-        0xe7e10050, /*    ubfx    r0, r0, #0, #2       ;extract LSB */
-        0xe59f5014, /*    ldr     r5, =0x400000CC      ;load mbox base */
-        0xe320f001, /* 1: yield */
-        0xe7953200, /*    ldr     r3, [r5, r0, lsl #4] ;read mbox for our core*/
-        0xe3530000, /*    cmp     r3, #0               ;spin while zero */
-        0x0afffffb, /*    beq     1b */
-        0xe7853200, /*    str     r3, [r5, r0, lsl #4] ;clear mbox */
-        0xe12fff13, /*    bx      r3                   ;jump to target */
-        0x400000cc, /* (constant: mailbox 3 read/clear base) */
+    static const ARMInsnFixup smpboot[] = {
+        { 0xe1a0e00f }, /*    mov     lr, pc */
+        { 0xe3a0fe00 + (BOARDSETUP_ADDR >> 4) }, /* mov pc, BOARDSETUP_ADDR */
+        { 0xee100fb0 }, /*    mrc     p15, 0, r0, c0, c0, 5;get core ID */
+        { 0xe7e10050 }, /*    ubfx    r0, r0, #0, #2       ;extract LSB */
+        { 0xe59f5014 }, /*    ldr     r5, =0x400000CC      ;load mbox base */
+        { 0xe320f001 }, /* 1: yield */
+        { 0xe7953200 }, /*    ldr     r3, [r5, r0, lsl #4] ;read mbox for our core */
+        { 0xe3530000 }, /*    cmp     r3, #0               ;spin while zero */
+        { 0x0afffffb }, /*    beq     1b */
+        { 0xe7853200 }, /*    str     r3, [r5, r0, lsl #4] ;clear mbox */
+        { 0xe12fff13 }, /*    bx      r3                   ;jump to target */
+        { 0x400000cc }, /* (constant: mailbox 3 read/clear base) */
+        { 0, FIXUP_TERMINATOR }
    };
+    static const uint32_t fixupcontext[FIXUP_MAX] = { 0 };

    /* check that we don't overrun board setup vectors */
    QEMU_BUILD_BUG_ON(SMPBOOT_ADDR + sizeof(smpboot) > MVBAR_ADDR);
@@ -145,9 +148,8 @@ static void write_smpboot(ARMCPU *cpu, const struct arm_boot_info *info)
    QEMU_BUILD_BUG_ON((BOARDSETUP_ADDR & 0xf) != 0
                      || (BOARDSETUP_ADDR >> 4) >= 0x100);

-    rom_add_blob_fixed_as("raspi_smpboot", smpboot, sizeof(smpboot),
-                          info->smp_loader_start,
-                          arm_boot_address_space(cpu, info));
+    arm_write_bootloader("raspi_smpboot", arm_boot_address_space(cpu, info),
+                         info->smp_loader_start, smpboot, fixupcontext);
 }

 static void write_smpboot64(ARMCPU *cpu, const struct arm_boot_info *info)
@@ -161,26 +163,28 @@ static void write_smpboot64(ARMCPU *cpu, const struct arm_boot_info *info)
     * the primary CPU goes into the kernel. We put these variables inside
     * a rom blob, so that the reset for ROM contents zeroes them for us.
     */
-    static const uint32_t smpboot[] = {
-        0xd2801b05, /*        mov     x5, 0xd8 */
-        0xd53800a6, /*        mrs     x6, mpidr_el1 */
-        0x924004c6, /*        and     x6, x6, #0x3 */
-        0xd503205f, /* spin:  wfe */
-        0xf86678a4, /*        ldr     x4, [x5,x6,lsl #3] */
-        0xb4ffffc4, /*        cbz     x4, spin */
-        0xd2800000, /*        mov     x0, #0x0 */
-        0xd2800001, /*        mov     x1, #0x0 */
-        0xd2800002, /*        mov     x2, #0x0 */
-        0xd2800003, /*        mov     x3, #0x0 */
-        0xd61f0080, /*        br      x4 */
+    static const ARMInsnFixup smpboot[] = {
+        { 0xd2801b05 }, /*        mov     x5, 0xd8 */
+        { 0xd53800a6 }, /*        mrs     x6, mpidr_el1 */
+        { 0x924004c6 }, /*        and     x6, x6, #0x3 */
+        { 0xd503205f }, /* spin:  wfe */
+        { 0xf86678a4 }, /*        ldr     x4, [x5,x6,lsl #3] */
+        { 0xb4ffffc4 }, /*        cbz     x4, spin */
+        { 0xd2800000 }, /*        mov     x0, #0x0 */
+        { 0xd2800001 }, /*        mov     x1, #0x0 */
+        { 0xd2800002 }, /*        mov     x2, #0x0 */
+        { 0xd2800003 }, /*        mov     x3, #0x0 */
+        { 0xd61f0080 }, /*        br      x4 */
+        { 0, FIXUP_TERMINATOR }
    };
+    static const uint32_t fixupcontext[FIXUP_MAX] = { 0 };

    static const uint64_t spintables[] = {
        0, 0, 0, 0
    };

-    rom_add_blob_fixed_as("raspi_smpboot", smpboot, sizeof(smpboot),
-                          info->smp_loader_start, as);
+    arm_write_bootloader("raspi_smpboot", as, info->smp_loader_start,
+                         smpboot, fixupcontext);
    rom_add_blob_fixed_as("raspi_spintables", spintables, sizeof(spintables),
                          SPINTABLE_ADDR, as);
 }
@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte,
    dma_addr_t addr = baseaddr + index * sizeof(*pte);

    /* TODO: guarantee 64-bit single-copy atomicity */
-    ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte),
-                          MEMTXATTRS_UNSPECIFIED);
+    ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED);

    if (ret != MEMTX_OK) {
        info->type = SMMU_PTW_ERR_WALK_EABT;
@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn)
    trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn);
 }

-static inline MemTxResult queue_read(SMMUQueue *q, void *data)
+static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd)
 {
    dma_addr_t addr = Q_CONS_ENTRY(q);
+    MemTxResult ret;
+    int i;

-    return dma_memory_read(&address_space_memory, addr, data, q->entry_size,
-                           MEMTXATTRS_UNSPECIFIED);
+    ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd),
+                          MEMTXATTRS_UNSPECIFIED);
+    if (ret != MEMTX_OK) {
+        return ret;
+    }
+    for (i = 0; i < ARRAY_SIZE(cmd->word); i++) {
+        le32_to_cpus(&cmd->word[i]);
+    }
+    return ret;
 }

-static MemTxResult queue_write(SMMUQueue *q, void *data)
+static MemTxResult queue_write(SMMUQueue *q, Evt *evt_in)
 {
    dma_addr_t addr = Q_PROD_ENTRY(q);
    MemTxResult ret;
+    Evt evt = *evt_in;
+    int i;

-    ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size,
+    for (i = 0; i < ARRAY_SIZE(evt.word); i++) {
+        cpu_to_le32s(&evt.word[i]);
+    }
+    ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt),
                           MEMTXATTRS_UNSPECIFIED);
    if (ret != MEMTX_OK) {
        return ret;
@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s)
 static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
                        SMMUEventInfo *event)
 {
-    int ret;
+    int ret, i;

    trace_smmuv3_get_ste(addr);
    /* TODO: guarantee 64-bit single-copy atomicity */
@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
        event->u.f_ste_fetch.addr = addr;
        return -EINVAL;
    }
+    for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
+        le32_to_cpus(&buf->word[i]);
+    }
    return 0;

 }
@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
                       CD *buf, SMMUEventInfo *event)
 {
    dma_addr_t addr = STE_CTXPTR(ste);
-    int ret;
+    int ret, i;

    trace_smmuv3_get_cd(addr);
    /* TODO: guarantee 64-bit single-copy atomicity */
@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
        event->u.f_ste_fetch.addr = addr;
        return -EINVAL;
    }
+    for (i = 0; i < ARRAY_SIZE(buf->word); i++) {
+        le32_to_cpus(&buf->word[i]);
+    }
    return 0;
 }

@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
        return -EINVAL;
    }
    if (s->features & SMMU_FEATURE_2LVL_STE) {
-        int l1_ste_offset, l2_ste_offset, max_l2_ste, span;
+        int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i;
        dma_addr_t l1ptr, l2ptr;
        STEDesc l1std;

@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
            event->u.f_ste_fetch.addr = l1ptr;
            return -EINVAL;
        }
+        for (i = 0; i < ARRAY_SIZE(l1std.word); i++) {
+            le32_to_cpus(&l1std.word[i]);
+        }

        span = L1STD_SPAN(&l1std);

@@ -213,7 +213,7 @@ static void xlnx_zynqmp_create_rpu(MachineState *ms, XlnxZynqMPState *s,
                                   const char *boot_cpu, Error **errp)
 {
    int i;
-    int num_rpus = MIN(ms->smp.cpus - XLNX_ZYNQMP_NUM_APU_CPUS,
+    int num_rpus = MIN((int)(ms->smp.cpus - XLNX_ZYNQMP_NUM_APU_CPUS),
                       XLNX_ZYNQMP_NUM_RPU_CPUS);

    if (num_rpus <= 0) {
@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
    } else {
        s->ctx = qemu_get_aio_context();
    }
-    s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
+    s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s,
+                               &DEVICE(vdev)->mem_reentrancy_guard);
    s->batch_notify_vqs = bitmap_new(conf->num_queues);

    *dataplane = s;
@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
    } else {
        dataplane->ctx = qemu_get_aio_context();
    }
-    dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh,
-                               dataplane);
+    dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
+                                       dataplane,
+                                       &DEVICE(xendev)->mem_reentrancy_guard);

    return dataplane;
 }
@@ -763,14 +763,15 @@ static XenBlockDrive *xen_block_drive_create(const char *id,
    drive = g_new0(XenBlockDrive, 1);
    drive->id = g_strdup(id);

-    file_layer = qdict_new();
-    driver_layer = qdict_new();
-
    rc = stat(filename, &st);
    if (rc) {
        error_setg_errno(errp, errno, "Could not stat file '%s'", filename);
        goto done;
    }
+
+    file_layer = qdict_new();
+    driver_layer = qdict_new();
+
    if (S_ISBLK(st.st_mode)) {
        qdict_put_str(file_layer, "driver", "host_device");
    } else {
@@ -778,7 +779,6 @@ static XenBlockDrive *xen_block_drive_create(const char *id,
    }

    qdict_put_str(file_layer, "filename", filename);
-    g_free(filename);

    if (mode && *mode != 'w') {
        qdict_put_bool(file_layer, "read-only", true);
@@ -813,7 +813,6 @@ static XenBlockDrive *xen_block_drive_create(const char *id,
    qdict_put_str(file_layer, "locking", "off");

    qdict_put_str(driver_layer, "driver", driver);
-    g_free(driver);

    qdict_put(driver_layer, "file", file_layer);

@@ -824,6 +823,8 @@ static XenBlockDrive *xen_block_drive_create(const char *id,
    qobject_unref(driver_layer);

 done:
+    g_free(filename);
+    g_free(driver);
    if (*errp) {
        xen_block_drive_destroy(drive, NULL);
        return NULL;
@@ -29,6 +29,7 @@
 #include "chardev/char-fe.h"
 #include "qemu/timer.h"
 #include "qemu/error-report.h"
+#include "exec/tswap.h"

 #define RISCV_DEBUG_HTIF 0
 #define HTIF_DEBUG(fmt, ...)                                                   \
@@ -167,11 +168,11 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
            } else {
                uint64_t syscall[8];
                cpu_physical_memory_read(payload, syscall, sizeof(syscall));
-                if (syscall[0] == PK_SYS_WRITE &&
-                    syscall[1] == HTIF_DEV_CONSOLE &&
-                    syscall[3] == HTIF_CONSOLE_CMD_PUTC) {
+                if (tswap64(syscall[0]) == PK_SYS_WRITE &&
+                    tswap64(syscall[1]) == HTIF_DEV_CONSOLE &&
+                    tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) {
                    uint8_t ch;
-                    cpu_physical_memory_read(syscall[2], &ch, 1);
+                    cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1);
                    qemu_chr_fe_write(&s->chr, &ch, 1);
                    resp = 0x100 | (uint8_t)payload;
                } else {
@@ -190,7 +191,8 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
            s->tohost = 0; /* clear to indicate we read */
            return;
        } else if (cmd == HTIF_CONSOLE_CMD_PUTC) {
-            qemu_chr_fe_write(&s->chr, (uint8_t *)&payload, 1);
+            uint8_t ch = (uint8_t)payload;
+            qemu_chr_fe_write(&s->chr, &ch, 1);
            resp = 0x100 | (uint8_t)payload;
        } else {
            qemu_log("HTIF device %d: unknown command\n", device);
@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
        return;
    }

-    port->bh = qemu_bh_new(flush_queued_data_bh, port);
+    port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port,
+                                   &dev->mem_reentrancy_guard);
    port->elem = NULL;
 }

@@ -197,3 +197,13 @@ void machine_parse_smp_config(MachineState *ms,
        return;
    }
 }
+
+unsigned int machine_topo_get_cores_per_socket(const MachineState *ms)
+{
+    return ms->smp.cores * ms->smp.clusters * ms->smp.dies;
+}
+
+unsigned int machine_topo_get_threads_per_socket(const MachineState *ms)
+{
+    return ms->smp.threads * machine_topo_get_cores_per_socket(ms);
+}
@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = {
    { "e1000e", "migrate-timadj", "off" },
    { "virtio-mem", "x-early-migration", "false" },
    { "migration", "x-preempt-pre-7-2", "true" },
+    { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" },
 };
 const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);

@@ -1332,6 +1333,14 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error *
        }
    } else if (machine_class->default_ram_id && machine->ram_size &&
               numa_uses_legacy_mem()) {
+        if (object_property_find(object_get_objects_root(),
+                                 machine_class->default_ram_id)) {
+            error_setg(errp, "object name '%s' is reserved for the default"
+                " RAM backend, it can't be used for any other purposes."
+                " Change the object's 'id' to something else",
+                machine_class->default_ram_id);
+            return;
+        }
        if (!create_default_memdev(current_machine, mem_path, errp)) {
            return;
        }
@@ -1591,7 +1591,10 @@ static void qxl_set_mode(PCIQXLDevice *d, unsigned int modenr, int loadvm)
    }

    d->guest_slots[0].slot = slot;
-    assert(qxl_add_memslot(d, 0, devmem, QXL_SYNC) == 0);
+    if (qxl_add_memslot(d, 0, devmem, QXL_SYNC) != 0) {
+        qxl_set_guest_bug(d, "device isn't initialized yet");
+        return;
+    }

    d->guest_primary.surface = surface;
    qxl_create_guest_primary(d, 0, QXL_SYNC);
@@ -2201,11 +2204,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)

    qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);

-    qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl);
+    qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl,
+                                          &DEVICE(qxl)->mem_reentrancy_guard);
    qxl_reset_state(qxl);

-    qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl);
-    qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd);
+    qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl,
+                                              &DEVICE(qxl)->mem_reentrancy_guard);
+    qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd,
+                                             &DEVICE(qxl)->mem_reentrancy_guard);
 }

 static void qxl_realize_primary(PCIDevice *dev, Error **errp)
@@ -498,6 +498,8 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,
    struct virtio_gpu_resource_flush rf;
    struct virtio_gpu_scanout *scanout;
    pixman_region16_t flush_region;
+    bool within_bounds = false;
+    bool update_submitted = false;
    int i;

    VIRTIO_GPU_FILL_CMD(rf);
@@ -518,13 +520,28 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,
                rf.r.x < scanout->x + scanout->width &&
                rf.r.x + rf.r.width >= scanout->x &&
                rf.r.y < scanout->y + scanout->height &&
-                rf.r.y + rf.r.height >= scanout->y &&
-                console_has_gl(scanout->con)) {
-                dpy_gl_update(scanout->con, 0, 0, scanout->width,
-                              scanout->height);
+                rf.r.y + rf.r.height >= scanout->y) {
+                within_bounds = true;
+
+                if (console_has_gl(scanout->con)) {
+                    dpy_gl_update(scanout->con, 0, 0, scanout->width,
+                                  scanout->height);
+                    update_submitted = true;
+                }
            }
        }
-        return;
+
+        if (update_submitted) {
+            return;
+        }
+        if (!within_bounds) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: flush bounds outside scanouts"
+                          " bounds for flush %d: %d %d %d %d\n",
+                          __func__, rf.resource_id, rf.r.x, rf.r.y,
+                          rf.r.width, rf.r.height);
+            cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+            return;
+        }
    }

    if (!res->blob &&
@@ -1339,8 +1356,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)

    g->ctrl_vq = virtio_get_queue(vdev, 0);
    g->cursor_vq = virtio_get_queue(vdev, 1);
-    g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
-    g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
+    g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g,
+                                     &qdev->mem_reentrancy_guard);
+    g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g,
+                                       &qdev->mem_reentrancy_guard);
    QTAILQ_INIT(&g->reslist);
    QTAILQ_INIT(&g->cmdq);
    QTAILQ_INIT(&g->fenceq);
@@ -168,6 +168,11 @@ static inline int stream_idle(struct Stream *s)
    return !!(s->regs[R_DMASR] & DMASR_IDLE);
 }

+static inline int stream_halted(struct Stream *s)
+{
+    return !!(s->regs[R_DMASR] & DMASR_HALTED);
+}
+
 static void stream_reset(struct Stream *s)
 {
    s->regs[R_DMASR] = DMASR_HALTED;  /* starts up halted.  */
@@ -269,7 +274,7 @@ static void stream_process_mem2s(struct Stream *s, StreamSink *tx_data_dev,
    uint64_t addr;
    bool eop;

-    if (!stream_running(s) || stream_idle(s)) {
+    if (!stream_running(s) || stream_idle(s) || stream_halted(s)) {
        return;
    }

@@ -326,7 +331,7 @@ static size_t stream_process_s2mem(struct Stream *s, unsigned char *buf,
    unsigned int rxlen;
    size_t pos = 0;

-    if (!stream_running(s) || stream_idle(s)) {
+    if (!stream_running(s) || stream_idle(s) || stream_halted(s)) {
        return 0;
    }

@@ -407,7 +412,7 @@ xilinx_axidma_data_stream_can_push(StreamSink *obj,
    XilinxAXIDMAStreamSink *ds = XILINX_AXI_DMA_DATA_STREAM(obj);
    struct Stream *s = &ds->dma->streams[1];

-    if (!stream_running(s) || stream_idle(s)) {
+    if (!stream_running(s) || stream_idle(s) || stream_halted(s)) {
        ds->dma->notify = notify;
        ds->dma->notify_opaque = notify_opaque;
        return false;
@@ -122,6 +122,7 @@ static FWCfgState *create_fw_cfg(MachineState *ms)
 {
    FWCfgState *fw_cfg;
    uint64_t val;
+    const char qemu_version[] = QEMU_VERSION;

    fw_cfg = fw_cfg_init_mem(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4);
    fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, ms->smp.cpus);
@@ -147,6 +148,10 @@ static FWCfgState *create_fw_cfg(MachineState *ms)
    fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ms->boot_config.order[0]);
    qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);

+    fw_cfg_add_file(fw_cfg, "/etc/qemu-version",
+                    g_memdup(qemu_version, sizeof(qemu_version)),
+                    sizeof(qemu_version));
+
    return fw_cfg;
 }

@@ -417,10 +422,16 @@ static void hppa_machine_reset(MachineState *ms, ShutdownCause reason)

    /* Start all CPUs at the firmware entry point.
     *  Monarch CPU will initialize firmware, secondary CPUs
-     *  will enter a small idle look and wait for rendevouz. */
+     *  will enter a small idle loop and wait for rendevouz. */
    for (i = 0; i < smp_cpus; i++) {
-        cpu_set_pc(CPU(cpu[i]), firmware_entry);
+        CPUState *cs = CPU(cpu[i]);
+
+        cpu_set_pc(cs, firmware_entry);
+        cpu[i]->env.psw = PSW_Q;
        cpu[i]->env.gr[5] = CPU_HPA + i * 0x1000;
+
+        cs->exception_index = -1;
+        cs->halted = 0;
    }

    /* already initialized by machine_hppa_init()? */
@@ -226,7 +226,7 @@ static int aspeed_i2c_dma_read(AspeedI2CBus *bus, uint8_t *data)
    return 0;
 }

-static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start)
+static int aspeed_i2c_bus_send(AspeedI2CBus *bus)
 {
    AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(bus->controller);
    int ret = -1;
@@ -236,10 +236,10 @@ static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start)
    uint32_t reg_byte_buf = aspeed_i2c_bus_byte_buf_offset(bus);
    uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);
    int pool_tx_count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl,
-                                                TX_COUNT);
+                                                TX_COUNT) + 1;

    if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) {
-        for (i = pool_start; i < pool_tx_count; i++) {
+        for (i = 0; i < pool_tx_count; i++) {
            uint8_t *pool_base = aic->bus_pool_base(bus);

            trace_aspeed_i2c_bus_send("BUF", i + 1, pool_tx_count,
@@ -273,7 +273,7 @@ static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start)
        }
        SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, TX_DMA_EN, 0);
    } else {
-        trace_aspeed_i2c_bus_send("BYTE", pool_start, 1,
+        trace_aspeed_i2c_bus_send("BYTE", 0, 1,
                                  bus->regs[reg_byte_buf]);
        ret = i2c_send(bus->bus, bus->regs[reg_byte_buf]);
    }
@@ -293,7 +293,7 @@ static void aspeed_i2c_bus_recv(AspeedI2CBus *bus)
    uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);
    uint32_t reg_dma_addr = aspeed_i2c_bus_dma_addr_offset(bus);
    int pool_rx_count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl,
-                                                RX_COUNT);
+                                                RX_SIZE) + 1;

    if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_BUFF_EN)) {
        uint8_t *pool_base = aic->bus_pool_base(bus);
@@ -418,7 +418,7 @@ static void aspeed_i2c_bus_cmd_dump(AspeedI2CBus *bus)
    uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus);
    uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);
    if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_BUFF_EN)) {
-        count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT);
+        count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT) + 1;
    } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_DMA_EN)) {
        count = bus->regs[reg_dma_len];
    } else { /* BYTE mode */
@@ -446,10 +446,8 @@ static void aspeed_i2c_bus_cmd_dump(AspeedI2CBus *bus)
 */
 static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value)
 {
-    uint8_t pool_start = 0;
    uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus);
    uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus);
-    uint32_t reg_pool_ctrl = aspeed_i2c_bus_pool_ctrl_offset(bus);
    uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);

    if (!aspeed_i2c_check_sram(bus)) {
@@ -483,27 +481,11 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value)

        SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_START_CMD, 0);

-        /*
-         * The START command is also a TX command, as the slave
-         * address is sent on the bus. Drop the TX flag if nothing
-         * else needs to be sent in this sequence.
-         */
-        if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) {
-            if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT)
-                == 1) {
-                SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0);
-            } else {
-                /*
-                 * Increase the start index in the TX pool buffer to
-                 * skip the address byte.
-                 */
-                pool_start++;
-            }
-        } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN)) {
+        if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN)) {
            if (bus->regs[reg_dma_len] == 0) {
                SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0);
            }
-        } else {
+        } else if (!SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) {
            SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0);
        }

@@ -520,7 +502,7 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value)

    if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_TX_CMD)) {
        aspeed_i2c_set_state(bus, I2CD_MTXD);
-        if (aspeed_i2c_bus_send(bus, pool_start)) {
+        if (aspeed_i2c_bus_send(bus)) {
            SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, TX_NAK, 1);
            i2c_end_transfer(bus->bus);
        } else {
@@ -70,7 +70,7 @@ static int bitbang_i2c_ret(bitbang_i2c_interface *i2c, int level)
    return level & i2c->last_data;
 }

-/* Leave device data pin unodified.  */
+/* Leave device data pin unmodified.  */
 static int bitbang_i2c_nop(bitbang_i2c_interface *i2c)
 {
    return bitbang_i2c_ret(i2c, i2c->device_out);
@@ -5,7 +5,7 @@ bitbang_i2c_state(const char *old_state, const char *new_state) "state %s -> %s"
 bitbang_i2c_addr(uint8_t addr) "Address 0x%02x"
 bitbang_i2c_send(uint8_t byte) "TX byte 0x%02x"
 bitbang_i2c_recv(uint8_t byte) "RX byte 0x%02x"
-bitbang_i2c_data(unsigned dat, unsigned clk, unsigned old_out, unsigned new_out) "dat %u clk %u out %u -> %u"
+bitbang_i2c_data(unsigned clk, unsigned dat, unsigned old_out, unsigned new_out) "clk %u dat %u out %u -> %u"

 # core.c

@@ -755,6 +755,8 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
        return -VTD_FR_PASID_TABLE_INV;
    }

+    pdire->val = le64_to_cpu(pdire->val);
+
    return 0;
 }

@@ -779,6 +781,9 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
                        pe, entry_size, MEMTXATTRS_UNSPECIFIED)) {
        return -VTD_FR_PASID_TABLE_INV;
    }
+    for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) {
+        pe->val[i] = le64_to_cpu(pe->val[i]);
+    }

    /* Do translation type check */
    if (!vtd_pe_type_check(x86_iommu, pe)) {
@@ -3322,14 +3327,15 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
        return -VTD_FR_IR_ROOT_INVAL;
    }

-    trace_vtd_ir_irte_get(index, le64_to_cpu(entry->data[1]),
-                          le64_to_cpu(entry->data[0]));
+    entry->data[0] = le64_to_cpu(entry->data[0]);
+    entry->data[1] = le64_to_cpu(entry->data[1]);
+
+    trace_vtd_ir_irte_get(index, entry->data[1], entry->data[0]);

    if (!entry->irte.present) {
        error_report_once("%s: detected non-present IRTE "
                          "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
-                          __func__, index, le64_to_cpu(entry->data[1]),
-                          le64_to_cpu(entry->data[0]));
+                          __func__, index, entry->data[1], entry->data[0]);
        return -VTD_FR_IR_ENTRY_P;
    }

@@ -3337,14 +3343,13 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
        entry->irte.__reserved_2) {
        error_report_once("%s: detected non-zero reserved IRTE "
                          "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
-                          __func__, index, le64_to_cpu(entry->data[1]),
-                          le64_to_cpu(entry->data[0]));
+                          __func__, index, entry->data[1], entry->data[0]);
        return -VTD_FR_IR_IRTE_RSVD;
    }

    if (sid != X86_IOMMU_SID_INVALID) {
        /* Validate IRTE SID */
-        source_id = le32_to_cpu(entry->irte.source_id);
+        source_id = entry->irte.source_id;
        switch (entry->irte.sid_vtype) {
        case VTD_SVT_NONE:
            break;
@@ -3398,7 +3403,7 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
    irq->trigger_mode = irte.irte.trigger_mode;
    irq->vector = irte.irte.vector;
    irq->delivery_mode = irte.irte.delivery_mode;
-    irq->dest = le32_to_cpu(irte.irte.dest_id);
+    irq->dest = irte.irte.dest_id;
    if (!iommu->intr_eime) {
 #define  VTD_IR_APIC_DEST_MASK         (0xff00ULL)
 #define  VTD_IR_APIC_DEST_SHIFT        (8)
@@ -3453,7 +3458,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
        goto out;
    }

-    index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);
+    index = addr.addr.index_h << 15 | addr.addr.index_l;

 #define  VTD_IR_MSI_DATA_SUBHANDLE       (0x0000ffff)
 #define  VTD_IR_MSI_DATA_RESERVED        (0xffff0000)
@@ -321,12 +321,21 @@ typedef enum VTDFaultReason {

 /* Interrupt Entry Cache Invalidation Descriptor: VT-d 6.5.2.7. */
 struct VTDInvDescIEC {
+#if HOST_BIG_ENDIAN
+    uint64_t reserved_2:16;
+    uint64_t index:16;          /* Start index to invalidate */
+    uint64_t index_mask:5;      /* 2^N for continuous int invalidation */
+    uint64_t resved_1:22;
+    uint64_t granularity:1;     /* If set, it's global IR invalidation */
+    uint64_t type:4;            /* Should always be 0x4 */
+#else
    uint32_t type:4;            /* Should always be 0x4 */
    uint32_t granularity:1;     /* If set, it's global IR invalidation */
    uint32_t resved_1:22;
    uint32_t index_mask:5;      /* 2^N for continuous int invalidation */
    uint32_t index:16;          /* Start index to invalidate */
    uint32_t reserved_2:16;
+#endif
 };
 typedef struct VTDInvDescIEC VTDInvDescIEC;

@@ -1587,7 +1587,7 @@ static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
 found:
    pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
    if (gsi >= 0) {
-        assert(gsi <= IOAPIC_NUM_PINS);
+        assert(gsi < IOAPIC_NUM_PINS);
        s->gsi_pirq[gsi] = pirq;
    }
    s->pirq[pirq].gsi = gsi;
@@ -1601,7 +1601,7 @@ bool xen_evtchn_set_gsi(int gsi, int level)

    assert(qemu_mutex_iothread_locked());

-    if (!s || gsi < 0 || gsi > IOAPIC_NUM_PINS) {
+    if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
        return false;
    }

@@ -1688,7 +1688,7 @@ static struct qemu_xs_handle *xs_be_open(void)
    XenXenstoreState *s = xen_xenstore_singleton;
    struct qemu_xs_handle *h;

-    if (!s && !s->impl) {
+    if (!s || !s->impl) {
        errno = -ENOSYS;
        return NULL;
    }
@@ -63,7 +63,7 @@ void x86_iommu_irq_to_msi_message(X86IOMMUIrq *irq, MSIMessage *msg_out)
    msg.redir_hint = irq->redir_hint;
    msg.dest = irq->dest;
    msg.__addr_hi = irq->dest & 0xffffff00;
-    msg.__addr_head = cpu_to_le32(0xfee);
+    msg.__addr_head = 0xfee;
    /* Keep this from original MSI address bits */
    msg.__not_used = irq->msi_addr_last_bits;

@@ -41,9 +41,10 @@
 #include "trace.h"

 static void check_cmd(AHCIState *s, int port);
-static int handle_cmd(AHCIState *s, int port, uint8_t slot);
+static void handle_cmd(AHCIState *s, int port, uint8_t slot);
 static void ahci_reset_port(AHCIState *s, int port);
-static bool ahci_write_fis_d2h(AHCIDevice *ad);
+static bool ahci_write_fis_d2h(AHCIDevice *ad, bool d2h_fis_i);
+static void ahci_clear_cmd_issue(AHCIDevice *ad, uint8_t slot);
 static void ahci_init_d2h(AHCIDevice *ad);
 static int ahci_dma_prepare_buf(const IDEDMA *dma, int32_t limit);
 static bool ahci_map_clb_address(AHCIDevice *ad);
@@ -328,6 +329,11 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
        ahci_check_irq(s);
        break;
    case AHCI_PORT_REG_CMD:
+        if ((pr->cmd & PORT_CMD_START) && !(val & PORT_CMD_START)) {
+            pr->scr_act = 0;
+            pr->cmd_issue = 0;
+        }
+
        /* Block any Read-only fields from being set;
         * including LIST_ON and FIS_ON.
         * The spec requires to set ICC bits to zero after the ICC change
@@ -591,9 +597,8 @@ static void check_cmd(AHCIState *s, int port)

    if ((pr->cmd & PORT_CMD_START) && pr->cmd_issue) {
        for (slot = 0; (slot < 32) && pr->cmd_issue; slot++) {
-            if ((pr->cmd_issue & (1U << slot)) &&
-                !handle_cmd(s, port, slot)) {
-                pr->cmd_issue &= ~(1U << slot);
+            if (pr->cmd_issue & (1U << slot)) {
+                handle_cmd(s, port, slot);
            }
        }
    }
@@ -618,7 +623,7 @@ static void ahci_init_d2h(AHCIDevice *ad)
        return;
    }

-    if (ahci_write_fis_d2h(ad)) {
+    if (ahci_write_fis_d2h(ad, true)) {
        ad->init_d2h_sent = true;
        /* We're emulating receiving the first Reg H2D Fis from the device;
         * Update the SIG register, but otherwise proceed as normal. */
@@ -801,8 +806,14 @@ static void ahci_write_fis_sdb(AHCIState *s, NCQTransferState *ncq_tfs)
    pr->scr_act &= ~ad->finished;
    ad->finished = 0;

-    /* Trigger IRQ if interrupt bit is set (which currently, it always is) */
-    if (sdb_fis->flags & 0x40) {
+    /*
+     * TFES IRQ is always raised if ERR_STAT is set, regardless of I bit.
+     * If ERR_STAT is not set, trigger SDBS IRQ if interrupt bit is set
+     * (which currently, it always is).
+     */
+    if (sdb_fis->status & ERR_STAT) {
+        ahci_trigger_irq(s, ad, AHCI_PORT_IRQ_BIT_TFES);
+    } else if (sdb_fis->flags & 0x40) {
        ahci_trigger_irq(s, ad, AHCI_PORT_IRQ_BIT_SDBS);
    }
 }
@@ -850,7 +861,7 @@ static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len, bool pio_fis_i)
    }
 }

-static bool ahci_write_fis_d2h(AHCIDevice *ad)
+static bool ahci_write_fis_d2h(AHCIDevice *ad, bool d2h_fis_i)
 {
    AHCIPortRegs *pr = &ad->port_regs;
    uint8_t *d2h_fis;
@@ -864,7 +875,7 @@ static bool ahci_write_fis_d2h(AHCIDevice *ad)
    d2h_fis = &ad->res_fis[RES_FIS_RFIS];

    d2h_fis[0] = SATA_FIS_TYPE_REGISTER_D2H;
-    d2h_fis[1] = (1 << 6); /* interrupt bit */
+    d2h_fis[1] = d2h_fis_i ? (1 << 6) : 0; /* interrupt bit */
    d2h_fis[2] = s->status;
    d2h_fis[3] = s->error;

@@ -890,7 +901,10 @@ static bool ahci_write_fis_d2h(AHCIDevice *ad)
        ahci_trigger_irq(ad->hba, ad, AHCI_PORT_IRQ_BIT_TFES);
    }

-    ahci_trigger_irq(ad->hba, ad, AHCI_PORT_IRQ_BIT_DHRS);
+    if (d2h_fis_i) {
+        ahci_trigger_irq(ad->hba, ad, AHCI_PORT_IRQ_BIT_DHRS);
+    }
+
    return true;
 }

@@ -998,7 +1012,6 @@ static void ncq_err(NCQTransferState *ncq_tfs)

    ide_state->error = ABRT_ERR;
    ide_state->status = READY_STAT | ERR_STAT;
-    ncq_tfs->drive->port_regs.scr_err |= (1 << ncq_tfs->tag);
    qemu_sglist_destroy(&ncq_tfs->sglist);
    ncq_tfs->used = 0;
 }
@@ -1008,7 +1021,7 @@ static void ncq_finish(NCQTransferState *ncq_tfs)
    /* If we didn't error out, set our finished bit. Errored commands
     * do not get a bit set for the SDB FIS ACT register, nor do they
     * clear the outstanding bit in scr_act (PxSACT). */
-    if (!(ncq_tfs->drive->port_regs.scr_err & (1 << ncq_tfs->tag))) {
+    if (ncq_tfs->used) {
        ncq_tfs->drive->finished |= (1 << ncq_tfs->tag);
    }

@@ -1120,6 +1133,24 @@ static void process_ncq_command(AHCIState *s, int port, const uint8_t *cmd_fis,
        return;
    }

+    /*
+     * A NCQ command clears the bit in PxCI after the command has been QUEUED
+     * successfully (ERROR not set, BUSY and DRQ cleared).
+     *
+     * For NCQ commands, PxCI will always be cleared here.
+     *
+     * (Once the NCQ command is COMPLETED, the device will send a SDB FIS with
+     * the interrupt bit set, which will clear PxSACT and raise an interrupt.)
+     */
+    ahci_clear_cmd_issue(ad, slot);
+
+    /*
+     * In reality, for NCQ commands, PxCI is cleared after receiving a D2H FIS
+     * without the interrupt bit set, but since ahci_write_fis_d2h() can raise
+     * an IRQ on error, we need to call them in reverse order.
+     */
+    ahci_write_fis_d2h(ad, false);
+
    ncq_tfs->used = 1;
    ncq_tfs->drive = ad;
    ncq_tfs->slot = slot;
@@ -1192,6 +1223,7 @@ static void handle_reg_h2d_fis(AHCIState *s, int port,
 {
    IDEState *ide_state = &s->dev[port].port.ifs[0];
    AHCICmdHdr *cmd = get_cmd_header(s, port, slot);
+    AHCIDevice *ad = &s->dev[port];
    uint16_t opts = le16_to_cpu(cmd->opts);

    if (cmd_fis[1] & 0x0F) {
@@ -1268,11 +1300,19 @@ static void handle_reg_h2d_fis(AHCIState *s, int port,
    /* Reset transferred byte counter */
    cmd->status = 0;

+    /*
+     * A non-NCQ command clears the bit in PxCI after the command has COMPLETED
+     * successfully (ERROR not set, BUSY and DRQ cleared).
+     *
+     * For non-NCQ commands, PxCI will always be cleared by ahci_cmd_done().
+     */
+    ad->busy_slot = slot;
+
    /* We're ready to process the command in FIS byte 2. */
    ide_bus_exec_cmd(&s->dev[port].port, cmd_fis[2]);
 }

-static int handle_cmd(AHCIState *s, int port, uint8_t slot)
+static void handle_cmd(AHCIState *s, int port, uint8_t slot)
 {
    IDEState *ide_state;
    uint64_t tbl_addr;
@@ -1283,12 +1323,12 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
    if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
        /* Engine currently busy, try again later */
        trace_handle_cmd_busy(s, port);
-        return -1;
+        return;
    }

    if (!s->dev[port].lst) {
        trace_handle_cmd_nolist(s, port);
-        return -1;
+        return;
    }
    cmd = get_cmd_header(s, port, slot);
    /* remember current slot handle for later */
@@ -1298,7 +1338,7 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
    ide_state = &s->dev[port].port.ifs[0];
    if (!ide_state->blk) {
        trace_handle_cmd_badport(s, port);
-        return -1;
+        return;
    }

    tbl_addr = le64_to_cpu(cmd->tbl_addr);
@@ -1307,7 +1347,7 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
                             DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
    if (!cmd_fis) {
        trace_handle_cmd_badfis(s, port);
-        return -1;
+        return;
    } else if (cmd_len != 0x80) {
        ahci_trigger_irq(s, &s->dev[port], AHCI_PORT_IRQ_BIT_HBFS);
        trace_handle_cmd_badmap(s, port, cmd_len);
@@ -1331,15 +1371,6 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
 out:
    dma_memory_unmap(s->as, cmd_fis, cmd_len, DMA_DIRECTION_TO_DEVICE,
                     cmd_len);
-
-    if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
-        /* async command, complete later */
-        s->dev[port].busy_slot = slot;
-        return -1;
-    }
-
-    /* done handling the command */
-    return 0;
 }

 /* Transfer PIO data between RAM and device */
@@ -1493,23 +1524,41 @@ static int ahci_dma_rw_buf(const IDEDMA *dma, bool is_write)
    return 1;
 }

+static void ahci_clear_cmd_issue(AHCIDevice *ad, uint8_t slot)
+{
+    IDEState *ide_state = &ad->port.ifs[0];
+
+    if (!(ide_state->status & ERR_STAT) &&
+        !(ide_state->status & (BUSY_STAT | DRQ_STAT))) {
+        ad->port_regs.cmd_issue &= ~(1 << slot);
+    }
+}
+
+/* Non-NCQ command is done - This function is never called for NCQ commands. */
 static void ahci_cmd_done(const IDEDMA *dma)
 {
    AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
+    IDEState *ide_state = &ad->port.ifs[0];

    trace_ahci_cmd_done(ad->hba, ad->port_no);

    /* no longer busy */
    if (ad->busy_slot != -1) {
-        ad->port_regs.cmd_issue &= ~(1 << ad->busy_slot);
+        ahci_clear_cmd_issue(ad, ad->busy_slot);
        ad->busy_slot = -1;
    }

-    /* update d2h status */
-    ahci_write_fis_d2h(ad);
+    /*
+     * In reality, for non-NCQ commands, PxCI is cleared after receiving a D2H
+     * FIS with the interrupt bit set, but since ahci_write_fis_d2h() will raise
+     * an IRQ, we need to call them in reverse order.
+     */
+    ahci_write_fis_d2h(ad, true);

-    if (ad->port_regs.cmd_issue && !ad->check_bh) {
-        ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
+    if (!(ide_state->status & ERR_STAT) &&
+        ad->port_regs.cmd_issue && !ad->check_bh) {
+        ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
+                                           &ad->mem_reentrancy_guard);
        qemu_bh_schedule(ad->check_bh);
    }
 }
@@ -321,6 +321,7 @@ struct AHCIDevice {
    bool init_d2h_sent;
    AHCICmdHdr *cur_cmd;
    NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
+    MemReentrancyGuard mem_reentrancy_guard;
 };

 struct AHCIPCIState {
@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim(
        BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
 {
    IDEState *s = opaque;
+    IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
    TrimAIOCB *iocb;

    /* Paired with a decrement in ide_trim_bh_cb() */
@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim(

    iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
    iocb->s = s;
-    iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
+    iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
+                                   &DEVICE(dev)->mem_reentrancy_guard);
    iocb->ret = 0;
    iocb->qiov = qiov;
    iocb->i = -1;
@@ -531,9 +533,9 @@ BlockAIOCB *ide_issue_trim(

 void ide_abort_command(IDEState *s)
 {
-    ide_transfer_stop(s);
    s->status = READY_STAT | ERR_STAT;
    s->error = ABRT_ERR;
+    ide_transfer_stop(s);
 }

 static void ide_set_retry(IDEState *s)
@@ -118,7 +118,7 @@ static void piix_ide_reset(DeviceState *dev)
    pci_set_word(pci_conf + PCI_COMMAND, 0x0000);
    pci_set_word(pci_conf + PCI_STATUS,
                 PCI_STATUS_DEVSEL_MEDIUM | PCI_STATUS_FAST_BACK);
-    pci_set_byte(pci_conf + 0x20, 0x01);  /* BMIBA: 20-23h */
+    pci_set_long(pci_conf + 0x20, 0x1);  /* BMIBA: 20-23h */
 }

 static bool pci_piix_init_bus(PCIIDEState *d, unsigned i, Error **errp)
@@ -49,12 +49,9 @@ static void aw_a10_pic_update(AwA10PICState *s)
 static void aw_a10_pic_set_irq(void *opaque, int irq, int level)
 {
    AwA10PICState *s = opaque;
+    uint32_t *pending_reg = &s->irq_pending[irq / 32];

-    if (level) {
-        set_bit(irq % 32, (void *)&s->irq_pending[irq / 32]);
-    } else {
-        clear_bit(irq % 32, (void *)&s->irq_pending[irq / 32]);
-    }
+    *pending_reg = deposit32(*pending_reg, irq % 32, 1, !!level);
    aw_a10_pic_update(s);
 }

@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp)
    memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi",
                          APIC_SPACE_SIZE);

+    /*
+     * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can
+     * write back to apic-msi. As such mark the apic-msi region re-entrancy
+     * safe.
+     */
+    s->io_memory.disable_reentrancy_guard = true;
+
    s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s);
    local_apics[s->id] = s;

@@ -215,6 +215,10 @@ static void loongarch_ipi_init(Object *obj)
    for (cpu = 0; cpu < MAX_IPI_CORE_NUM; cpu++) {
        memory_region_init_io(&s->ipi_iocsr_mem[cpu], obj, &loongarch_ipi_ops,
                            &lams->ipi_core[cpu], "loongarch_ipi_iocsr", 0x48);
+
+        /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */
+        s->ipi_iocsr_mem[cpu].disable_reentrancy_guard = true;
+
        sysbus_init_mmio(sbd, &s->ipi_iocsr_mem[cpu]);

        memory_region_init_io(&s->ipi64_iocsr_mem[cpu], obj, &loongarch_ipi64_ops,
@@ -64,13 +64,13 @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
    uint64_t next;
    uint64_t diff;

-    uint64_t rtc_r = cpu_riscv_read_rtc(mtimer);
+    uint64_t rtc = cpu_riscv_read_rtc(mtimer);

    /* Compute the relative hartid w.r.t the socket */
    hartid = hartid - mtimer->hartid_base;

    mtimer->timecmp[hartid] = value;
-    if (mtimer->timecmp[hartid] <= rtc_r) {
+    if (mtimer->timecmp[hartid] <= rtc) {
        /*
         * If we're setting an MTIMECMP value in the "past",
         * immediately raise the timer interrupt
@@ -81,7 +81,7 @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,

    /* otherwise, set up the future timer interrupt */
    qemu_irq_lower(mtimer->timer_irqs[hartid]);
-    diff = mtimer->timecmp[hartid] - rtc_r;
+    diff = mtimer->timecmp[hartid] - rtc;
    /* back to ns (note args switched in muldiv64) */
    uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq);

@@ -208,11 +208,12 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
        return;
    } else if (addr == mtimer->time_base || addr == mtimer->time_base + 4) {
        uint64_t rtc_r = cpu_riscv_read_rtc_raw(mtimer->timebase_freq);
+        uint64_t rtc = cpu_riscv_read_rtc(mtimer);

        if (addr == mtimer->time_base) {
            if (size == 4) {
                /* time_lo for RV32/RV64 */
-                mtimer->time_delta = ((rtc_r & ~0xFFFFFFFFULL) | value) - rtc_r;
+                mtimer->time_delta = ((rtc & ~0xFFFFFFFFULL) | value) - rtc_r;
            } else {
                /* time for RV64 */
                mtimer->time_delta = value - rtc_r;
@@ -220,7 +221,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
        } else {
            if (size == 4) {
                /* time_hi for RV32/RV64 */
-                mtimer->time_delta = (value << 32 | (rtc_r & 0xFFFFFFFF)) - rtc_r;
+                mtimer->time_delta = (value << 32 | (rtc & 0xFFFFFFFF)) - rtc_r;
            } else {
                qemu_log_mask(LOG_GUEST_ERROR,
                              "aclint-mtimer: invalid time_hi write: %08x",
@@ -29,7 +29,6 @@
 #include "qemu/datadir.h"
 #include "qapi/error.h"
 #include "elf.h"
-#include "kvm_mips.h"
 #include "hw/char/serial.h"
 #include "hw/intc/loongson_liointc.h"
 #include "hw/mips/mips.h"
@@ -617,7 +616,6 @@ static void loongson3v_machine_class_init(ObjectClass *oc, void *data)
    mc->max_cpus = LOONGSON_MAX_VCPUS;
    mc->default_ram_id = "loongson3.highram";
    mc->default_ram_size = 1600 * MiB;
-    mc->kvm_type = mips_kvm_type;
    mc->minimum_page_bits = 14;
 }

@@ -629,9 +629,9 @@ static void bl_setup_gt64120_jump_kernel(void **p, uint64_t run_addr,

    /* Bus endianess is always reversed */
 #if TARGET_BIG_ENDIAN
-#define cpu_to_gt32 cpu_to_le32
+#define cpu_to_gt32(x) (x)
 #else
-#define cpu_to_gt32 cpu_to_be32
+#define cpu_to_gt32(x) bswap32(x)
 #endif

    /* setup MEM-to-PCI0 mapping as done by YAMON */
@@ -189,7 +189,7 @@ static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode,
                              bool acc_mode)
 {
    struct iovec iov[ASPEED_HACE_MAX_SG];
-    g_autofree uint8_t *digest_buf;
+    g_autofree uint8_t *digest_buf = NULL;
    size_t digest_len = 0;
    int niov = 0;
    int i;
@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj)

    memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s,
                          TYPE_BCM2835_PROPERTY, 0x10);
+
+    /*
+     * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from
+     * iomem. As such, mark iomem as re-entracy safe.
+     */
+    s->iomem.disable_reentrancy_guard = true;
+
    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
    sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq);
 }
@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp)
    sysbus_init_mmio(sbd, &s->iomem);

    sysbus_init_irq(sbd, &s->irq);
-    s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s);
-    s->seed_bh = qemu_bh_new(imx_rngc_seed, s);
+    s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s,
+                                          &dev->mem_reentrancy_guard);
+    s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s,
+                                     &dev->mem_reentrancy_guard);
 }

 static void imx_rngc_reset(DeviceState *dev)
@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp)
 {
    DBDMAState *s = MAC_DBDMA(dev);

-    s->bh = qemu_bh_new(DBDMA_run_bh, s);
+    s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard);
 }

 static void mac_dbdma_class_init(ObjectClass *oc, void *data)
@@ -350,8 +350,13 @@ static void allwinner_sun8i_emac_get_desc(AwSun8iEmacState *s,
                                          FrameDescriptor *desc,
                                          uint32_t phys_addr)
 {
-    dma_memory_read(&s->dma_as, phys_addr, desc, sizeof(*desc),
+    uint32_t desc_words[4];
+    dma_memory_read(&s->dma_as, phys_addr, &desc_words, sizeof(desc_words),
                    MEMTXATTRS_UNSPECIFIED);
+    desc->status = le32_to_cpu(desc_words[0]);
+    desc->status2 = le32_to_cpu(desc_words[1]);
+    desc->addr = le32_to_cpu(desc_words[2]);
+    desc->next = le32_to_cpu(desc_words[3]);
 }

 static uint32_t allwinner_sun8i_emac_next_desc(AwSun8iEmacState *s,
@@ -400,10 +405,15 @@ static uint32_t allwinner_sun8i_emac_tx_desc(AwSun8iEmacState *s,
 }

 static void allwinner_sun8i_emac_flush_desc(AwSun8iEmacState *s,
-                                            FrameDescriptor *desc,
+                                            const FrameDescriptor *desc,
                                            uint32_t phys_addr)
 {
-    dma_memory_write(&s->dma_as, phys_addr, desc, sizeof(*desc),
+    uint32_t desc_words[4];
+    desc_words[0] = cpu_to_le32(desc->status);
+    desc_words[1] = cpu_to_le32(desc->status2);
+    desc_words[2] = cpu_to_le32(desc->addr);
+    desc_words[3] = cpu_to_le32(desc->next);
+    dma_memory_write(&s->dma_as, phys_addr, &desc_words, sizeof(desc_words),
                     MEMTXATTRS_UNSPECIFIED);
 }

@@ -638,8 +648,7 @@ static uint64_t allwinner_sun8i_emac_read(void *opaque, hwaddr offset,
        break;
    case REG_TX_CUR_BUF:        /* Transmit Current Buffer */
        if (s->tx_desc_curr != 0) {
-            dma_memory_read(&s->dma_as, s->tx_desc_curr, &desc, sizeof(desc),
-                            MEMTXATTRS_UNSPECIFIED);
+            allwinner_sun8i_emac_get_desc(s, &desc, s->tx_desc_curr);
            value = desc.addr;
        } else {
            value = 0;
@@ -652,8 +661,7 @@ static uint64_t allwinner_sun8i_emac_read(void *opaque, hwaddr offset,
        break;
    case REG_RX_CUR_BUF:        /* Receive Current Buffer */
        if (s->rx_desc_curr != 0) {
-            dma_memory_read(&s->dma_as, s->rx_desc_curr, &desc, sizeof(desc),
-                            MEMTXATTRS_UNSPECIFIED);
+            allwinner_sun8i_emac_get_desc(s, &desc, s->rx_desc_curr);
            value = desc.addr;
        } else {
            value = 0;
@@ -637,9 +637,8 @@ xmit_seg(E1000State *s)

    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
-    s->mac_reg[GPTC] = s->mac_reg[TPT];
-    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
-    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
+    e1000x_inc_reg_if_not_full(s->mac_reg, GPTC);
+    e1000x_grow_8reg_if_not_full(s->mac_reg, GOTCL, s->tx.size + 4);
 }

 static void
@@ -827,12 +826,10 @@ receive_filter(E1000State *s, const uint8_t *buf, int size)
    }

    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
-        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
        return 1;
    }

    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
-        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
        return 1;
    }

@@ -923,6 +920,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
    size_t desc_offset;
    size_t desc_size;
    size_t total_size;
+    eth_pkt_types_e pkt_type;

    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
        return -1;
@@ -972,6 +970,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
        size -= 4;
    }

+    pkt_type = get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf));
    rdh_start = s->mac_reg[RDH];
    desc_offset = 0;
    total_size = size + e1000x_fcs_len(s->mac_reg);
@@ -1037,7 +1036,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
        }
    } while (desc_offset < total_size);

-    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
+    e1000x_update_rx_total_stats(s->mac_reg, pkt_type, size, total_size);

    n = E1000_ICS_RXT0;
    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
@@ -711,9 +711,8 @@ e1000e_on_tx_done_update_stats(E1000ECore *core, struct NetTxPkt *tx_pkt)
        g_assert_not_reached();
    }

-    core->mac[GPTC] = core->mac[TPT];
-    core->mac[GOTCL] = core->mac[TOTL];
-    core->mac[GOTCH] = core->mac[TOTH];
+    e1000x_inc_reg_if_not_full(core->mac, GPTC);
+    e1000x_grow_8reg_if_not_full(core->mac, GOTCL, tot_len);
 }

 static void
@@ -1488,24 +1487,10 @@ e1000e_write_to_rx_buffers(E1000ECore *core,
 }

 static void
-e1000e_update_rx_stats(E1000ECore *core,
-                       size_t data_size,
-                       size_t data_fcs_size)
+e1000e_update_rx_stats(E1000ECore *core, size_t pkt_size, size_t pkt_fcs_size)
 {
-    e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
-
-    switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
-    case ETH_PKT_BCAST:
-        e1000x_inc_reg_if_not_full(core->mac, BPRC);
-        break;
-
-    case ETH_PKT_MCAST:
-        e1000x_inc_reg_if_not_full(core->mac, MPRC);
-        break;
-
-    default:
-        break;
-    }
+    eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt);
+    e1000x_update_rx_total_stats(core->mac, pkt_type, pkt_size, pkt_fcs_size);
 }

 static inline bool
@@ -1700,12 +1685,9 @@ static ssize_t
 e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
                        bool has_vnet)
 {
-    static const int maximum_ethernet_hdr_len = (ETH_HLEN + 4);
-
    uint32_t n = 0;
-    uint8_t min_buf[ETH_ZLEN];
+    uint8_t buf[ETH_ZLEN];
    struct iovec min_iov;
-    uint8_t *filter_buf;
    size_t size, orig_size;
    size_t iov_ofs = 0;
    E1000E_RxRing rxr;
@@ -1728,24 +1710,21 @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
        net_rx_pkt_unset_vhdr(core->rx_pkt);
    }

-    filter_buf = iov->iov_base + iov_ofs;
    orig_size = iov_size(iov, iovcnt);
    size = orig_size - iov_ofs;

    /* Pad to minimum Ethernet frame length */
-    if (size < sizeof(min_buf)) {
-        iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size);
-        memset(&min_buf[size], 0, sizeof(min_buf) - size);
+    if (size < sizeof(buf)) {
+        iov_to_buf(iov, iovcnt, iov_ofs, buf, size);
+        memset(&buf[size], 0, sizeof(buf) - size);
        e1000x_inc_reg_if_not_full(core->mac, RUC);
-        min_iov.iov_base = filter_buf = min_buf;
-        min_iov.iov_len = size = sizeof(min_buf);
+        min_iov.iov_base = buf;
+        min_iov.iov_len = size = sizeof(buf);
        iovcnt = 1;
        iov = &min_iov;
        iov_ofs = 0;
-    } else if (iov->iov_len < maximum_ethernet_hdr_len) {
-        /* This is very unlikely, but may happen. */
-        iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len);
-        filter_buf = min_buf;
+    } else {
+        iov_to_buf(iov, iovcnt, iov_ofs, buf, ETH_HLEN + 4);
    }

    /* Discard oversized packets if !LPE and !SBP. */
@@ -1754,9 +1733,9 @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
    }

    net_rx_pkt_set_packet_type(core->rx_pkt,
-        get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf)));
+        get_eth_packet_type(PKT_GET_ETH_HDR(buf)));

-    if (!e1000e_receive_filter(core, filter_buf, size)) {
+    if (!e1000e_receive_filter(core, buf, size)) {
        trace_e1000e_rx_flt_dropped();
        return orig_size;
    }
@@ -80,7 +80,6 @@ bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf)
    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (mac[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
-        e1000x_inc_reg_if_not_full(mac, MPRC);
        return true;
    }

@@ -212,23 +211,36 @@ e1000x_rxbufsize(uint32_t rctl)

 void
 e1000x_update_rx_total_stats(uint32_t *mac,
-                             size_t data_size,
-                             size_t data_fcs_size)
+                             eth_pkt_types_e pkt_type,
+                             size_t pkt_size,
+                             size_t pkt_fcs_size)
 {
    static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
                                    PRC1023, PRC1522 };

-    e1000x_increase_size_stats(mac, PRCregs, data_fcs_size);
+    e1000x_increase_size_stats(mac, PRCregs, pkt_fcs_size);
    e1000x_inc_reg_if_not_full(mac, TPR);
-    mac[GPRC] = mac[TPR];
+    e1000x_inc_reg_if_not_full(mac, GPRC);
    /* TOR - Total Octets Received:
    * This register includes bytes received in a packet from the <Destination
    * Address> field through the <CRC> field, inclusively.
    * Always include FCS length (4) in size.
    */
-    e1000x_grow_8reg_if_not_full(mac, TORL, data_size + 4);
-    mac[GORCL] = mac[TORL];
-    mac[GORCH] = mac[TORH];
+    e1000x_grow_8reg_if_not_full(mac, TORL, pkt_size + 4);
+    e1000x_grow_8reg_if_not_full(mac, GORCL, pkt_size + 4);
+
+    switch (pkt_type) {
+    case ETH_PKT_BCAST:
+        e1000x_inc_reg_if_not_full(mac, BPRC);
+        break;
+
+    case ETH_PKT_MCAST:
+        e1000x_inc_reg_if_not_full(mac, MPRC);
+        break;
+
+    default:
+        break;
+    }
 }

 void
@@ -91,8 +91,9 @@ e1000x_update_regs_on_link_up(uint32_t *mac, uint16_t *phy)
 }

 void e1000x_update_rx_total_stats(uint32_t *mac,
-                                  size_t data_size,
-                                  size_t data_fcs_size);
+                                  eth_pkt_types_e pkt_type,
+                                  size_t pkt_size,
+                                  size_t pkt_fcs_size);

 void e1000x_core_prepare_eeprom(uint16_t       *eeprom,
                                const uint16_t *templ,
@@ -67,6 +67,11 @@ typedef struct IGBTxPktVmdqCallbackContext {
    NetClientState *nc;
 } IGBTxPktVmdqCallbackContext;

+typedef struct L2Header {
+    struct eth_header eth;
+    struct vlan_header vlan;
+} L2Header;
+
 static ssize_t
 igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
                     bool has_vnet, bool *external_tx);
@@ -402,7 +407,7 @@ igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
        }
    }

-    if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
+    if (insert_vlan) {
        net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
            core->mac[VET] & 0xffff);
    }
@@ -538,9 +543,8 @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
        g_assert_not_reached();
    }

-    core->mac[GPTC] = core->mac[TPT];
-    core->mac[GOTCL] = core->mac[TOTL];
-    core->mac[GOTCH] = core->mac[TOTH];
+    e1000x_inc_reg_if_not_full(core->mac, GPTC);
+    e1000x_grow_8reg_if_not_full(core->mac, GOTCL, tot_len);

    if (core->mac[MRQC] & 1) {
        uint16_t pool = qn % IGB_NUM_VM_POOLS;
@@ -961,15 +965,16 @@ igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
    return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
 }

-static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
+static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
                                   size_t size, E1000E_RSSInfo *rss_info,
                                   bool *external_tx)
 {
    static const int ta_shift[] = { 4, 3, 2, 0 };
+    const struct eth_header *ehdr = &l2_header->eth;
    uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
    uint16_t queues = 0;
    uint16_t oversized = 0;
-    uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
+    uint16_t vid = be16_to_cpu(l2_header->vlan.h_tci) & VLAN_VID_MASK;
    bool accepted = false;
    int i;

@@ -1227,7 +1232,6 @@ igb_build_rx_metadata(IGBCore *core,
    struct virtio_net_hdr *vhdr;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
-    uint32_t pkt_type;

    *status_flags = E1000_RXD_STAT_DD;

@@ -1266,28 +1270,29 @@ igb_build_rx_metadata(IGBCore *core,
        trace_e1000e_rx_metadata_ack();
    }

-    if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
-        trace_e1000e_rx_metadata_ipv6_filtering_disabled();
-        pkt_type = E1000_RXD_PKT_MAC;
-    } else if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP ||
-               l4hdr_proto == ETH_L4_HDR_PROTO_UDP) {
-        pkt_type = hasip4 ? E1000_RXD_PKT_IP4_XDP : E1000_RXD_PKT_IP6_XDP;
-    } else if (hasip4 || hasip6) {
-        pkt_type = hasip4 ? E1000_RXD_PKT_IP4 : E1000_RXD_PKT_IP6;
-    } else {
-        pkt_type = E1000_RXD_PKT_MAC;
-    }
-
-    trace_e1000e_rx_metadata_pkt_type(pkt_type);
-
    if (pkt_info) {
-        if (rss_info->enabled) {
-            *pkt_info = rss_info->type;
+        *pkt_info = rss_info->enabled ? rss_info->type : 0;
+
+        if (hasip4) {
+            *pkt_info |= E1000_ADVRXD_PKT_IP4;
        }

-        *pkt_info |= (pkt_type << 4);
-    } else {
-        *status_flags |= E1000_RXD_PKT_TYPE(pkt_type);
+        if (hasip6) {
+            *pkt_info |= E1000_ADVRXD_PKT_IP6;
+        }
+
+        switch (l4hdr_proto) {
+        case ETH_L4_HDR_PROTO_TCP:
+            *pkt_info |= E1000_ADVRXD_PKT_TCP;
+            break;
+
+        case ETH_L4_HDR_PROTO_UDP:
+            *pkt_info |= E1000_ADVRXD_PKT_UDP;
+            break;
+
+        default:
+            break;
+        }
    }

    if (hdr_info) {
@@ -1438,29 +1443,17 @@ igb_write_to_rx_buffers(IGBCore *core,

 static void
 igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
-                    size_t data_size, size_t data_fcs_size)
+                    size_t pkt_size, size_t pkt_fcs_size)
 {
-    e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
-
-    switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
-    case ETH_PKT_BCAST:
-        e1000x_inc_reg_if_not_full(core->mac, BPRC);
-        break;
-
-    case ETH_PKT_MCAST:
-        e1000x_inc_reg_if_not_full(core->mac, MPRC);
-        break;
-
-    default:
-        break;
-    }
+    eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt);
+    e1000x_update_rx_total_stats(core->mac, pkt_type, pkt_size, pkt_fcs_size);

    if (core->mac[MRQC] & 1) {
        uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;

-        core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
+        core->mac[PVFGORC0 + (pool * 64)] += pkt_size + 4;
        core->mac[PVFGPRC0 + (pool * 64)]++;
-        if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
+        if (pkt_type == ETH_PKT_MCAST) {
            core->mac[PVFMPRC0 + (pool * 64)]++;
        }
    }
@@ -1602,14 +1595,13 @@ static ssize_t
 igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
                     bool has_vnet, bool *external_tx)
 {
-    static const int maximum_ethernet_hdr_len = (ETH_HLEN + 4);
-
    uint16_t queues = 0;
    uint32_t n = 0;
-    uint8_t min_buf[ETH_ZLEN];
+    union {
+        L2Header l2_header;
+        uint8_t octets[ETH_ZLEN];
+    } buf;
    struct iovec min_iov;
-    struct eth_header *ehdr;
-    uint8_t *filter_buf;
    size_t size, orig_size;
    size_t iov_ofs = 0;
    E1000E_RxRing rxr;
@@ -1635,24 +1627,21 @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
        net_rx_pkt_unset_vhdr(core->rx_pkt);
    }

-    filter_buf = iov->iov_base + iov_ofs;
    orig_size = iov_size(iov, iovcnt);
    size = orig_size - iov_ofs;

    /* Pad to minimum Ethernet frame length */
-    if (size < sizeof(min_buf)) {
-        iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size);
-        memset(&min_buf[size], 0, sizeof(min_buf) - size);
+    if (size < sizeof(buf)) {
+        iov_to_buf(iov, iovcnt, iov_ofs, &buf, size);
+        memset(&buf.octets[size], 0, sizeof(buf) - size);
        e1000x_inc_reg_if_not_full(core->mac, RUC);
-        min_iov.iov_base = filter_buf = min_buf;
-        min_iov.iov_len = size = sizeof(min_buf);
+        min_iov.iov_base = &buf;
+        min_iov.iov_len = size = sizeof(buf);
        iovcnt = 1;
        iov = &min_iov;
        iov_ofs = 0;
-    } else if (iov->iov_len < maximum_ethernet_hdr_len) {
-        /* This is very unlikely, but may happen. */
-        iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len);
-        filter_buf = min_buf;
+    } else {
+        iov_to_buf(iov, iovcnt, iov_ofs, &buf, sizeof(buf.l2_header));
    }

    /* Discard oversized packets if !LPE and !SBP. */
@@ -1660,11 +1649,12 @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
        return orig_size;
    }

-    ehdr = PKT_GET_ETH_HDR(filter_buf);
-    net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
-    net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
+    net_rx_pkt_set_packet_type(core->rx_pkt,
+                               get_eth_packet_type(&buf.l2_header.eth));
+    net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs);

-    queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
+    queues = igb_receive_assign(core, &buf.l2_header, size,
+                                &rss_info, external_tx);
    if (!queues) {
        trace_e1000e_rx_flt_dropped();
        return orig_size;
@@ -2464,16 +2454,16 @@ igb_set_ims(IGBCore *core, int index, uint32_t val)
 static void igb_commit_icr(IGBCore *core)
 {
    /*
-     * If GPIE.NSICR = 0, then the copy of IAM to IMS will occur only if at
+     * If GPIE.NSICR = 0, then the clear of IMS will occur only if at
     * least one bit is set in the IMS and there is a true interrupt as
     * reflected in ICR.INTA.
     */
    if ((core->mac[GPIE] & E1000_GPIE_NSICR) ||
        (core->mac[IMS] && (core->mac[ICR] & E1000_ICR_INT_ASSERTED))) {
-        igb_set_ims(core, IMS, core->mac[IAM]);
-    } else {
-        igb_update_interrupt_state(core);
+        igb_clear_ims_bits(core, core->mac[IAM]);
    }
+
+    igb_update_interrupt_state(core);
 }

 static void igb_set_icr(IGBCore *core, int index, uint32_t val)
@@ -641,6 +641,11 @@ union e1000_adv_rx_desc {

 #define E1000_STATUS_NUM_VFS_SHIFT 14

+#define E1000_ADVRXD_PKT_IP4 BIT(4)
+#define E1000_ADVRXD_PKT_IP6 BIT(6)
+#define E1000_ADVRXD_PKT_TCP BIT(8)
+#define E1000_ADVRXD_PKT_UDP BIT(9)
+
 static inline uint8_t igb_ivar_entry_rx(uint8_t i)
 {
    return i < 8 ? i * 4 : (i - 8) * 4 + 2;
@@ -118,14 +118,18 @@ static void emac_load_desc(MSF2EmacState *s, EmacDesc *d, hwaddr desc)
    d->next = le32_to_cpu(d->next);
 }

-static void emac_store_desc(MSF2EmacState *s, EmacDesc *d, hwaddr desc)
+static void emac_store_desc(MSF2EmacState *s, const EmacDesc *d, hwaddr desc)
 {
-    /* Convert from host endianness into LE. */
-    d->pktaddr = cpu_to_le32(d->pktaddr);
-    d->pktsize = cpu_to_le32(d->pktsize);
-    d->next = cpu_to_le32(d->next);
+    EmacDesc outd;
+    /*
+     * Convert from host endianness into LE. We use a local struct because
+     * calling code may still want to look at the fields afterwards.
+     */
+    outd.pktaddr = cpu_to_le32(d->pktaddr);
+    outd.pktsize = cpu_to_le32(d->pktsize);
+    outd.next = cpu_to_le32(d->next);

-    address_space_write(&s->dma_as, desc, MEMTXATTRS_UNSPECIFIED, d, sizeof *d);
+    address_space_write(&s->dma_as, desc, MEMTXATTRS_UNSPECIFIED, &outd, sizeof outd);
 }

 static void msf2_dma_tx(MSF2EmacState *s)
@@ -103,7 +103,7 @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt,
                                iov, iovcnt, ploff, pkt->tot_len);
    }

-    eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->hasip4, &pkt->hasip6,
+    eth_get_protocols(pkt->vec, pkt->vec_len, 0, &pkt->hasip4, &pkt->hasip6,
                      &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
                      &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);

@@ -186,17 +186,13 @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt)
    return pkt->tot_len;
 }

-void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,
-                              size_t len)
+void net_rx_pkt_set_protocols(struct NetRxPkt *pkt,
+                              const struct iovec *iov, size_t iovcnt,
+                              size_t iovoff)
 {
-    const struct iovec iov = {
-        .iov_base = (void *)data,
-        .iov_len = len
-    };
-
    assert(pkt);

-    eth_get_protocols(&iov, 1, &pkt->hasip4, &pkt->hasip6,
+    eth_get_protocols(iov, iovcnt, iovoff, &pkt->hasip4, &pkt->hasip6,
                      &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
                      &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
 }
@@ -55,12 +55,14 @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt);
 * parse and set packet analysis results
 *
 * @pkt:            packet
- * @data:           pointer to the data buffer to be parsed
- * @len:            data length
+ * @iov:            received data scatter-gather list
+ * @iovcnt:         number of elements in iov
+ * @iovoff:         data start offset in the iov
 *
 */
-void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,
-                              size_t len);
+void net_rx_pkt_set_protocols(struct NetRxPkt *pkt,
+                              const struct iovec *iov, size_t iovcnt,
+                              size_t iovoff);

 /**
 * fetches packet analysis results
@@ -2154,6 +2154,9 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)

                int large_send_mss = (txdw0 >> CP_TC_LGSEN_MSS_SHIFT) &
                                     CP_TC_LGSEN_MSS_MASK;
+                if (large_send_mss == 0) {
+                    goto skip_offload;
+                }

                DPRINTF("+++ C+ mode offloaded task TSO IP data %d "
                    "frame data %d specified MSS=%d\n",
@@ -805,7 +805,6 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
    }

    if (!get_vhost_net(nc->peer)) {
-        virtio_add_feature(&features, VIRTIO_F_RING_RESET);
        return features;
    }

@@ -1835,9 +1834,12 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = size
+    };

-    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
-                             size - n->host_hdr_len);
+    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
                                             n->rss_data.hash_types);
@@ -2917,7 +2919,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
-        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
+        n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
+                                                  &DEVICE(vdev)->mem_reentrancy_guard);
    }

    n->vqs[index].tx_waiting = 0;
@@ -3627,12 +3630,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
-        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
+        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
-                   VIRTQUEUE_MAX_SIZE);
+                   virtio_net_max_tx_queue_size(n));
        virtio_cleanup(vdev);
        return;
    }
@@ -3948,6 +3951,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
    vdc->get_vhost = virtio_net_get_vhost;
+    vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
 }

 static const TypeInfo virtio_net_info = {
@@ -1439,7 +1439,10 @@ static void vmxnet3_activate_device(VMXNET3State *s)
    vmxnet3_setup_rx_filtering(s);
    /* Cache fields from shared memory */
    s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
-    assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu <= VMXNET3_MAX_MTU);
+    if (s->mtu < VMXNET3_MIN_MTU || s->mtu > VMXNET3_MAX_MTU) {
+        qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad MTU size: %u\n", s->mtu);
+        return;
+    }
    VMW_CFPRN("MTU is %u", s->mtu);

    s->max_rx_frags =
@@ -2001,7 +2004,12 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
        get_eth_packet_type(PKT_GET_ETH_HDR(buf)));

    if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
-        net_rx_pkt_set_protocols(s->rx_pkt, buf, size);
+        struct iovec iov = {
+            .iov_base = (void *)buf,
+            .iov_len = size
+        };
+
+        net_rx_pkt_set_protocols(s->rx_pkt, &iov, 1, 0);
        vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
        net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
        bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
@@ -1504,7 +1504,7 @@ static void nvme_post_cqes(void *opaque)
        req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
        req->cqe.sq_id = cpu_to_le16(sq->sqid);
        req->cqe.sq_head = cpu_to_le16(sq->head);
-        addr = cq->dma_addr + cq->tail * n->cqe_size;
+        addr = cq->dma_addr + (cq->tail << NVME_CQES);
        ret = pci_dma_write(PCI_DEVICE(n), addr, (void *)&req->cqe,
                            sizeof(req->cqe));
        if (ret) {
@@ -4333,7 +4333,13 @@ static uint16_t nvme_io_mgmt_send_ruh_update(NvmeCtrl *n, NvmeRequest *req)
    uint32_t npid = (cdw10 >> 1) + 1;
    unsigned int i = 0;
    g_autofree uint16_t *pids = NULL;
-    uint32_t maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh;
+    uint32_t maxnpid;
+
+    if (!ns->endgrp || !ns->endgrp->fdp.enabled) {
+        return NVME_FDP_DISABLED | NVME_DNR;
+    }
+
+    maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh;

    if (unlikely(npid >= MIN(NVME_FDP_MAXPIDS, maxnpid))) {
        return NVME_INVALID_FIELD | NVME_DNR;
@@ -4607,7 +4613,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
        QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
    }

-    sq->bh = qemu_bh_new(nvme_process_sq, sq);
+    sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq,
+                                 &DEVICE(sq->ctrl)->mem_reentrancy_guard);

    if (n->dbbuf_enabled) {
        sq->db_addr = n->dbbuf_dbs + (sqid << 3);
@@ -5091,6 +5098,11 @@ static uint16_t nvme_fdp_events(NvmeCtrl *n, uint32_t endgrpid,
    }

    log_size = sizeof(NvmeFdpEventsLog) + ebuf->nelems * sizeof(NvmeFdpEvent);
+
+    if (off >= log_size) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
    trans_len = MIN(log_size - off, buf_len);
    elog = g_malloc0(log_size);
    elog->num_events = cpu_to_le32(ebuf->nelems);
@@ -5253,7 +5265,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
        }
    }
    n->cq[cqid] = cq;
-    cq->bh = qemu_bh_new(nvme_post_cqes, cq);
+    cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq,
+                                 &DEVICE(cq->ctrl)->mem_reentrancy_guard);
 }

 static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
@@ -5265,10 +5278,18 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
    uint16_t qsize = le16_to_cpu(c->qsize);
    uint16_t qflags = le16_to_cpu(c->cq_flags);
    uint64_t prp1 = le64_to_cpu(c->prp1);
+    uint32_t cc = ldq_le_p(&n->bar.cc);
+    uint8_t iocqes = NVME_CC_IOCQES(cc);
+    uint8_t iosqes = NVME_CC_IOSQES(cc);

    trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
                             NVME_CQ_FLAGS_IEN(qflags) != 0);

+    if (iosqes != NVME_SQES || iocqes != NVME_CQES) {
+        trace_pci_nvme_err_invalid_create_cq_entry_size(iosqes, iocqes);
+        return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
+    }
+
    if (unlikely(!cqid || cqid > n->conf_ioqpairs || n->cq[cqid] != NULL)) {
        trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
        return NVME_INVALID_QID | NVME_DNR;
@@ -6767,6 +6788,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
    PCIDevice *pci = PCI_DEVICE(n);
    uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
    uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
+    uint32_t v;
    int i;

    /* Address should be page aligned */
@@ -6784,6 +6806,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
        NvmeCQueue *cq = n->cq[i];

        if (sq) {
+            v = cpu_to_le32(sq->tail);
+
            /*
             * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3)
             * nvme_process_db() uses this hard-coded way to calculate
@@ -6791,7 +6815,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
             */
            sq->db_addr = dbs_addr + (i << 3);
            sq->ei_addr = eis_addr + (i << 3);
-            pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail));
+            pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail));

            if (n->params.ioeventfd && sq->sqid != 0) {
                if (!nvme_init_sq_ioeventfd(sq)) {
@@ -6801,10 +6825,12 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
        }

        if (cq) {
+            v = cpu_to_le32(cq->head);
+
            /* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */
            cq->db_addr = dbs_addr + (i << 3) + (1 << 2);
            cq->ei_addr = eis_addr + (i << 3) + (1 << 2);
-            pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head));
+            pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head));

            if (n->params.ioeventfd && cq->cqid != 0) {
                if (!nvme_init_cq_ioeventfd(cq)) {
@@ -6857,7 +6883,7 @@ static uint16_t nvme_directive_receive(NvmeCtrl *n, NvmeRequest *req)
    case NVME_DIRECTIVE_IDENTIFY:
        switch (doper) {
        case NVME_DIRECTIVE_RETURN_PARAMS:
-            if (ns->endgrp->fdp.enabled) {
+            if (ns->endgrp && ns->endgrp->fdp.enabled) {
                id.supported |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
                id.enabled |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
                id.persistent |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT;
@@ -6969,7 +6995,7 @@ static void nvme_process_sq(void *opaque)
    }

    while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
-        addr = sq->dma_addr + sq->head * n->sqe_size;
+        addr = sq->dma_addr + (sq->head << NVME_SQES);
        if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
            trace_pci_nvme_err_addr_read(addr);
            trace_pci_nvme_err_cfs();
@@ -7196,34 +7222,6 @@ static int nvme_start_ctrl(NvmeCtrl *n)
                    NVME_CAP_MPSMAX(cap));
        return -1;
    }
-    if (unlikely(NVME_CC_IOCQES(cc) <
-                 NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
-        trace_pci_nvme_err_startfail_cqent_too_small(
-                    NVME_CC_IOCQES(cc),
-                    NVME_CTRL_CQES_MIN(cap));
-        return -1;
-    }
-    if (unlikely(NVME_CC_IOCQES(cc) >
-                 NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
-        trace_pci_nvme_err_startfail_cqent_too_large(
-                    NVME_CC_IOCQES(cc),
-                    NVME_CTRL_CQES_MAX(cap));
-        return -1;
-    }
-    if (unlikely(NVME_CC_IOSQES(cc) <
-                 NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
-        trace_pci_nvme_err_startfail_sqent_too_small(
-                    NVME_CC_IOSQES(cc),
-                    NVME_CTRL_SQES_MIN(cap));
-        return -1;
-    }
-    if (unlikely(NVME_CC_IOSQES(cc) >
-                 NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
-        trace_pci_nvme_err_startfail_sqent_too_large(
-                    NVME_CC_IOSQES(cc),
-                    NVME_CTRL_SQES_MAX(cap));
-        return -1;
-    }
    if (unlikely(!NVME_AQA_ASQS(aqa))) {
        trace_pci_nvme_err_startfail_asqent_sz_zero();
        return -1;
@@ -7236,8 +7234,6 @@ static int nvme_start_ctrl(NvmeCtrl *n)
    n->page_bits = page_bits;
    n->page_size = page_size;
    n->max_prp_ents = n->page_size / sizeof(uint64_t);
-    n->cqe_size = 1 << NVME_CC_IOCQES(cc);
-    n->sqe_size = 1 << NVME_CC_IOSQES(cc);
    nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1);
    nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1);

@@ -7555,7 +7551,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
 static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
 {
    PCIDevice *pci = PCI_DEVICE(n);
-    uint32_t qid;
+    uint32_t qid, v;

    if (unlikely(addr & ((1 << 2) - 1))) {
        NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned,
@@ -7622,7 +7618,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
        start_sqs = nvme_cq_full(cq) ? 1 : 0;
        cq->head = new_head;
        if (!qid && n->dbbuf_enabled) {
-            pci_dma_write(pci, cq->db_addr, &cq->head, sizeof(cq->head));
+            v = cpu_to_le32(cq->head);
+            pci_dma_write(pci, cq->db_addr, &v, sizeof(cq->head));
        }
        if (start_sqs) {
            NvmeSQueue *sq;
@@ -7682,6 +7679,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)

        sq->tail = new_tail;
        if (!qid && n->dbbuf_enabled) {
+            v = cpu_to_le32(sq->tail);
+
            /*
             * The spec states "the host shall also update the controller's
             * corresponding doorbell property to match the value of that entry
@@ -7695,7 +7694,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
             * including ones that run on Linux, are not updating Admin Queues,
             * so we can't trust reading it for an appropriate sq tail.
             */
-            pci_dma_write(pci, sq->db_addr, &sq->tail, sizeof(sq->tail));
+            pci_dma_write(pci, sq->db_addr, &v, sizeof(sq->tail));
        }

        qemu_bh_schedule(sq->bh);
@@ -8206,8 +8205,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
    id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
    id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL);

-    id->sqes = (0x6 << 4) | 0x6;
-    id->cqes = (0x4 << 4) | 0x4;
+    id->sqes = (NVME_SQES << 4) | NVME_SQES;
+    id->cqes = (NVME_CQES << 4) | NVME_CQES;
    id->nn = cpu_to_le32(NVME_MAX_NAMESPACES);
    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
                           NVME_ONCS_FEATURES | NVME_ONCS_DSM |
@@ -115,7 +115,7 @@ static void nvme_dif_pract_generate_dif_crc64(NvmeNamespace *ns, uint8_t *buf,
        uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz);

        if (pil) {
-            crc = crc64_nvme(crc, mbuf, pil);
+            crc = crc64_nvme(~crc, mbuf, pil);
        }

        dif->g64.guard = cpu_to_be64(crc);
@@ -246,7 +246,7 @@ static uint16_t nvme_dif_prchk_crc64(NvmeNamespace *ns, NvmeDifTuple *dif,
        uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz);

        if (pil) {
-            crc = crc64_nvme(crc, mbuf, pil);
+            crc = crc64_nvme(~crc, mbuf, pil);
        }

        trace_pci_nvme_dif_prchk_guard_crc64(be64_to_cpu(dif->g64.guard), crc);
@@ -30,6 +30,13 @@
 #define NVME_FDP_MAX_EVENTS 63
 #define NVME_FDP_MAXPIDS 128

+/*
+ * The controller only supports Submission and Completion Queue Entry Sizes of
+ * 64 and 16 bytes respectively.
+ */
+#define NVME_SQES 6
+#define NVME_CQES 4
+
 QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);

 typedef struct NvmeCtrl NvmeCtrl;
@@ -530,8 +537,6 @@ typedef struct NvmeCtrl {
    uint32_t    page_size;
    uint16_t    page_bits;
    uint16_t    max_prp_ents;
-    uint16_t    cqe_size;
-    uint16_t    sqe_size;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
@@ -168,6 +168,7 @@ pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion q
 pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64""
 pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16""
 pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16""
+pci_nvme_err_invalid_create_cq_entry_size(uint8_t iosqes, uint8_t iocqes) "iosqes %"PRIu8" iocqes %"PRIu8""
 pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
 pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
 pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
@@ -311,7 +311,7 @@ static void pxb_cxl_dev_reset(DeviceState *dev)
     * The CXL specification allows for host bridges with no HDM decoders
     * if they only have a single root port.
     */
-    if (!PXB_DEV(dev)->hdm_for_passthrough) {
+    if (!PXB_CXL_DEV(dev)->hdm_for_passthrough) {
        dsp_count = pcie_count_ds_ports(hb->bus);
    }
    /* Initial reset will have 0 dsp so wait until > 0 */
@@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj)
    memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000);
    address_space_init(&s->pci_io_as, &s->pci_io, "raven-io");

+    /*
+     * Raven's raven_io_ops use the address-space API to access pci-conf-idx
+     * (which is also owned by the raven device). As such, mark the
+     * pci_io_non_contiguous as re-entrancy safe.
+     */
+    s->pci_io_non_contiguous.disable_reentrancy_guard = true;
+
    /* CPU address space */
    memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR,
                                &s->pci_io);
@@ -79,6 +79,8 @@ static Property pci_props[] = {
    DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
                       failover_pair_id),
    DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
+    DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
+                    QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
    DEFINE_PROP_END_OF_LIST()
 };

@@ -62,6 +62,17 @@ static void pci_adjust_config_limit(PCIBus *bus, uint32_t *limit)
    }
 }

+static bool is_pci_dev_ejected(PCIDevice *pci_dev)
+{
+    /*
+     * device unplug was requested and the guest acked it,
+     * so we stop responding config accesses even if the
+     * device is not deleted (failover flow)
+     */
+    return pci_dev && pci_dev->partially_hotplugged &&
+           !pci_dev->qdev.pending_deleted_event;
+}
+
 void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
                                  uint32_t limit, uint32_t val, uint32_t len)
 {
@@ -75,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
     * allowing direct removal of unexposed functions.
     */
    if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
-        !pci_dev->has_power) {
+        !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
        return;
    }

@@ -100,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
     * allowing direct removal of unexposed functions.
     */
    if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
-        !pci_dev->has_power) {
+        !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
        return ~0x0;
    }

@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,

    pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
                 PCI_ERR_UNC_SUPPORTED);
-    pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
-                 PCI_ERR_UNC_MASK_DEFAULT);
-    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
-                 PCI_ERR_UNC_SUPPORTED);
+
+    if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) {
+        pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
+                     PCI_ERR_UNC_MASK_DEFAULT);
+        pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
+                     PCI_ERR_UNC_SUPPORTED);
+    }

    pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
                 PCI_ERR_UNC_SEVERITY_DEFAULT);
@@ -712,7 +712,7 @@ static int ppce500_prep_device_tree(PPCE500MachineState *machine,
    p->kernel_base = kernel_base;
    p->kernel_size = kernel_size;

-    qemu_register_reset(ppce500_reset_device_tree, p);
+    qemu_register_reset_nosnapshotload(ppce500_reset_device_tree, p);
    p->notifier.notify = ppce500_init_notify;
    qemu_add_machine_init_done_notifier(&p->notifier);

--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .0.0
 .0.5