Raw receive fix and encrypted objset security fix

This patch fixes two problems with the encryption code. First, the
current code does not correctly prohibit the DMU from updating
dn_maxblkid during object truncation within a raw receive. This
usually only causes issues when the truncating DRR_FREE record is
aggregated with DRR_FREE records later in the receive, so it is
relatively hard to hit.

Second, this patch fixes a security issue where reading blocks
within an encrypted object did not guarantee that the dnode block
itself had ever been verified against its MAC. Usually the
verification happened anyway when the bonus buffer was read, but
some use cases (notably zvols) might never perform the check.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7632
This commit is contained in:
Tom Caputi 2018-06-28 12:20:34 -04:00 committed by Brian Behlendorf
parent 3be1eb29da
commit 69830602de
3 changed files with 116 additions and 38 deletions

View File

@ -2137,13 +2137,17 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
}
/*
* Adjust encrypted and authenticated headers to accomodate the
* request if needed.
* Adjust encrypted and authenticated headers to accomodate
* the request if needed. Dnode blocks (ARC_FILL_IN_PLACE) are
* allowed to fail decryption due to keys not being loaded
* without being marked as an IO error.
*/
if (HDR_PROTECTED(hdr)) {
error = arc_fill_hdr_crypt(hdr, hash_lock, spa,
zb, !!(flags & ARC_FILL_NOAUTH));
if (error != 0) {
if (error == EACCES && (flags & ARC_FILL_IN_PLACE) != 0) {
return (error);
} else if (error != 0) {
if (hash_lock != NULL)
mutex_enter(hash_lock);
arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);

View File

@ -1119,6 +1119,64 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
dbuf_rele_and_unlock(db, NULL, B_FALSE);
}
/*
* This function ensures that, when doing a decrypting read of a block,
* we make sure we have decrypted the dnode associated with it. We must do
* this so that we ensure we are fully authenticating the checksum-of-MACs
* tree from the root of the objset down to this block. Indirect blocks are
* always verified against their secure checksum-of-MACs assuming that the
* dnode containing them is correct. Now that we are doing a decrypting read,
* we can be sure that the key is loaded and verify that assumption. This is
* especially important considering that we always read encrypted dnode
* blocks as raw data (without verifying their MACs) to start, and
* decrypt / authenticate them when we need to read an encrypted bonus buffer.
*/
static int
dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
{
int err = 0;
objset_t *os = db->db_objset;
arc_buf_t *dnode_abuf;
dnode_t *dn;
zbookmark_phys_t zb;
ASSERT(MUTEX_HELD(&db->db_mtx));
if (!os->os_encrypted || os->os_raw_receive ||
(flags & DB_RF_NO_DECRYPT) != 0)
return (0);
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
dnode_abuf = (dn->dn_dbuf != NULL) ? dn->dn_dbuf->db_buf : NULL;
if (dnode_abuf == NULL || !arc_is_encrypted(dnode_abuf)) {
DB_DNODE_EXIT(db);
return (0);
}
SET_BOOKMARK(&zb, dmu_objset_id(os),
DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid);
err = arc_untransform(dnode_abuf, os->os_spa, &zb, B_TRUE);
/*
* An error code of EACCES tells us that the key is still not
* available. This is ok if we are only reading authenticated
* (and therefore non-encrypted) blocks.
*/
if (err == EACCES && ((db->db_blkid != DMU_BONUS_BLKID &&
!DMU_OT_IS_ENCRYPTED(dn->dn_type)) ||
(db->db_blkid == DMU_BONUS_BLKID &&
!DMU_OT_IS_ENCRYPTED(dn->dn_bonustype))))
err = 0;
DB_DNODE_EXIT(db);
return (err);
}
static int
dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
{
@ -1143,23 +1201,13 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
*/
int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
arc_buf_t *dn_buf = (dn->dn_dbuf != NULL) ?
dn->dn_dbuf->db_buf : NULL;
/* if the underlying dnode block is encrypted, decrypt it */
if (dn_buf != NULL && dn->dn_objset->os_encrypted &&
DMU_OT_IS_ENCRYPTED(dn->dn_bonustype) &&
(flags & DB_RF_NO_DECRYPT) == 0 &&
arc_is_encrypted(dn_buf)) {
SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid);
err = arc_untransform(dn_buf, dn->dn_objset->os_spa,
&zb, B_TRUE);
if (err != 0) {
DB_DNODE_EXIT(db);
mutex_exit(&db->db_mtx);
return (err);
}
err = dbuf_read_verify_dnode_crypt(db, flags);
if (err != 0) {
DB_DNODE_EXIT(db);
mutex_exit(&db->db_mtx);
return (err);
}
ASSERT3U(bonuslen, <=, db->db.db_size);
@ -1215,17 +1263,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
return (0);
}
DB_DNODE_EXIT(db);
db->db_state = DB_READ;
mutex_exit(&db->db_mtx);
if (DBUF_IS_L2CACHEABLE(db))
aflags |= ARC_FLAG_L2CACHE;
SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
db->db.db_object, db->db_level, db->db_blkid);
/*
* All bps of an encrypted os should have the encryption bit set.
* If this is not true it indicates tampering and we report an error.
@ -1234,11 +1271,31 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
spa_log_error(db->db_objset->os_spa, &zb);
zfs_panic_recover("unencrypted block in encrypted "
"object set %llu", dmu_objset_id(db->db_objset));
DB_DNODE_EXIT(db);
mutex_exit(&db->db_mtx);
return (SET_ERROR(EIO));
}
err = dbuf_read_verify_dnode_crypt(db, flags);
if (err != 0) {
DB_DNODE_EXIT(db);
mutex_exit(&db->db_mtx);
return (err);
}
DB_DNODE_EXIT(db);
db->db_state = DB_READ;
mutex_exit(&db->db_mtx);
if (DBUF_IS_L2CACHEABLE(db))
aflags |= ARC_FLAG_L2CACHE;
dbuf_add_ref(db, NULL);
SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
db->db.db_object, db->db_level, db->db_blkid);
zio_flags = (flags & DB_RF_CANFAIL) ?
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
@ -1358,14 +1415,22 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
spa_t *spa = dn->dn_objset->os_spa;
/*
* If the arc buf is compressed or encrypted, we need to
* untransform it to read the data. This could happen during
* the "zfs receive" of a stream which is deduplicated and
* either raw or compressed. We do not need to do this if the
* caller wants raw encrypted data.
* Ensure that this block's dnode has been decrypted if
* the caller has requested decrypted data.
*/
if (db->db_buf != NULL && (flags & DB_RF_NO_DECRYPT) == 0 &&
err = dbuf_read_verify_dnode_crypt(db, flags);
/*
* If the arc buf is compressed or encrypted and the caller
* requested uncompressed data, we need to untransform it
* before returning. We also call arc_untransform() on any
* unauthenticated blocks, which will verify their MAC if
* the key is now available.
*/
if (err == 0 && db->db_buf != NULL &&
(flags & DB_RF_NO_DECRYPT) == 0 &&
(arc_is_encrypted(db->db_buf) ||
arc_is_unauthenticated(db->db_buf) ||
arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
zbookmark_phys_t zb;
@ -1376,7 +1441,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
dbuf_set_data(db, db->db_buf);
}
mutex_exit(&db->db_mtx);
if (prefetch)
if (err == 0 && prefetch)
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
@ -1943,6 +2008,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
ddt_prefetch(os->os_spa, db->db_blkptr);
if (db->db_level == 0) {
ASSERT(!db->db_objset->os_raw_receive ||
dn->dn_maxblkid >= db->db_blkid);
dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
ASSERT(dn->dn_maxblkid >= db->db_blkid);
}
@ -3806,8 +3873,10 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
if (db->db_level == 0) {
mutex_enter(&dn->dn_mtx);
if (db->db_blkid > dn->dn_phys->dn_maxblkid &&
db->db_blkid != DMU_SPILL_BLKID)
db->db_blkid != DMU_SPILL_BLKID) {
ASSERT0(db->db_objset->os_raw_receive);
dn->dn_phys->dn_maxblkid = db->db_blkid;
}
mutex_exit(&dn->dn_mtx);
if (dn->dn_type == DMU_OT_DNODE) {

View File

@ -367,7 +367,12 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
}
}
if (trunc) {
/*
* Do not truncate the maxblkid if we are performing a raw
* receive. The raw receive sets the mablkid manually and
* must not be overriden.
*/
if (trunc && !dn->dn_objset->os_raw_receive) {
ASSERTV(uint64_t off);
dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;