uipc_shm: Implements fspacectl(2) support

This implements fspacectl(2) support on shared memory objects. The
semantics of SPACECTL_DEALLOC are equivalent to clearing the backing
store and freeing the pages within the affected range. If the call
succeeds, subsequent reads of the affected range return all zeros.

tests/sys/posixshm/posixshm_tests.c is expanded to include an
fspacectl(2) functional test.

Sponsored by:	The FreeBSD Foundation
Reviewed by:	kevans, kib
Differential Revision:	https://reviews.freebsd.org/D31490
This commit is contained in:
Ka Ho Ng 2021-08-12 23:01:02 +08:00
parent a638dc4ebc
commit 454bc887f2
2 changed files with 354 additions and 42 deletions

View File

@ -131,6 +131,8 @@ static int shm_dotruncate_locked(struct shmfd *shmfd, off_t length,
void *rl_cookie);
static int shm_copyin_path(struct thread *td, const char *userpath_in,
char **path_out);
static int shm_deallocate(struct shmfd *shmfd, off_t *offset,
off_t *length, int flags);
static fo_rdwr_t shm_read;
static fo_rdwr_t shm_write;
@ -146,6 +148,7 @@ static fo_mmap_t shm_mmap;
static fo_get_seals_t shm_get_seals;
static fo_add_seals_t shm_add_seals;
static fo_fallocate_t shm_fallocate;
static fo_fspacectl_t shm_fspacectl;
/* File descriptor operations. */
struct fileops shm_ops = {
@ -166,6 +169,7 @@ struct fileops shm_ops = {
.fo_get_seals = shm_get_seals,
.fo_add_seals = shm_add_seals,
.fo_fallocate = shm_fallocate,
.fo_fspacectl = shm_fspacectl,
.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE,
};
@ -626,14 +630,64 @@ shm_copyin_path(struct thread *td, const char *userpath_in, char **path_out) {
return (error);
}
/*
 * Zero the byte range [base, end) inside the single page at index idx of
 * object, then mark that page dirty.
 *
 * If the page is not resident but the pager has a copy, it is paged in
 * first; the object lock is dropped around the pager call and re-taken
 * afterwards.  If the page neither is resident nor exists in the pager,
 * nothing is done.
 *
 * The object must be write-locked on entry and is write-locked on return,
 * on success and on failure alike.
 *
 * Returns 0 on success (including the nothing-to-do case) and EIO when the
 * pager fails to supply the page.
 */
static int
shm_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
    int end)
{
	vm_page_t m;
	int rv;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(base >= 0, ("%s: base %d", __func__, base));
	KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
	    end));

retry:
	m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
	if (m != NULL) {
		MPASS(vm_page_all_valid(m));
	} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
		m = vm_page_alloc(object, idx,
		    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
		if (m == NULL)
			goto retry;
		vm_object_pip_add(object, 1);
		VM_OBJECT_WUNLOCK(object);
		rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
		VM_OBJECT_WLOCK(object);
		vm_object_pip_wakeup(object);
		if (rv == VM_PAGER_OK) {
			/*
			 * Since the page was not resident, and therefore not
			 * recently accessed, immediately enqueue it for
			 * asynchronous laundering.  The current operation is
			 * not regarded as an access.
			 */
			vm_page_launder(m);
		} else {
			/*
			 * Keep the object write-locked on the error path.
			 * shm_deallocate() releases the lock itself after a
			 * failure, so unlocking here as well would unlock the
			 * object twice.
			 * NOTE(review): shm_dotruncate_locked() propagates
			 * this error to its own caller, which is presumed to
			 * own the lock as well -- confirm.
			 */
			vm_page_free(m);
			return (EIO);
		}
	}
	if (m != NULL) {
		pmap_zero_page_area(m, base, end - base);
		KASSERT(vm_page_all_valid(m), ("%s: page %p is invalid",
		    __func__, m));
		vm_page_set_dirty(m);
		vm_page_xunbusy(m);
	}

	return (0);
}
static int
shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
{
vm_object_t object;
vm_page_t m;
vm_pindex_t idx, nobjsize;
vm_pindex_t nobjsize;
vm_ooffset_t delta;
int base, rv;
int base, error;
KASSERT(length >= 0, ("shm_dotruncate: length < 0"));
object = shmfd->shm_object;
@ -660,45 +714,10 @@ shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
*/
base = length & PAGE_MASK;
if (base != 0) {
idx = OFF_TO_IDX(length);
retry:
m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
if (m != NULL) {
MPASS(vm_page_all_valid(m));
} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
m = vm_page_alloc(object, idx,
VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
if (m == NULL)
goto retry;
vm_object_pip_add(object, 1);
VM_OBJECT_WUNLOCK(object);
rv = vm_pager_get_pages(object, &m, 1, NULL,
NULL);
VM_OBJECT_WLOCK(object);
vm_object_pip_wakeup(object);
if (rv == VM_PAGER_OK) {
/*
* Since the page was not resident,
* and therefore not recently
* accessed, immediately enqueue it
* for asynchronous laundering. The
* current operation is not regarded
* as an access.
*/
vm_page_launder(m);
} else {
vm_page_free(m);
VM_OBJECT_WUNLOCK(object);
return (EIO);
}
}
if (m != NULL) {
pmap_zero_page_area(m, base, PAGE_SIZE - base);
KASSERT(vm_page_all_valid(m),
("shm_dotruncate: page %p is invalid", m));
vm_page_set_dirty(m);
vm_page_xunbusy(m);
}
error = shm_partial_page_invalidate(object,
OFF_TO_IDX(length), base, PAGE_SIZE);
if (error)
return (error);
}
delta = IDX_TO_OFF(object->size - nobjsize);
@ -1874,6 +1893,100 @@ shm_get_seals(struct file *fp, int *seals)
return (0);
}
/*
 * Deallocate the byte range [*offset, *offset + *length) of the shared
 * memory object backing shmfd.
 *
 * The range is handled in up to three steps: a partial leading page is
 * zeroed in place, pages lying wholly inside the range are removed from
 * the object, and a partial trailing page is zeroed in place.  After each
 * completed step the running offset is advanced and the remaining length
 * reduced; the final values are written back through offset and length
 * whether or not an error occurred.
 *
 * Returns 0 on success or the error from shm_partial_page_invalidate().
 * The flags argument is not examined here.
 */
static int
shm_deallocate(struct shmfd *shmfd, off_t *offset, off_t *length, int flags)
{
	vm_object_t obj;
	vm_pindex_t first_pg, last_pg, whole_pg;
	vm_ooffset_t cur, resid;
	int head_ofs, tail_ofs, head_end, zeroed;
	int error;

	cur = *offset;
	resid = *length;
	KASSERT(cur + resid <= (vm_ooffset_t)OFF_MAX, ("off + len overflows"));

	obj = shmfd->shm_object;

	/* All boundaries are derived from the initial offset and length. */
	first_pg = OFF_TO_IDX(cur);
	last_pg = OFF_TO_IDX(cur + resid);
	whole_pg = OFF_TO_IDX(cur + PAGE_MASK);
	head_ofs = cur & PAGE_MASK;
	tail_ofs = (cur + resid) & PAGE_MASK;
	error = 0;

	VM_OBJECT_WLOCK(obj);

	/* Partial leading page; it may also contain the end of the range. */
	if (head_ofs != 0) {
		head_end = (first_pg == last_pg) ? tail_ofs : PAGE_SIZE;
		error = shm_partial_page_invalidate(obj, first_pg, head_ofs,
		    head_end);
		if (error == 0) {
			zeroed = head_end - head_ofs;
			cur += zeroed;
			resid -= zeroed;
		}
	}

	/* Pages that lie entirely within the range are simply removed. */
	if (error == 0 && whole_pg < last_pg) {
		vm_object_page_remove(obj, whole_pg, last_pg, 0);
		cur += IDX_TO_OFF(last_pg - whole_pg);
		resid -= IDX_TO_OFF(last_pg - whole_pg);
	}

	/* Partial trailing page, unless the leading step already covered it. */
	if (error == 0 && tail_ofs != 0 && first_pg != last_pg) {
		error = shm_partial_page_invalidate(obj, last_pg, 0, tail_ofs);
		if (error == 0) {
			cur += tail_ofs;
			resid -= tail_ofs;
		}
	}

	VM_OBJECT_WUNLOCK(obj);
	*offset = cur;
	*length = resid;
	return (error);
}
/*
 * fo_fspacectl handler for shared memory objects.
 *
 * Only SPACECTL_DEALLOC with flags == 0, a non-negative offset and a
 * positive length that does not run past OFF_MAX is accepted; anything
 * else yields EINVAL.  The request is carried out under a write range lock
 * covering [*offset, *offset + *length).  An object sealed with
 * F_SEAL_WRITE yields EPERM.  On success *offset and *length are replaced
 * by the values produced by shm_deallocate(); on failure they are left
 * untouched.
 */
static int
shm_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags,
    struct ucred *active_cred, struct thread *td)
{
	struct shmfd *shmfd;
	void *cookie;
	off_t start, nbytes;
	int error;

	shmfd = fp->f_data;
	start = *offset;
	nbytes = *length;

	/* This assumes that the caller already checked for overflow. */
	if (cmd != SPACECTL_DEALLOC)
		return (EINVAL);
	if (start < 0 || nbytes <= 0 || nbytes > OFF_MAX - start || flags != 0)
		return (EINVAL);

	cookie = rangelock_wlock(&shmfd->shm_rl, start, start + nbytes,
	    &shmfd->shm_mtx);
	switch (cmd) {
	case SPACECTL_DEALLOC:
		if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) {
			error = EPERM;
		} else {
			error = shm_deallocate(shmfd, &start, &nbytes, flags);
			if (error == 0) {
				/* Report progress only for a successful call. */
				*offset = start;
				*length = nbytes;
			}
		}
		break;
	default:
		__assert_unreachable();
	}
	rangelock_unlock(&shmfd->shm_rl, cookie, &shmfd->shm_mtx);

	return (error);
}
static int
shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td)
{

View File

@ -2,6 +2,11 @@
* Copyright (c) 2006 Robert N. M. Watson
* All rights reserved.
*
* Copyright (c) 2021 The FreeBSD Foundation
*
* Portions of this software were developed by Ka Ho Ng
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -173,6 +178,126 @@ verify_object(const char *path, char expected_value)
close(fd);
}
/* Size, in pages, of the shared memory object used by the fspacectl test. */
static off_t shm_max_pages = 32;
/* Byte pattern that shm_fill() writes and the content checks expect. */
static const char byte_to_fill = 0x5f;
/*
 * Write len bytes of byte_to_fill to fd starting at offset, at most one
 * page per pwrite(2) call.
 *
 * Returns 0 on success, 1 if the buffer cannot be allocated or if any
 * pwrite(2) call does not write the full chunk.
 */
static int
shm_fill(int fd, off_t offset, off_t len)
{
	char *page;
	size_t chunk;
	int rc;

	page = malloc(PAGE_SIZE);
	if (page == NULL)
		return (1);

	rc = 0;
	for (; len > 0; len -= chunk, offset += chunk) {
		if (len < (off_t)PAGE_SIZE)
			chunk = len;
		else
			chunk = PAGE_SIZE;
		memset(page, byte_to_fill, chunk);
		if (pwrite(fd, page, chunk, offset) != (ssize_t)chunk) {
			rc = 1;
			break;
		}
	}

	free(page);
	return (rc);
}
/*
 * Compare len bytes of fd, starting at offset, against the reference page
 * "expect", reading at most one page at a time into "scratch".
 *
 * Returns 0 when every chunk matches, 1 on a short or failed pread(2) or
 * on the first mismatch.
 */
static int
check_region_content(int fd, off_t offset, off_t len, char *scratch,
    const char *expect)
{
	size_t blen;

	while (len > 0) {
		blen = len < (off_t)PAGE_SIZE ? (size_t)len : PAGE_SIZE;
		if (pread(fd, scratch, blen, offset) != (ssize_t)blen)
			return (1);
		if (memcmp(scratch, expect, blen) != 0)
			return (1);
		len -= blen;
		offset += blen;
	}
	return (0);
}

/*
 * Verify the contents of fd after a deallocation of hole_len bytes at
 * hole_start in an object of shm_sz bytes that had been filled with
 * byte_to_fill:
 *   - the hole (clamped to shm_sz) reads back as zeros;
 *   - the regions before and after the hole still hold byte_to_fill;
 *   - the size reported by fstat(2) is still shm_sz.
 *
 * Returns 0 when all checks pass, 1 on allocation failure or a content
 * mismatch, and -1 when fstat(2) fails or the size check fails.
 */
static int
check_content_dealloc(int fd, off_t hole_start, off_t hole_len, off_t shm_sz)
{
	int error;
	off_t hole_end;
	struct stat statbuf;
	char *buf, *sblk;

	buf = malloc(PAGE_SIZE * 2);
	if (buf == NULL)
		return (1);
	/* First page is the read buffer, second page the reference pattern. */
	sblk = buf + PAGE_SIZE;

	if ((uint64_t)hole_start + hole_len > (uint64_t)shm_sz)
		hole_len = shm_sz - hole_start;
	hole_end = hole_start + hole_len;

	error = 0;

	/*
	 * Check hole is zeroed.
	 */
	memset(sblk, 0, PAGE_SIZE);
	if (check_region_content(fd, hole_start, hole_len, buf, sblk) != 0)
		error = 1;

	memset(sblk, byte_to_fill, PAGE_SIZE);

	/*
	 * Check file region before hole still holds the fill pattern.
	 */
	if (check_region_content(fd, 0, hole_start, buf, sblk) != 0)
		error = 1;

	/*
	 * Check file region after hole still holds the fill pattern.
	 */
	if (check_region_content(fd, hole_end, shm_sz - hole_end, buf,
	    sblk) != 0)
		error = 1;

	/*
	 * Check file size matches with expected file size.  statbuf is
	 * only examined when fstat(2) succeeded.
	 */
	if (fstat(fd, &statbuf) == -1 || statbuf.st_size != shm_sz)
		error = -1;

	free(buf);
	return (error);
}
ATF_TC_WITHOUT_HEAD(remap_object);
ATF_TC_BODY(remap_object, tc)
{
@ -958,6 +1083,79 @@ ATF_TC_BODY(fallocate, tc)
close(fd);
}
/*
 * Functional test for fspacectl(2) SPACECTL_DEALLOC on a shared memory
 * object of shm_max_pages pages.  Each case refills the object with
 * byte_to_fill, deallocates a range, and verifies the resulting contents
 * and size with check_content_dealloc().  Ranges covered: page-aligned and
 * unaligned, ending inside the object, extending to OFF_MAX, and extending
 * one page past the end of the object.
 */
ATF_TC_WITHOUT_HEAD(fspacectl);
ATF_TC_BODY(fspacectl, tc)
{
	struct spacectl_range range;
	off_t offset, length, shm_sz;
	int fd, error;

	shm_sz = shm_max_pages << PAGE_SHIFT;

	fd = shm_open("/testtest", O_RDWR|O_CREAT, 0666);
	ATF_REQUIRE_MSG(fd >= 0, "shm_open failed; errno:%d", errno);
	ATF_REQUIRE_MSG((error = posix_fallocate(fd, 0, shm_sz)) == 0,
	    "posix_fallocate failed; error=%d", error);

	/* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) */
	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
	range.r_offset = offset = PAGE_SIZE;
	range.r_len = length = ((shm_max_pages - 1) << PAGE_SHIFT) -
	    range.r_offset;
	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
	    "Aligned fspacectl failed; errno=%d", errno);
	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
	    "Aligned fspacectl content checking failed");

	/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) */
	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
	range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
	range.r_len = length = ((shm_max_pages - 1) << PAGE_SHIFT) +
	    (1 << (PAGE_SHIFT - 1)) - offset;
	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
	    "Unaligned fspacectl failed; errno=%d", errno);
	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
	    "Unaligned fspacectl content checking failed");

	/*
	 * Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) to OFF_MAX.
	 * The offset must be PAGE_SIZE; PAGE_SHIFT is not page aligned.
	 */
	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
	range.r_offset = offset = PAGE_SIZE;
	range.r_len = length = OFF_MAX - offset;
	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
	    "Aligned fspacectl to OFF_MAX failed; errno=%d", errno);
	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
	    "Aligned fspacectl to OFF_MAX content checking failed");

	/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) to OFF_MAX */
	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
	range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
	range.r_len = length = OFF_MAX - offset;
	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
	    "Unaligned fspacectl to OFF_MAX failed; errno=%d", errno);
	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
	    "Unaligned fspacectl to OFF_MAX content checking failed");

	/* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) past shm_sz */
	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
	range.r_offset = offset = PAGE_SIZE;
	range.r_len = length = ((shm_max_pages + 1) << PAGE_SHIFT) - offset;
	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
	    "Aligned fspacectl past shm_sz failed; errno=%d", errno);
	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
	    "Aligned fspacectl past shm_sz content checking failed");

	/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) past shm_sz */
	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
	range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
	range.r_len = length = ((shm_max_pages + 1) << PAGE_SHIFT) - offset;
	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
	    "Unaligned fspacectl past shm_sz failed; errno=%d", errno);
	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
	    "Unaligned fspacectl past shm_sz content checking failed");

	ATF_REQUIRE(close(fd) == 0);
}
static int
shm_open_large(int psind, int policy, size_t sz)
{
@ -1704,6 +1902,7 @@ ATF_TP_ADD_TCS(tp)
ATF_TP_ADD_TC(tp, cloexec);
ATF_TP_ADD_TC(tp, mode);
ATF_TP_ADD_TC(tp, fallocate);
ATF_TP_ADD_TC(tp, fspacectl);
ATF_TP_ADD_TC(tp, largepage_basic);
ATF_TP_ADD_TC(tp, largepage_config);
ATF_TP_ADD_TC(tp, largepage_mmap);