uipc_shm: Implements fspacectl(2) support
This implements fspacectl(2) support on shared memory objects. The semantics of SPACECTL_DEALLOC are equivalent to clearing the backing store and freeing the pages within the affected range. If the call succeeds, subsequent reads on the affected range return all zeros. tests/sys/posixshm/posixshm_tests.c is expanded to include a fspacectl(2) functional test. Sponsored by: The FreeBSD Foundation Reviewed by: kevans, kib Differential Revision: https://reviews.freebsd.org/D31490
This commit is contained in:
parent
a638dc4ebc
commit
454bc887f2
@ -131,6 +131,8 @@ static int shm_dotruncate_locked(struct shmfd *shmfd, off_t length,
|
||||
void *rl_cookie);
|
||||
static int shm_copyin_path(struct thread *td, const char *userpath_in,
|
||||
char **path_out);
|
||||
static int shm_deallocate(struct shmfd *shmfd, off_t *offset,
|
||||
off_t *length, int flags);
|
||||
|
||||
static fo_rdwr_t shm_read;
|
||||
static fo_rdwr_t shm_write;
|
||||
@ -146,6 +148,7 @@ static fo_mmap_t shm_mmap;
|
||||
static fo_get_seals_t shm_get_seals;
|
||||
static fo_add_seals_t shm_add_seals;
|
||||
static fo_fallocate_t shm_fallocate;
|
||||
static fo_fspacectl_t shm_fspacectl;
|
||||
|
||||
/* File descriptor operations. */
|
||||
struct fileops shm_ops = {
|
||||
@ -166,6 +169,7 @@ struct fileops shm_ops = {
|
||||
.fo_get_seals = shm_get_seals,
|
||||
.fo_add_seals = shm_add_seals,
|
||||
.fo_fallocate = shm_fallocate,
|
||||
.fo_fspacectl = shm_fspacectl,
|
||||
.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE,
|
||||
};
|
||||
|
||||
@ -626,14 +630,64 @@ shm_copyin_path(struct thread *td, const char *userpath_in, char **path_out) {
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
shm_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
|
||||
int end)
|
||||
{
|
||||
vm_page_t m;
|
||||
int rv;
|
||||
|
||||
VM_OBJECT_ASSERT_WLOCKED(object);
|
||||
KASSERT(base >= 0, ("%s: base %d", __func__, base));
|
||||
KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
|
||||
end));
|
||||
|
||||
retry:
|
||||
m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
|
||||
if (m != NULL) {
|
||||
MPASS(vm_page_all_valid(m));
|
||||
} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
|
||||
m = vm_page_alloc(object, idx,
|
||||
VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
|
||||
if (m == NULL)
|
||||
goto retry;
|
||||
vm_object_pip_add(object, 1);
|
||||
VM_OBJECT_WUNLOCK(object);
|
||||
rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
|
||||
VM_OBJECT_WLOCK(object);
|
||||
vm_object_pip_wakeup(object);
|
||||
if (rv == VM_PAGER_OK) {
|
||||
/*
|
||||
* Since the page was not resident, and therefore not
|
||||
* recently accessed, immediately enqueue it for
|
||||
* asynchronous laundering. The current operation is
|
||||
* not regarded as an access.
|
||||
*/
|
||||
vm_page_launder(m);
|
||||
} else {
|
||||
vm_page_free(m);
|
||||
VM_OBJECT_WUNLOCK(object);
|
||||
return (EIO);
|
||||
}
|
||||
}
|
||||
if (m != NULL) {
|
||||
pmap_zero_page_area(m, base, end - base);
|
||||
KASSERT(vm_page_all_valid(m), ("%s: page %p is invalid",
|
||||
__func__, m));
|
||||
vm_page_set_dirty(m);
|
||||
vm_page_xunbusy(m);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
|
||||
{
|
||||
vm_object_t object;
|
||||
vm_page_t m;
|
||||
vm_pindex_t idx, nobjsize;
|
||||
vm_pindex_t nobjsize;
|
||||
vm_ooffset_t delta;
|
||||
int base, rv;
|
||||
int base, error;
|
||||
|
||||
KASSERT(length >= 0, ("shm_dotruncate: length < 0"));
|
||||
object = shmfd->shm_object;
|
||||
@ -660,45 +714,10 @@ shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
|
||||
*/
|
||||
base = length & PAGE_MASK;
|
||||
if (base != 0) {
|
||||
idx = OFF_TO_IDX(length);
|
||||
retry:
|
||||
m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
|
||||
if (m != NULL) {
|
||||
MPASS(vm_page_all_valid(m));
|
||||
} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
|
||||
m = vm_page_alloc(object, idx,
|
||||
VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
|
||||
if (m == NULL)
|
||||
goto retry;
|
||||
vm_object_pip_add(object, 1);
|
||||
VM_OBJECT_WUNLOCK(object);
|
||||
rv = vm_pager_get_pages(object, &m, 1, NULL,
|
||||
NULL);
|
||||
VM_OBJECT_WLOCK(object);
|
||||
vm_object_pip_wakeup(object);
|
||||
if (rv == VM_PAGER_OK) {
|
||||
/*
|
||||
* Since the page was not resident,
|
||||
* and therefore not recently
|
||||
* accessed, immediately enqueue it
|
||||
* for asynchronous laundering. The
|
||||
* current operation is not regarded
|
||||
* as an access.
|
||||
*/
|
||||
vm_page_launder(m);
|
||||
} else {
|
||||
vm_page_free(m);
|
||||
VM_OBJECT_WUNLOCK(object);
|
||||
return (EIO);
|
||||
}
|
||||
}
|
||||
if (m != NULL) {
|
||||
pmap_zero_page_area(m, base, PAGE_SIZE - base);
|
||||
KASSERT(vm_page_all_valid(m),
|
||||
("shm_dotruncate: page %p is invalid", m));
|
||||
vm_page_set_dirty(m);
|
||||
vm_page_xunbusy(m);
|
||||
}
|
||||
error = shm_partial_page_invalidate(object,
|
||||
OFF_TO_IDX(length), base, PAGE_SIZE);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
delta = IDX_TO_OFF(object->size - nobjsize);
|
||||
|
||||
@ -1874,6 +1893,100 @@ shm_get_seals(struct file *fp, int *seals)
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
shm_deallocate(struct shmfd *shmfd, off_t *offset, off_t *length, int flags)
|
||||
{
|
||||
vm_object_t object;
|
||||
vm_pindex_t pistart, pi, piend;
|
||||
vm_ooffset_t off, len;
|
||||
int startofs, endofs, end;
|
||||
int error;
|
||||
|
||||
off = *offset;
|
||||
len = *length;
|
||||
KASSERT(off + len <= (vm_ooffset_t)OFF_MAX, ("off + len overflows"));
|
||||
object = shmfd->shm_object;
|
||||
startofs = off & PAGE_MASK;
|
||||
endofs = (off + len) & PAGE_MASK;
|
||||
pistart = OFF_TO_IDX(off);
|
||||
piend = OFF_TO_IDX(off + len);
|
||||
pi = OFF_TO_IDX(off + PAGE_MASK);
|
||||
error = 0;
|
||||
|
||||
VM_OBJECT_WLOCK(object);
|
||||
|
||||
if (startofs != 0) {
|
||||
end = pistart != piend ? PAGE_SIZE : endofs;
|
||||
error = shm_partial_page_invalidate(object, pistart, startofs,
|
||||
end);
|
||||
if (error)
|
||||
goto out;
|
||||
off += end - startofs;
|
||||
len -= end - startofs;
|
||||
}
|
||||
|
||||
if (pi < piend) {
|
||||
vm_object_page_remove(object, pi, piend, 0);
|
||||
off += IDX_TO_OFF(piend - pi);
|
||||
len -= IDX_TO_OFF(piend - pi);
|
||||
}
|
||||
|
||||
if (endofs != 0 && pistart != piend) {
|
||||
error = shm_partial_page_invalidate(object, piend, 0, endofs);
|
||||
if (error)
|
||||
goto out;
|
||||
off += endofs;
|
||||
len -= endofs;
|
||||
}
|
||||
|
||||
out:
|
||||
VM_OBJECT_WUNLOCK(shmfd->shm_object);
|
||||
*offset = off;
|
||||
*length = len;
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
shm_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags,
|
||||
struct ucred *active_cred, struct thread *td)
|
||||
{
|
||||
void *rl_cookie;
|
||||
struct shmfd *shmfd;
|
||||
off_t off, len;
|
||||
int error;
|
||||
|
||||
/* This assumes that the caller already checked for overflow. */
|
||||
error = EINVAL;
|
||||
shmfd = fp->f_data;
|
||||
off = *offset;
|
||||
len = *length;
|
||||
|
||||
if (cmd != SPACECTL_DEALLOC || off < 0 || len <= 0 ||
|
||||
len > OFF_MAX - off || flags != 0)
|
||||
return (EINVAL);
|
||||
|
||||
rl_cookie = rangelock_wlock(&shmfd->shm_rl, off, off + len,
|
||||
&shmfd->shm_mtx);
|
||||
switch (cmd) {
|
||||
case SPACECTL_DEALLOC:
|
||||
if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) {
|
||||
error = EPERM;
|
||||
break;
|
||||
}
|
||||
error = shm_deallocate(shmfd, &off, &len, flags);
|
||||
if (error != 0)
|
||||
break;
|
||||
*offset = off;
|
||||
*length = len;
|
||||
break;
|
||||
default:
|
||||
__assert_unreachable();
|
||||
}
|
||||
rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx);
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td)
|
||||
{
|
||||
|
@ -2,6 +2,11 @@
|
||||
* Copyright (c) 2006 Robert N. M. Watson
|
||||
* All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2021 The FreeBSD Foundation
|
||||
*
|
||||
* Portions of this software were developed by Ka Ho Ng
|
||||
* under sponsorship from the FreeBSD Foundation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
@ -173,6 +178,126 @@ verify_object(const char *path, char expected_value)
|
||||
close(fd);
|
||||
}
|
||||
|
||||
/* Size, in pages, of the shared memory object created by the fspacectl test. */
static off_t shm_max_pages = 32;
|
||||
/* Byte value written by shm_fill() and expected outside the hole by check_content_dealloc(). */
static const char byte_to_fill = 0x5f;
|
||||
|
||||
static int
|
||||
shm_fill(int fd, off_t offset, off_t len)
|
||||
{
|
||||
int error;
|
||||
size_t blen;
|
||||
char *buf;
|
||||
error = 0;
|
||||
buf = malloc(PAGE_SIZE);
|
||||
if (buf == NULL)
|
||||
return (1);
|
||||
|
||||
while (len > 0) {
|
||||
blen = len < (off_t)PAGE_SIZE ? len : PAGE_SIZE;
|
||||
memset(buf, byte_to_fill, blen);
|
||||
if (pwrite(fd, buf, blen, offset) != (ssize_t)blen) {
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
len -= blen;
|
||||
offset += blen;
|
||||
}
|
||||
|
||||
free(buf);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
check_content_dealloc(int fd, off_t hole_start, off_t hole_len, off_t shm_sz)
|
||||
{
|
||||
int error;
|
||||
size_t blen;
|
||||
off_t offset, resid;
|
||||
struct stat statbuf;
|
||||
char *buf, *sblk;
|
||||
|
||||
error = 0;
|
||||
buf = malloc(PAGE_SIZE * 2);
|
||||
if (buf == NULL)
|
||||
return (1);
|
||||
sblk = buf + PAGE_SIZE;
|
||||
|
||||
memset(sblk, 0, PAGE_SIZE);
|
||||
|
||||
if ((uint64_t)hole_start + hole_len > (uint64_t)shm_sz)
|
||||
hole_len = shm_sz - hole_start;
|
||||
|
||||
/*
|
||||
* Check hole is zeroed.
|
||||
*/
|
||||
offset = hole_start;
|
||||
resid = hole_len;
|
||||
while (resid > 0) {
|
||||
blen = resid < (off_t)PAGE_SIZE ? resid : PAGE_SIZE;
|
||||
if (pread(fd, buf, blen, offset) != (ssize_t)blen) {
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
if (memcmp(buf, sblk, blen) != 0) {
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
resid -= blen;
|
||||
offset += blen;
|
||||
}
|
||||
|
||||
memset(sblk, byte_to_fill, PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* Check file region before hole is zeroed.
|
||||
*/
|
||||
offset = 0;
|
||||
resid = hole_start;
|
||||
while (resid > 0) {
|
||||
blen = resid < (off_t)PAGE_SIZE ? resid : PAGE_SIZE;
|
||||
if (pread(fd, buf, blen, offset) != (ssize_t)blen) {
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
if (memcmp(buf, sblk, blen) != 0) {
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
resid -= blen;
|
||||
offset += blen;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check file region after hole is zeroed.
|
||||
*/
|
||||
offset = hole_start + hole_len;
|
||||
resid = shm_sz - offset;
|
||||
while (resid > 0) {
|
||||
blen = resid < (off_t)PAGE_SIZE ? resid : PAGE_SIZE;
|
||||
if (pread(fd, buf, blen, offset) != (ssize_t)blen) {
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
if (memcmp(buf, sblk, blen) != 0) {
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
resid -= blen;
|
||||
offset += blen;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check file size matches with expected file size.
|
||||
*/
|
||||
if (fstat(fd, &statbuf) == -1)
|
||||
error = -1;
|
||||
if (statbuf.st_size != shm_sz)
|
||||
error = -1;
|
||||
|
||||
free(buf);
|
||||
return (error);
|
||||
}
|
||||
|
||||
ATF_TC_WITHOUT_HEAD(remap_object);
|
||||
ATF_TC_BODY(remap_object, tc)
|
||||
{
|
||||
@ -958,6 +1083,79 @@ ATF_TC_BODY(fallocate, tc)
|
||||
close(fd);
|
||||
}
|
||||
|
||||
ATF_TC_WITHOUT_HEAD(fspacectl);
|
||||
ATF_TC_BODY(fspacectl, tc)
|
||||
{
|
||||
struct spacectl_range range;
|
||||
off_t offset, length, shm_sz;
|
||||
int fd, error;
|
||||
|
||||
shm_sz = shm_max_pages << PAGE_SHIFT;
|
||||
|
||||
fd = shm_open("/testtest", O_RDWR|O_CREAT, 0666);
|
||||
ATF_REQUIRE_MSG(fd >= 0, "shm_open failed; errno:%d", errno);
|
||||
ATF_REQUIRE_MSG((error = posix_fallocate(fd, 0, shm_sz)) == 0,
|
||||
"posix_fallocate failed; error=%d", error);
|
||||
|
||||
/* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) */
|
||||
ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
|
||||
range.r_offset = offset = PAGE_SIZE;
|
||||
range.r_len = length = ((shm_max_pages - 1) << PAGE_SHIFT) -
|
||||
range.r_offset;
|
||||
ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
|
||||
"Aligned fspacectl failed; errno=%d", errno);
|
||||
ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
|
||||
"Aligned fspacectl content checking failed", errno);
|
||||
|
||||
/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) */
|
||||
ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
|
||||
range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
|
||||
range.r_len = length = ((shm_max_pages - 1) << PAGE_SHIFT) +
|
||||
(1 << (PAGE_SHIFT - 1)) - offset;
|
||||
ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
|
||||
"Unaligned fspacectl failed; errno=%d", errno);
|
||||
ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
|
||||
"Unaligned fspacectl content checking failed", errno);
|
||||
|
||||
/* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) to OFF_MAX */
|
||||
ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
|
||||
range.r_offset = offset = PAGE_SHIFT;
|
||||
range.r_len = length = OFF_MAX - offset;
|
||||
ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
|
||||
"Aligned fspacectl to OFF_MAX failed; errno=%d", errno);
|
||||
ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
|
||||
"Aligned fspacectl to OFF_MAX content checking failed", errno);
|
||||
|
||||
/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) to OFF_MAX */
|
||||
ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
|
||||
range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
|
||||
range.r_len = length = OFF_MAX - offset;
|
||||
ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
|
||||
"Unaligned fspacectl to OFF_MAX failed; errno=%d", errno);
|
||||
ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
|
||||
"Unaligned fspacectl to OFF_MAX content checking failed", errno);
|
||||
|
||||
/* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) past shm_sz */
|
||||
ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
|
||||
range.r_offset = offset = PAGE_SIZE;
|
||||
range.r_len = length = ((shm_max_pages + 1) << PAGE_SHIFT) - offset;
|
||||
ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
|
||||
"Aligned fspacectl past shm_sz failed; errno=%d", errno);
|
||||
ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
|
||||
"Aligned fspacectl past shm_sz content checking failed", errno);
|
||||
|
||||
/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) past shm_sz */
|
||||
ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
|
||||
range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
|
||||
range.r_len = length = ((shm_max_pages + 1) << PAGE_SHIFT) - offset;
|
||||
ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
|
||||
"Unaligned fspacectl past shm_sz failed; errno=%d", errno);
|
||||
ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
|
||||
"Unaligned fspacectl past shm_sz content checking failed", errno);
|
||||
|
||||
ATF_REQUIRE(close(fd) == 0);
|
||||
}
|
||||
|
||||
static int
|
||||
shm_open_large(int psind, int policy, size_t sz)
|
||||
{
|
||||
@ -1704,6 +1902,7 @@ ATF_TP_ADD_TCS(tp)
|
||||
ATF_TP_ADD_TC(tp, cloexec);
|
||||
ATF_TP_ADD_TC(tp, mode);
|
||||
ATF_TP_ADD_TC(tp, fallocate);
|
||||
ATF_TP_ADD_TC(tp, fspacectl);
|
||||
ATF_TP_ADD_TC(tp, largepage_basic);
|
||||
ATF_TP_ADD_TC(tp, largepage_config);
|
||||
ATF_TP_ADD_TC(tp, largepage_mmap);
|
||||
|
Loading…
Reference in New Issue
Block a user