install(1): Avoid unncessary fstatfs() calls and use mmap() based on size

According to git blame the trymmap() function was added in 1996 to skip
mmap() calls for NFS file systems. However, nowadays mmap() should be
perfectly safe even on NFS. Importantly, onl ufs and cd9660 file systems
were whitelisted so we don't use mmap() on ZFS. It also prevents the use
of mmap() when bootstrapping from macOS/Linux since on those systems the
trymmap() function was always returning zero due to the missing MFSNAMELEN
define.

This change keeps the trymmap() function but changes it to check whether
using mmap() can reduce the number of system calls that are required.
Using mmap() only reduces the number of system calls if we need multiple read()
syscalls, i.e. if the file size is > MAXBSIZE. However, mmap() is more expensive
than read() so this sets the threshold at 4 fewer syscalls. Additionally, for
larger file size mmap() can significantly increase the number of page faults,
so avoid it in that case.

It's unclear whether using mmap() is ever faster than a read with an appropriate
buffer size, but this change at least removes two unnecessary system calls
for every file that is installed.

Reviewed By:	markj
Differential Revision: https://reviews.freebsd.org/D26041
This commit is contained in:
arichardson 2020-10-14 12:28:41 +00:00
parent 4a18674270
commit 71cad0ea1c

View File

@ -148,7 +148,7 @@ static void metadata_log(const char *, const char *, struct timespec *,
const char *, const char *, off_t); const char *, const char *, off_t);
static int parseid(const char *, id_t *); static int parseid(const char *, id_t *);
static int strip(const char *, int, const char *, char **); static int strip(const char *, int, const char *, char **);
static int trymmap(int); static int trymmap(size_t);
static void usage(void); static void usage(void);
int int
@ -1087,7 +1087,7 @@ compare(int from_fd, const char *from_name __unused, size_t from_len,
if (do_digest) if (do_digest)
digest_init(&ctx); digest_init(&ctx);
done_compare = 0; done_compare = 0;
if (trymmap(from_fd) && trymmap(to_fd)) { if (trymmap(from_len) && trymmap(to_len)) {
p = mmap(NULL, from_len, PROT_READ, MAP_SHARED, p = mmap(NULL, from_len, PROT_READ, MAP_SHARED,
from_fd, (off_t)0); from_fd, (off_t)0);
if (p == MAP_FAILED) if (p == MAP_FAILED)
@ -1248,13 +1248,8 @@ copy(int from_fd, const char *from_name, int to_fd, const char *to_name,
digest_init(&ctx); digest_init(&ctx);
/*
* Mmap and write if less than 8M (the limit is so we don't totally
* trash memory on big files. This is really a minor hack, but it
* wins some CPU back.
*/
done_copy = 0; done_copy = 0;
if (size <= 8 * 1048576 && trymmap(from_fd) && if (trymmap((size_t)size) &&
(p = mmap(NULL, (size_t)size, PROT_READ, MAP_SHARED, (p = mmap(NULL, (size_t)size, PROT_READ, MAP_SHARED,
from_fd, (off_t)0)) != MAP_FAILED) { from_fd, (off_t)0)) != MAP_FAILED) {
nw = write(to_fd, p, size); nw = write(to_fd, p, size);
@ -1523,20 +1518,23 @@ usage(void)
* return true (1) if mmap should be tried, false (0) if not. * return true (1) if mmap should be tried, false (0) if not.
*/ */
static int static int
trymmap(int fd) trymmap(size_t filesize)
{ {
/* /*
* The ifdef is for bootstrapping - f_fstypename doesn't exist in * This function existed to skip mmap() for NFS file systems whereas
* pre-Lite2-merge systems. * nowadays mmap() should be perfectly safe. Nevertheless, using mmap()
*/ * only reduces the number of system calls if we need multiple read()
#ifdef MFSNAMELEN * syscalls, i.e. if the file size is > MAXBSIZE. However, mmap() is
struct statfs stfs; * more expensive than read() so set the threshold at 4 fewer syscalls.
* Additionally, for larger file size mmap() can significantly increase
if (fstatfs(fd, &stfs) != 0) * the number of page faults, so avoid it in that case.
return (0); *
if (strcmp(stfs.f_fstypename, "ufs") == 0 || * Note: the 8MB limit is not based on any meaningful benchmarking
strcmp(stfs.f_fstypename, "cd9660") == 0) * results, it is simply reusing the same value that was used before
return (1); * and also matches bin/cp.
#endif *
return (0); * XXX: Maybe we shouldn't bother with mmap() at all, since we use
* MAXBSIZE the syscall overhead of read() shouldn't be too high?
*/
return (filesize > 4 * MAXBSIZE && filesize < 8 * 1024 * 1024);
} }