diff --git a/Makefile b/Makefile index d3dc7b1f109e..cf973dcad417 100644 --- a/Makefile +++ b/Makefile @@ -209,7 +209,8 @@ SUB_MAKE= `test -x ${MYMAKE} && echo ${MYMAKE} || echo ${MAKE}` \ SUB_MAKE= ${MAKE} -m ${.CURDIR}/share/mk .endif -_MAKE= PATH=${PATH} ${SUB_MAKE} -f Makefile.inc1 TARGET=${_TARGET} TARGET_ARCH=${_TARGET_ARCH} +_MAKE= PATH=${PATH} MAKE_CMD=${MAKE} ${SUB_MAKE} -f Makefile.inc1 \ + TARGET=${_TARGET} TARGET_ARCH=${_TARGET_ARCH} # Only allow meta mode for the whitelisted targets. See META_TGT_WHITELIST # above. diff --git a/Makefile.inc1 b/Makefile.inc1 index a27d562df736..14b553a9058b 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -1013,7 +1013,7 @@ distributeworld installworld stageworld: _installcheck_world .PHONY ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} - find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -empty -delete + find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -type d -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} @@ -2378,11 +2378,11 @@ check-old-dirs: .PHONY done delete-old: delete-old-files delete-old-dirs .PHONY - @echo "To remove old libraries run '${MAKE} delete-old-libs'." + @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." check-old: check-old-files check-old-libs check-old-dirs .PHONY - @echo "To remove old files and directories run '${MAKE} delete-old'." - @echo "To remove old libraries run '${MAKE} delete-old-libs'." + @echo "To remove old files and directories run '${MAKE_CMD} delete-old'." + @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." 
.endif diff --git a/bin/ps/ps.1 b/bin/ps/ps.1 index ea094bccbcb1..8ec78775b8f4 100644 --- a/bin/ps/ps.1 +++ b/bin/ps/ps.1 @@ -29,7 +29,7 @@ .\" @(#)ps.1 8.3 (Berkeley) 4/18/94 .\" $FreeBSD$ .\" -.Dd July 28, 2016 +.Dd August 12, 2016 .Dt PS 1 .Os .Sh NAME @@ -319,7 +319,6 @@ the include file .It Dv "P_ADVLOCK" Ta No "0x00001" Ta "Process may hold a POSIX advisory lock" .It Dv "P_CONTROLT" Ta No "0x00002" Ta "Has a controlling terminal" .It Dv "P_KPROC" Ta No "0x00004" Ta "Kernel process" -.It Dv "P_FOLLOWFORK" Ta No "0x00008" Ta "Attach debugger to new children" .It Dv "P_PPWAIT" Ta No "0x00010" Ta "Parent is waiting for child to exec/exit" .It Dv "P_PROFIL" Ta No "0x00020" Ta "Has started profiling" .It Dv "P_STOPPROF" Ta No "0x00040" Ta "Has thread in requesting to stop prof" @@ -768,7 +767,8 @@ operating systems. The .Nm command appeared in -.At v4 . +.At v3 +in section 8 of the manual. .Sh BUGS Since .Nm diff --git a/cddl/usr.sbin/dtrace/tests/common/raise/Makefile b/cddl/usr.sbin/dtrace/tests/common/raise/Makefile index cde512f2d21f..83af0e40de1f 100644 --- a/cddl/usr.sbin/dtrace/tests/common/raise/Makefile +++ b/cddl/usr.sbin/dtrace/tests/common/raise/Makefile @@ -20,4 +20,6 @@ CFILES= \ tst.raise3.c \ +TEST_METADATA.t_dtrace_contrib+= required_memory="4g" + .include "../../dtrace.test.mk" diff --git a/cddl/usr.sbin/dtrace/tests/common/safety/Makefile b/cddl/usr.sbin/dtrace/tests/common/safety/Makefile index 53260533b641..5056260d0621 100644 --- a/cddl/usr.sbin/dtrace/tests/common/safety/Makefile +++ b/cddl/usr.sbin/dtrace/tests/common/safety/Makefile @@ -53,4 +53,6 @@ CFILES= \ +TEST_METADATA.t_dtrace_contrib+= required_memory="4g" + .include "../../dtrace.test.mk" diff --git a/cddl/usr.sbin/dtrace/tests/tools/genmakefiles.sh b/cddl/usr.sbin/dtrace/tests/tools/genmakefiles.sh index 9953064bcd3e..4b889442f45b 100755 --- a/cddl/usr.sbin/dtrace/tests/tools/genmakefiles.sh +++ b/cddl/usr.sbin/dtrace/tests/tools/genmakefiles.sh @@ -34,15 +34,28 @@ genmakefile() # 
One-off variable definitions. local special - if [ "$basedir" = proc ]; then + case "$basedir" in + proc) special=" LIBADD.tst.sigwait.exe+= rt " - elif [ "$basedir" = uctf ]; then + ;; + raise) + special=" +TEST_METADATA.t_dtrace_contrib+= required_memory=\"4g\" +" + ;; + safety) + special=" +TEST_METADATA.t_dtrace_contrib+= required_memory=\"4g\" +" + ;; + uctf) special=" WITH_CTF=YES " - fi + ;; + esac local makefile=$(mktemp) cat <<__EOF__ > $makefile diff --git a/etc/rc.d/jail b/etc/rc.d/jail index 51ecf77eb9cc..216c80e7fca3 100755 --- a/etc/rc.d/jail +++ b/etc/rc.d/jail @@ -260,6 +260,7 @@ parse_options() extract_var $_jv set_hostname_allow allow.set_hostname YN NO extract_var $_jv sysvipc_allow allow.sysvipc YN NO + extract_var $_jv enforce_statfs enforce_statfs - 2 extract_var $_jv osreldate osreldate extract_var $_jv osrelease osrelease for _p in $_parameters; do diff --git a/include/unistd.h b/include/unistd.h index 0d20027f8d56..0fb36e50e050 100644 --- a/include/unistd.h +++ b/include/unistd.h @@ -484,11 +484,18 @@ pid_t vfork(void) __returns_twice; #if __BSD_VISIBLE struct timeval; /* select(2) */ + +struct crypt_data { + int initialized; /* For compatibility with glibc. */ + char __buf[256]; /* Buffer returned by crypt_r(). 
*/ +}; + int acct(const char *); int async_daemon(void); int check_utility_compat(const char *); const char * crypt_get_format(void); +char *crypt_r(const char *, const char *, struct crypt_data *); int crypt_set_format(const char *); int des_cipher(const char *, char *, long, int); int des_setkey(const char *key); diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index 696d907ac609..58f582fb4e73 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -29,6 +29,7 @@ SRCS+= __getosreldate.c \ devname.c \ dirfd.c \ dirname.c \ + dirname_compat.c \ disklabel.c \ dlfcn.c \ drand48.c \ diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index 0d5dfec80cce..6e5db3d8abb6 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -82,7 +82,6 @@ FBSD_1.0 { daemon; devname; devname_r; - dirname; getdiskbyname; dladdr; dlclose; @@ -418,6 +417,10 @@ FBSD_1.4 { stravis; }; +FBSD_1.5 { + dirname; +}; + FBSDprivate_1.0 { /* needed by thread libraries */ __thr_jtable; diff --git a/lib/libc/gen/dirname.3 b/lib/libc/gen/dirname.3 index 6685618a9869..18405f9af6ef 100644 --- a/lib/libc/gen/dirname.3 +++ b/lib/libc/gen/dirname.3 @@ -16,7 +16,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 29, 2016 +.Dd August 12, 2016 .Dt DIRNAME 3 .Os .Sh NAME @@ -37,6 +37,7 @@ Any trailing .Sq \&/ characters are not counted as part of the directory name. +.Sh RETURN VALUES If .Fa path is a null pointer, the empty string, or contains no @@ -46,40 +47,24 @@ characters, returns a pointer to the string .Qq \&. , signifying the current directory. -.Sh IMPLEMENTATION NOTES -The -.Fn dirname -function -returns a pointer to internal storage space allocated on the first call -that will be overwritten -by subsequent calls. -.Pp -Other vendor implementations of -.Fn dirname -may store their result in the input buffer, -making it safe to use in multithreaded applications. -Future versions of -.Fx -will follow this approach as well. 
-.Sh RETURN VALUES -On successful completion, -.Fn dirname -returns a pointer to the parent directory of +Otherwise, +it returns a pointer to the parent directory of .Fa path . -.Pp -If +.Sh IMPLEMENTATION NOTES +This implementation of .Fn dirname -fails, a null pointer is returned and the global variable -.Va errno -is set to indicate the error. -.Sh ERRORS -The following error codes may be set in -.Va errno : -.Bl -tag -width Er -.It Bq Er ENAMETOOLONG -The path component to be returned was larger than -.Dv MAXPATHLEN . -.El +uses the buffer provided by the caller to store the resulting parent +directory. +Other vendor implementations may return a pointer to internal storage +space instead. +The advantage of the former approach is that it ensures thread-safety, +while also placing no upper limit on the supported length of the +pathname. +.Pp +The algorithm used by this implementation also discards redundant +slashes and +.Qq \&. +pathname components from the pathname string. .Sh SEE ALSO .Xr basename 1 , .Xr dirname 1 , @@ -96,5 +81,10 @@ function first appeared in .Ox 2.2 and .Fx 4.2 . +.Pp +In +.Fx 12.0 , +this function was reimplemented to store its result in the provided +input buffer. .Sh AUTHORS -.An "Todd C. Miller" +.An Nuxi, the Netherlands diff --git a/lib/libc/gen/dirname.c b/lib/libc/gen/dirname.c index 5e0042fd2d10..3113631ac943 100644 --- a/lib/libc/gen/dirname.c +++ b/lib/libc/gen/dirname.c @@ -1,77 +1,90 @@ -/* $OpenBSD: dirname.c,v 1.13 2005/08/08 08:05:33 espie Exp $ */ - -/* - * Copyright (c) 1997, 2004 Todd C. Miller +/*- + * Copyright (c) 2015-2016 Nuxi, https://nuxi.nl/ * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. 
+ * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); -#include #include -#include +#include #include -#include char * dirname(char *path) { - static char *dname = NULL; - size_t len; - const char *endp; + const char *in, *prev, *begin, *end; + char *out; + size_t prevlen; + bool skipslash; - if (dname == NULL) { - dname = (char *)malloc(MAXPATHLEN); - if (dname == NULL) - return(NULL); + /* + * If path is a null pointer or points to an empty string, + * dirname() shall return a pointer to the string ".". + */ + if (path == NULL || *path == '\0') + return ((char *)"."); + + /* Retain at least one leading slash character. */ + in = out = *path == '/' ? path + 1 : path; + + skipslash = true; + prev = "."; + prevlen = 1; + for (;;) { + /* Extract the next pathname component. */ + while (*in == '/') + ++in; + begin = in; + while (*in != '/' && *in != '\0') + ++in; + end = in; + if (begin == end) + break; + + /* + * Copy over the previous pathname component, except if + * it's dot. There is no point in retaining those. + */ + if (prevlen != 1 || *prev != '.') { + if (!skipslash) + *out++ = '/'; + skipslash = false; + memmove(out, prev, prevlen); + out += prevlen; + } + + /* Preserve the pathname component for the next iteration. */ + prev = begin; + prevlen = end - begin; } - /* Empty or NULL string gets treated as "." */ - if (path == NULL || *path == '\0') { - dname[0] = '.'; - dname[1] = '\0'; - return (dname); - } - - /* Strip any trailing slashes */ - endp = path + strlen(path) - 1; - while (endp > path && *endp == '/') - endp--; - - /* Find the start of the dir */ - while (endp > path && *endp != '/') - endp--; - - /* Either the dir is "/" or there are no slashes */ - if (endp == path) { - dname[0] = *endp == '/' ? 
'/' : '.'; - dname[1] = '\0'; - return (dname); - } else { - /* Move forward past the separating slashes */ - do { - endp--; - } while (endp > path && *endp == '/'); - } - - len = endp - path + 1; - if (len >= MAXPATHLEN) { - errno = ENAMETOOLONG; - return (NULL); - } - memcpy(dname, path, len); - dname[len] = '\0'; - return (dname); + /* + * If path does not contain a '/', then dirname() shall return a + * pointer to the string ".". + */ + if (out == path) + *out++ = '.'; + *out = '\0'; + return (path); } diff --git a/lib/libc/gen/dirname_compat.c b/lib/libc/gen/dirname_compat.c new file mode 100644 index 000000000000..48ad32c8d70a --- /dev/null +++ b/lib/libc/gen/dirname_compat.c @@ -0,0 +1,79 @@ +/* $OpenBSD: dirname.c,v 1.13 2005/08/08 08:05:33 espie Exp $ */ + +/* + * Copyright (c) 1997, 2004 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +char * +__freebsd11_dirname(char *path) +{ + static char *dname = NULL; + size_t len; + const char *endp; + + if (dname == NULL) { + dname = (char *)malloc(MAXPATHLEN); + if (dname == NULL) + return(NULL); + } + + /* Empty or NULL string gets treated as "." 
*/ + if (path == NULL || *path == '\0') { + dname[0] = '.'; + dname[1] = '\0'; + return (dname); + } + + /* Strip any trailing slashes */ + endp = path + strlen(path) - 1; + while (endp > path && *endp == '/') + endp--; + + /* Find the start of the dir */ + while (endp > path && *endp != '/') + endp--; + + /* Either the dir is "/" or there are no slashes */ + if (endp == path) { + dname[0] = *endp == '/' ? '/' : '.'; + dname[1] = '\0'; + return (dname); + } else { + /* Move forward past the separating slashes */ + do { + endp--; + } while (endp > path && *endp == '/'); + } + + len = endp - path + 1; + if (len >= MAXPATHLEN) { + errno = ENAMETOOLONG; + return (NULL); + } + memcpy(dname, path, len); + dname[len] = '\0'; + return (dname); +} + +__sym_compat(dirname, __freebsd11_dirname, FBSD_1.0); diff --git a/lib/libc/net/getaddrinfo.c b/lib/libc/net/getaddrinfo.c index 0f82fd3cd212..56500c69d3f5 100644 --- a/lib/libc/net/getaddrinfo.c +++ b/lib/libc/net/getaddrinfo.c @@ -2249,6 +2249,8 @@ _dns_getaddrinfo(void *rv, void *cb_data, va_list ap) struct res_target q, q2; res_state res; + ai = NULL; + hostname = va_arg(ap, char *); pai = va_arg(ap, const struct addrinfo *); @@ -2327,16 +2329,16 @@ _dns_getaddrinfo(void *rv, void *cb_data, va_list ap) /* prefer IPv6 */ if (q.next) { ai = getanswer(buf2, q2.n, q2.name, q2.qtype, pai, res); - if (ai) { + if (ai != NULL) { cur->ai_next = ai; while (cur && cur->ai_next) cur = cur->ai_next; } } - if (!ai || pai->ai_family != AF_UNSPEC || + if (ai == NULL || pai->ai_family != AF_UNSPEC || (pai->ai_flags & (AI_ALL | AI_V4MAPPED)) != AI_V4MAPPED) { ai = getanswer(buf, q.n, q.name, q.qtype, pai, res); - if (ai) + if (ai != NULL) cur->ai_next = ai; } free(buf); diff --git a/lib/libc/tests/resolv/resolv_test.c b/lib/libc/tests/resolv/resolv_test.c index 74e89b13b80f..1da42e3bcd18 100644 --- a/lib/libc/tests/resolv/resolv_test.c +++ b/lib/libc/tests/resolv/resolv_test.c @@ -291,7 +291,7 @@ do { \ ATF_TC(getaddrinfo_test); 
ATF_TC_HEAD(getaddrinfo_test, tc) { - atf_tc_set_md_var(tc, "timeout", "450"); + atf_tc_set_md_var(tc, "timeout", "1200"); } ATF_TC_BODY(getaddrinfo_test, tc) { @@ -301,7 +301,7 @@ ATF_TC_BODY(getaddrinfo_test, tc) ATF_TC(gethostby_test); ATF_TC_HEAD(gethostby_test, tc) { - atf_tc_set_md_var(tc, "timeout", "450"); + atf_tc_set_md_var(tc, "timeout", "1200"); } ATF_TC_BODY(gethostby_test, tc) { @@ -312,7 +312,7 @@ ATF_TC_BODY(gethostby_test, tc) ATF_TC(getipnodeby_test); ATF_TC_HEAD(getipnodeby_test, tc) { - atf_tc_set_md_var(tc, "timeout", "450"); + atf_tc_set_md_var(tc, "timeout", "1200"); } ATF_TC_BODY(getipnodeby_test, tc) { diff --git a/lib/libcrypt/Makefile b/lib/libcrypt/Makefile index 3b982a3a62b7..fc966f597d76 100644 --- a/lib/libcrypt/Makefile +++ b/lib/libcrypt/Makefile @@ -17,7 +17,8 @@ SRCS= crypt.c misc.c \ crypt-sha256.c sha256c.c \ crypt-sha512.c sha512c.c MAN= crypt.3 -MLINKS= crypt.3 crypt_get_format.3 crypt.3 crypt_set_format.3 +MLINKS= crypt.3 crypt_get_format.3 crypt.3 crypt_r.3 \ + crypt.3 crypt_set_format.3 CFLAGS+= -I${.CURDIR}/../libmd -I${.CURDIR}/../libutil \ -I${.CURDIR}/../../sys/crypto/sha2 diff --git a/lib/libcrypt/crypt-md5.c b/lib/libcrypt/crypt-md5.c index 33186cd4de95..55986750e616 100644 --- a/lib/libcrypt/crypt-md5.c +++ b/lib/libcrypt/crypt-md5.c @@ -41,31 +41,27 @@ __FBSDID("$FreeBSD$"); * UNIX password */ -char * -crypt_md5(const char *pw, const char *salt) +int +crypt_md5(const char *pw, const char *salt, char *buffer) { MD5_CTX ctx,ctx1; unsigned long l; int sl, pl; u_int i; u_char final[MD5_SIZE]; - static const char *sp, *ep; - static char passwd[120], *p; + const char *ep; static const char *magic = "$1$"; - /* Refine the Salt first */ - sp = salt; - - /* If it starts with the magic string, then skip that */ - if(!strncmp(sp, magic, strlen(magic))) - sp += strlen(magic); + /* If the salt starts with the magic string, skip that. 
*/ + if (!strncmp(salt, magic, strlen(magic))) + salt += strlen(magic); /* It stops at the first '$', max 8 chars */ - for(ep = sp; *ep && *ep != '$' && ep < (sp + 8); ep++) + for (ep = salt; *ep && *ep != '$' && ep < salt + 8; ep++) continue; /* get the length of the true salt */ - sl = ep - sp; + sl = ep - salt; MD5Init(&ctx); @@ -76,12 +72,12 @@ crypt_md5(const char *pw, const char *salt) MD5Update(&ctx, (const u_char *)magic, strlen(magic)); /* Then the raw salt */ - MD5Update(&ctx, (const u_char *)sp, (u_int)sl); + MD5Update(&ctx, (const u_char *)salt, (u_int)sl); /* Then just as many characters of the MD5(pw,salt,pw) */ MD5Init(&ctx1); MD5Update(&ctx1, (const u_char *)pw, strlen(pw)); - MD5Update(&ctx1, (const u_char *)sp, (u_int)sl); + MD5Update(&ctx1, (const u_char *)salt, (u_int)sl); MD5Update(&ctx1, (const u_char *)pw, strlen(pw)); MD5Final(final, &ctx1); for(pl = (int)strlen(pw); pl > 0; pl -= MD5_SIZE) @@ -99,9 +95,9 @@ crypt_md5(const char *pw, const char *salt) MD5Update(&ctx, (const u_char *)pw, 1); /* Now make the output string */ - strcpy(passwd, magic); - strncat(passwd, sp, (u_int)sl); - strcat(passwd, "$"); + buffer = stpcpy(buffer, magic); + buffer = stpncpy(buffer, salt, (u_int)sl); + *buffer++ = '$'; MD5Final(final, &ctx); @@ -118,7 +114,7 @@ crypt_md5(const char *pw, const char *salt) MD5Update(&ctx1, (const u_char *)final, MD5_SIZE); if(i % 3) - MD5Update(&ctx1, (const u_char *)sp, (u_int)sl); + MD5Update(&ctx1, (const u_char *)salt, (u_int)sl); if(i % 7) MD5Update(&ctx1, (const u_char *)pw, strlen(pw)); @@ -130,24 +126,22 @@ crypt_md5(const char *pw, const char *salt) MD5Final(final, &ctx1); } - p = passwd + strlen(passwd); - l = (final[ 0]<<16) | (final[ 6]<<8) | final[12]; - _crypt_to64(p, l, 4); p += 4; + _crypt_to64(buffer, l, 4); buffer += 4; l = (final[ 1]<<16) | (final[ 7]<<8) | final[13]; - _crypt_to64(p, l, 4); p += 4; + _crypt_to64(buffer, l, 4); buffer += 4; l = (final[ 2]<<16) | (final[ 8]<<8) | final[14]; - _crypt_to64(p, l, 
4); p += 4; + _crypt_to64(buffer, l, 4); buffer += 4; l = (final[ 3]<<16) | (final[ 9]<<8) | final[15]; - _crypt_to64(p, l, 4); p += 4; + _crypt_to64(buffer, l, 4); buffer += 4; l = (final[ 4]<<16) | (final[10]<<8) | final[ 5]; - _crypt_to64(p, l, 4); p += 4; + _crypt_to64(buffer, l, 4); buffer += 4; l = final[11]; - _crypt_to64(p, l, 2); p += 2; - *p = '\0'; + _crypt_to64(buffer, l, 2); buffer += 2; + *buffer = '\0'; /* Don't leave anything around in vm they could use. */ memset(final, 0, sizeof(final)); - return (passwd); + return (0); } diff --git a/lib/libcrypt/crypt-nthash.c b/lib/libcrypt/crypt-nthash.c index 19b84ceb831d..b637eb6b5fa3 100644 --- a/lib/libcrypt/crypt-nthash.c +++ b/lib/libcrypt/crypt-nthash.c @@ -46,16 +46,14 @@ __FBSDID("$FreeBSD$"); */ /* ARGSUSED */ -char * -crypt_nthash(const char *pw, const char *salt __unused) +int +crypt_nthash(const char *pw, const char *salt __unused, char *buffer) { size_t unipwLen; - int i, j; - static char hexconvtab[] = "0123456789abcdef"; + int i; + static const char hexconvtab[] = "0123456789abcdef"; static const char *magic = "$3$"; - static char passwd[120]; u_int16_t unipw[128]; - char final[MD4_SIZE*2 + 1]; u_char hash[MD4_SIZE]; const char *s; MD4_CTX ctx; @@ -70,19 +68,14 @@ crypt_nthash(const char *pw, const char *salt __unused) MD4Init(&ctx); MD4Update(&ctx, (u_char *)unipw, unipwLen*sizeof(u_int16_t)); MD4Final(hash, &ctx); - - for (i = j = 0; i < MD4_SIZE; i++) { - final[j++] = hexconvtab[hash[i] >> 4]; - final[j++] = hexconvtab[hash[i] & 15]; + + buffer = stpcpy(buffer, magic); + *buffer++ = '$'; + for (i = 0; i < MD4_SIZE; i++) { + *buffer++ = hexconvtab[hash[i] >> 4]; + *buffer++ = hexconvtab[hash[i] & 15]; } - final[j] = '\0'; + *buffer = '\0'; - strcpy(passwd, magic); - strcat(passwd, "$"); - strncat(passwd, final, MD4_SIZE*2); - - /* Don't leave anything around in vm they could use. 
*/ - memset(final, 0, sizeof(final)); - - return (passwd); + return (0); } diff --git a/lib/libcrypt/crypt-sha256.c b/lib/libcrypt/crypt-sha256.c index cab7405bfd60..0cfef105734e 100644 --- a/lib/libcrypt/crypt-sha256.c +++ b/lib/libcrypt/crypt-sha256.c @@ -59,11 +59,10 @@ static const char sha256_rounds_prefix[] = "rounds="; /* Maximum number of rounds. */ #define ROUNDS_MAX 999999999 -static char * -crypt_sha256_r(const char *key, const char *salt, char *buffer, int buflen) +int +crypt_sha256(const char *key, const char *salt, char *buffer) { u_long srounds; - int n; uint8_t alt_result[32], temp_result[32]; SHA256_CTX ctx, alt_ctx; size_t salt_len, key_len, cnt, rounds; @@ -210,42 +209,27 @@ crypt_sha256_r(const char *key, const char *salt, char *buffer, int buflen) /* Now we can construct the result string. It consists of three * parts. */ - cp = stpncpy(buffer, sha256_salt_prefix, MAX(0, buflen)); - buflen -= sizeof(sha256_salt_prefix) - 1; + cp = stpcpy(buffer, sha256_salt_prefix); - if (rounds_custom) { - n = snprintf(cp, MAX(0, buflen), "%s%zu$", - sha256_rounds_prefix, rounds); + if (rounds_custom) + cp += sprintf(cp, "%s%zu$", sha256_rounds_prefix, rounds); - cp += n; - buflen -= n; - } + cp = stpncpy(cp, salt, salt_len); - cp = stpncpy(cp, salt, MIN((size_t)MAX(0, buflen), salt_len)); - buflen -= MIN((size_t)MAX(0, buflen), salt_len); + *cp++ = '$'; - if (buflen > 0) { - *cp++ = '$'; - --buflen; - } - - b64_from_24bit(alt_result[0], alt_result[10], alt_result[20], 4, &buflen, &cp); - b64_from_24bit(alt_result[21], alt_result[1], alt_result[11], 4, &buflen, &cp); - b64_from_24bit(alt_result[12], alt_result[22], alt_result[2], 4, &buflen, &cp); - b64_from_24bit(alt_result[3], alt_result[13], alt_result[23], 4, &buflen, &cp); - b64_from_24bit(alt_result[24], alt_result[4], alt_result[14], 4, &buflen, &cp); - b64_from_24bit(alt_result[15], alt_result[25], alt_result[5], 4, &buflen, &cp); - b64_from_24bit(alt_result[6], alt_result[16], alt_result[26], 4, 
&buflen, &cp); - b64_from_24bit(alt_result[27], alt_result[7], alt_result[17], 4, &buflen, &cp); - b64_from_24bit(alt_result[18], alt_result[28], alt_result[8], 4, &buflen, &cp); - b64_from_24bit(alt_result[9], alt_result[19], alt_result[29], 4, &buflen, &cp); - b64_from_24bit(0, alt_result[31], alt_result[30], 3, &buflen, &cp); - if (buflen <= 0) { - errno = ERANGE; - buffer = NULL; - } - else - *cp = '\0'; /* Terminate the string. */ + b64_from_24bit(alt_result[0], alt_result[10], alt_result[20], 4, &cp); + b64_from_24bit(alt_result[21], alt_result[1], alt_result[11], 4, &cp); + b64_from_24bit(alt_result[12], alt_result[22], alt_result[2], 4, &cp); + b64_from_24bit(alt_result[3], alt_result[13], alt_result[23], 4, &cp); + b64_from_24bit(alt_result[24], alt_result[4], alt_result[14], 4, &cp); + b64_from_24bit(alt_result[15], alt_result[25], alt_result[5], 4, &cp); + b64_from_24bit(alt_result[6], alt_result[16], alt_result[26], 4, &cp); + b64_from_24bit(alt_result[27], alt_result[7], alt_result[17], 4, &cp); + b64_from_24bit(alt_result[18], alt_result[28], alt_result[8], 4, &cp); + b64_from_24bit(alt_result[9], alt_result[19], alt_result[29], 4, &cp); + b64_from_24bit(0, alt_result[31], alt_result[30], 3, &cp); + *cp = '\0'; /* Terminate the string. */ /* Clear the buffer for the intermediate result so that people * attaching to processes or reading core dumps cannot get any @@ -263,37 +247,7 @@ crypt_sha256_r(const char *key, const char *salt, char *buffer, int buflen) if (copied_salt != NULL) memset(copied_salt, '\0', salt_len); - return buffer; -} - -/* This entry point is equivalent to crypt(3). */ -char * -crypt_sha256(const char *key, const char *salt) -{ - /* We don't want to have an arbitrary limit in the size of the - * password. We can compute an upper bound for the size of the - * result in advance and so we can prepare the buffer we pass to - * `crypt_sha256_r'. 
*/ - static char *buffer; - static int buflen; - int needed; - char *new_buffer; - - needed = (sizeof(sha256_salt_prefix) - 1 - + sizeof(sha256_rounds_prefix) + 9 + 1 - + strlen(salt) + 1 + 43 + 1); - - if (buflen < needed) { - new_buffer = (char *)realloc(buffer, needed); - - if (new_buffer == NULL) - return NULL; - - buffer = new_buffer; - buflen = needed; - } - - return crypt_sha256_r(key, salt, buffer, buflen); + return (0); } #ifdef TEST diff --git a/lib/libcrypt/crypt-sha512.c b/lib/libcrypt/crypt-sha512.c index 8e0054fb5a89..2e7cd4ec580e 100644 --- a/lib/libcrypt/crypt-sha512.c +++ b/lib/libcrypt/crypt-sha512.c @@ -59,11 +59,10 @@ static const char sha512_rounds_prefix[] = "rounds="; /* Maximum number of rounds. */ #define ROUNDS_MAX 999999999 -static char * -crypt_sha512_r(const char *key, const char *salt, char *buffer, int buflen) +int +crypt_sha512(const char *key, const char *salt, char *buffer) { u_long srounds; - int n; uint8_t alt_result[64], temp_result[64]; SHA512_CTX ctx, alt_ctx; size_t salt_len, key_len, cnt, rounds; @@ -210,54 +209,39 @@ crypt_sha512_r(const char *key, const char *salt, char *buffer, int buflen) /* Now we can construct the result string. It consists of three * parts. 
*/ - cp = stpncpy(buffer, sha512_salt_prefix, MAX(0, buflen)); - buflen -= sizeof(sha512_salt_prefix) - 1; + cp = stpcpy(buffer, sha512_salt_prefix); - if (rounds_custom) { - n = snprintf(cp, MAX(0, buflen), "%s%zu$", - sha512_rounds_prefix, rounds); + if (rounds_custom) + cp += sprintf(cp, "%s%zu$", sha512_rounds_prefix, rounds); - cp += n; - buflen -= n; - } + cp = stpncpy(cp, salt, salt_len); - cp = stpncpy(cp, salt, MIN((size_t)MAX(0, buflen), salt_len)); - buflen -= MIN((size_t)MAX(0, buflen), salt_len); + *cp++ = '$'; - if (buflen > 0) { - *cp++ = '$'; - --buflen; - } + b64_from_24bit(alt_result[0], alt_result[21], alt_result[42], 4, &cp); + b64_from_24bit(alt_result[22], alt_result[43], alt_result[1], 4, &cp); + b64_from_24bit(alt_result[44], alt_result[2], alt_result[23], 4, &cp); + b64_from_24bit(alt_result[3], alt_result[24], alt_result[45], 4, &cp); + b64_from_24bit(alt_result[25], alt_result[46], alt_result[4], 4, &cp); + b64_from_24bit(alt_result[47], alt_result[5], alt_result[26], 4, &cp); + b64_from_24bit(alt_result[6], alt_result[27], alt_result[48], 4, &cp); + b64_from_24bit(alt_result[28], alt_result[49], alt_result[7], 4, &cp); + b64_from_24bit(alt_result[50], alt_result[8], alt_result[29], 4, &cp); + b64_from_24bit(alt_result[9], alt_result[30], alt_result[51], 4, &cp); + b64_from_24bit(alt_result[31], alt_result[52], alt_result[10], 4, &cp); + b64_from_24bit(alt_result[53], alt_result[11], alt_result[32], 4, &cp); + b64_from_24bit(alt_result[12], alt_result[33], alt_result[54], 4, &cp); + b64_from_24bit(alt_result[34], alt_result[55], alt_result[13], 4, &cp); + b64_from_24bit(alt_result[56], alt_result[14], alt_result[35], 4, &cp); + b64_from_24bit(alt_result[15], alt_result[36], alt_result[57], 4, &cp); + b64_from_24bit(alt_result[37], alt_result[58], alt_result[16], 4, &cp); + b64_from_24bit(alt_result[59], alt_result[17], alt_result[38], 4, &cp); + b64_from_24bit(alt_result[18], alt_result[39], alt_result[60], 4, &cp); + 
b64_from_24bit(alt_result[40], alt_result[61], alt_result[19], 4, &cp); + b64_from_24bit(alt_result[62], alt_result[20], alt_result[41], 4, &cp); + b64_from_24bit(0, 0, alt_result[63], 2, &cp); - b64_from_24bit(alt_result[0], alt_result[21], alt_result[42], 4, &buflen, &cp); - b64_from_24bit(alt_result[22], alt_result[43], alt_result[1], 4, &buflen, &cp); - b64_from_24bit(alt_result[44], alt_result[2], alt_result[23], 4, &buflen, &cp); - b64_from_24bit(alt_result[3], alt_result[24], alt_result[45], 4, &buflen, &cp); - b64_from_24bit(alt_result[25], alt_result[46], alt_result[4], 4, &buflen, &cp); - b64_from_24bit(alt_result[47], alt_result[5], alt_result[26], 4, &buflen, &cp); - b64_from_24bit(alt_result[6], alt_result[27], alt_result[48], 4, &buflen, &cp); - b64_from_24bit(alt_result[28], alt_result[49], alt_result[7], 4, &buflen, &cp); - b64_from_24bit(alt_result[50], alt_result[8], alt_result[29], 4, &buflen, &cp); - b64_from_24bit(alt_result[9], alt_result[30], alt_result[51], 4, &buflen, &cp); - b64_from_24bit(alt_result[31], alt_result[52], alt_result[10], 4, &buflen, &cp); - b64_from_24bit(alt_result[53], alt_result[11], alt_result[32], 4, &buflen, &cp); - b64_from_24bit(alt_result[12], alt_result[33], alt_result[54], 4, &buflen, &cp); - b64_from_24bit(alt_result[34], alt_result[55], alt_result[13], 4, &buflen, &cp); - b64_from_24bit(alt_result[56], alt_result[14], alt_result[35], 4, &buflen, &cp); - b64_from_24bit(alt_result[15], alt_result[36], alt_result[57], 4, &buflen, &cp); - b64_from_24bit(alt_result[37], alt_result[58], alt_result[16], 4, &buflen, &cp); - b64_from_24bit(alt_result[59], alt_result[17], alt_result[38], 4, &buflen, &cp); - b64_from_24bit(alt_result[18], alt_result[39], alt_result[60], 4, &buflen, &cp); - b64_from_24bit(alt_result[40], alt_result[61], alt_result[19], 4, &buflen, &cp); - b64_from_24bit(alt_result[62], alt_result[20], alt_result[41], 4, &buflen, &cp); - b64_from_24bit(0, 0, alt_result[63], 2, &buflen, &cp); - - if (buflen 
<= 0) { - errno = ERANGE; - buffer = NULL; - } - else - *cp = '\0'; /* Terminate the string. */ + *cp = '\0'; /* Terminate the string. */ /* Clear the buffer for the intermediate result so that people * attaching to processes or reading core dumps cannot get any @@ -275,37 +259,7 @@ crypt_sha512_r(const char *key, const char *salt, char *buffer, int buflen) if (copied_salt != NULL) memset(copied_salt, '\0', salt_len); - return buffer; -} - -/* This entry point is equivalent to crypt(3). */ -char * -crypt_sha512(const char *key, const char *salt) -{ - /* We don't want to have an arbitrary limit in the size of the - * password. We can compute an upper bound for the size of the - * result in advance and so we can prepare the buffer we pass to - * `crypt_sha512_r'. */ - static char *buffer; - static int buflen; - int needed; - char *new_buffer; - - needed = (sizeof(sha512_salt_prefix) - 1 - + sizeof(sha512_rounds_prefix) + 9 + 1 - + strlen(salt) + 1 + 86 + 1); - - if (buflen < needed) { - new_buffer = (char *)realloc(buffer, needed); - - if (new_buffer == NULL) - return NULL; - - buffer = new_buffer; - buflen = needed; - } - - return crypt_sha512_r(key, salt, buffer, buflen); + return (0); } #ifdef TEST diff --git a/lib/libcrypt/crypt.3 b/lib/libcrypt/crypt.3 index 828c37f51d63..f16dfd5dd053 100644 --- a/lib/libcrypt/crypt.3 +++ b/lib/libcrypt/crypt.3 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 9, 2014 +.Dd August 10, 2016 .Dt CRYPT 3 .Os .Sh NAME @@ -41,6 +41,8 @@ .In unistd.h .Ft char * .Fn crypt "const char *key" "const char *salt" +.Ft char * +.Fn crypt_r "const char *key" "const char *salt" "struct crypt_data *data" .Ft const char * .Fn crypt_get_format "void" .Ft int @@ -246,10 +248,20 @@ The .Fn crypt_set_format function sets the default encoding format according to the supplied .Fa string . +.Pp +The +.Fn crypt_r +function behaves identically to +.Fn crypt , +except that the resulting string is stored in +.Fa data , +making it thread-safe. 
.Sh RETURN VALUES The .Fn crypt -function returns a pointer to the encrypted value on success, and NULL on +and +.Fn crypt_r +functions return a pointer to the encrypted value on success, and NULL on failure. Note: this is not a standard behaviour, AT&T .Fn crypt @@ -280,6 +292,11 @@ section of the code (FreeSec 1.0) was developed outside the United States of America as an unencumbered replacement for the U.S.-only .Nx libcrypt encryption library. +.Pp +The +.Fn crypt_r +function was added in +.Fx 12.0 . .Sh AUTHORS .An -nosplit Originally written by diff --git a/lib/libcrypt/crypt.c b/lib/libcrypt/crypt.c index 623809e5afce..54dc33b54977 100644 --- a/lib/libcrypt/crypt.c +++ b/lib/libcrypt/crypt.c @@ -46,9 +46,9 @@ __FBSDID("$FreeBSD$"); * and it needs to be the default for backward compatibility. */ static const struct crypt_format { - const char *const name; - char *(*const func)(const char *, const char *); - const char *const magic; + const char *name; + int (*func)(const char *, const char *, char *); + const char *magic; } crypt_formats[] = { { "md5", crypt_md5, "$1$" }, #ifdef HAS_BLOWFISH @@ -104,20 +104,37 @@ crypt_set_format(const char *format) * otherwise, the currently selected format is used. 
*/ char * -crypt(const char *passwd, const char *salt) +crypt_r(const char *passwd, const char *salt, struct crypt_data *data) { const struct crypt_format *cf; + int (*func)(const char *, const char *, char *); #ifdef HAS_DES int len; #endif for (cf = crypt_formats; cf->name != NULL; ++cf) - if (cf->magic != NULL && strstr(salt, cf->magic) == salt) - return (cf->func(passwd, salt)); + if (cf->magic != NULL && strstr(salt, cf->magic) == salt) { + func = cf->func; + goto match; + } #ifdef HAS_DES len = strlen(salt); - if ((len == 13 || len == 2) && strspn(salt, DES_SALT_ALPHABET) == len) - return (crypt_des(passwd, salt)); + if ((len == 13 || len == 2) && strspn(salt, DES_SALT_ALPHABET) == len) { + func = crypt_des; + goto match; + } #endif - return (crypt_format->func(passwd, salt)); + func = crypt_format->func; +match: + if (func(passwd, salt, data->__buf) != 0) + return (NULL); + return (data->__buf); +} + +char * +crypt(const char *passwd, const char *salt) +{ + static struct crypt_data data; + + return (crypt_r(passwd, salt, &data)); } diff --git a/lib/libcrypt/crypt.h b/lib/libcrypt/crypt.h index b33ad0943e29..81b0e0324efd 100644 --- a/lib/libcrypt/crypt.h +++ b/lib/libcrypt/crypt.h @@ -32,12 +32,12 @@ #define MD4_SIZE 16 #define MD5_SIZE 16 -char *crypt_des(const char *pw, const char *salt); -char *crypt_md5(const char *pw, const char *salt); -char *crypt_nthash(const char *pw, const char *salt); -char *crypt_blowfish(const char *pw, const char *salt); -char *crypt_sha256 (const char *pw, const char *salt); -char *crypt_sha512 (const char *pw, const char *salt); +int crypt_des(const char *pw, const char *salt, char *buf); +int crypt_md5(const char *pw, const char *salt, char *buf); +int crypt_nthash(const char *pw, const char *salt, char *buf); +int crypt_blowfish(const char *pw, const char *salt, char *buf); +int crypt_sha256 (const char *pw, const char *salt, char *buf); +int crypt_sha512 (const char *pw, const char *salt, char *buf); extern void 
_crypt_to64(char *s, u_long v, int n); -extern void b64_from_24bit(uint8_t B2, uint8_t B1, uint8_t B0, int n, int *buflen, char **cp); +extern void b64_from_24bit(uint8_t B2, uint8_t B1, uint8_t B0, int n, char **cp); diff --git a/lib/libcrypt/misc.c b/lib/libcrypt/misc.c index 0f63ce04214c..2202ffb652cd 100644 --- a/lib/libcrypt/misc.c +++ b/lib/libcrypt/misc.c @@ -47,7 +47,7 @@ _crypt_to64(char *s, u_long v, int n) } void -b64_from_24bit(uint8_t B2, uint8_t B1, uint8_t B0, int n, int *buflen, char **cp) +b64_from_24bit(uint8_t B2, uint8_t B1, uint8_t B0, int n, char **cp) { uint32_t w; int i; @@ -56,8 +56,6 @@ b64_from_24bit(uint8_t B2, uint8_t B1, uint8_t B0, int n, int *buflen, char **cp for (i = 0; i < n; i++) { **cp = itoa64[w&0x3f]; (*cp)++; - if ((*buflen)-- < 0) - break; w >>= 6; } } diff --git a/lib/libsysdecode/Makefile b/lib/libsysdecode/Makefile index 742c513469fe..123ea49ca572 100644 --- a/lib/libsysdecode/Makefile +++ b/lib/libsysdecode/Makefile @@ -9,6 +9,7 @@ SRCS= errno.c ioctl.c syscallnames.c utrace.c INCS= sysdecode.h CFLAGS+= -I${.CURDIR}/../../sys +CFLAGS+= -I${.CURDIR}/../../libexec/rtld-elf MAN+= sysdecode.3 \ sysdecode_abi_to_freebsd_errno.3 \ diff --git a/lib/libsysdecode/utrace.c b/lib/libsysdecode/utrace.c index 6a251390b2bc..dfd0e70380b0 100644 --- a/lib/libsysdecode/utrace.c +++ b/lib/libsysdecode/utrace.c @@ -33,31 +33,21 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include +#include "rtld_utrace.h" -#define UTRACE_DLOPEN_START 1 -#define UTRACE_DLOPEN_STOP 2 -#define UTRACE_DLCLOSE_START 3 -#define UTRACE_DLCLOSE_STOP 4 -#define UTRACE_LOAD_OBJECT 5 -#define UTRACE_UNLOAD_OBJECT 6 -#define UTRACE_ADD_RUNDEP 7 -#define UTRACE_PRELOAD_FINISHED 8 -#define UTRACE_INIT_CALL 9 -#define UTRACE_FINI_CALL 10 -#define UTRACE_DLSYM_START 11 -#define UTRACE_DLSYM_STOP 12 - -struct utrace_rtld { - char sig[4]; /* 'RTLD' */ +#ifdef __LP64__ +struct utrace_rtld32 { + char sig[4]; int event; - void *handle; - void 
*mapbase; - size_t mapsize; + uint32_t handle; + uint32_t mapbase; + uint32_t mapsize; int refcnt; char name[MAXPATHLEN]; }; +#endif static int print_utrace_rtld(FILE *fp, void *p) @@ -145,6 +135,14 @@ struct utrace_malloc { void *r; }; +#ifdef __LP64__ +struct utrace_malloc32 { + uint32_t p; + uint32_t s; + uint32_t r; +}; +#endif + static void print_utrace_malloc(FILE *fp, void *p) { @@ -163,15 +161,49 @@ print_utrace_malloc(FILE *fp, void *p) int sysdecode_utrace(FILE *fp, void *p, size_t len) { +#ifdef __LP64__ + struct utrace_rtld ur; + struct utrace_rtld32 *pr; + struct utrace_malloc um; + struct utrace_malloc32 *pm; +#endif + static const char rtld_utrace_sig[RTLD_UTRACE_SIG_SZ] = RTLD_UTRACE_SIG; - if (len == sizeof(struct utrace_rtld) && bcmp(p, "RTLD", 4) == 0) { + if (len == sizeof(struct utrace_rtld) && bcmp(p, rtld_utrace_sig, + sizeof(rtld_utrace_sig)) == 0) return (print_utrace_rtld(fp, p)); - } if (len == sizeof(struct utrace_malloc)) { print_utrace_malloc(fp, p); return (1); } - + +#ifdef __LP64__ + if (len == sizeof(struct utrace_rtld32) && bcmp(p, rtld_utrace_sig, + sizeof(rtld_utrace_sig)) == 0) { + pr = p; + memset(&ur, 0, sizeof(ur)); + memcpy(ur.sig, pr->sig, sizeof(ur.sig)); + ur.event = pr->event; + ur.handle = (void *)(uintptr_t)pr->handle; + ur.mapbase = (void *)(uintptr_t)pr->mapbase; + ur.mapsize = pr->mapsize; + ur.refcnt = pr->refcnt; + memcpy(ur.name, pr->name, sizeof(ur.name)); + return (print_utrace_rtld(fp, &ur)); + } + + if (len == sizeof(struct utrace_malloc32)) { + pm = p; + memset(&um, 0, sizeof(um)); + um.p = pm->p == (uint32_t)-1 ? 
(void *)(intptr_t)-1 : + (void *)(uintptr_t)pm->p; + um.s = pm->s; + um.r = (void *)(uintptr_t)pm->r; + print_utrace_malloc(fp, &um); + return (1); + } +#endif + return (0); } diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index ca722cfa175b..fe25e59ec65a 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -59,6 +59,7 @@ #include "paths.h" #include "rtld_tls.h" #include "rtld_printf.h" +#include "rtld_utrace.h" #include "notes.h" /* Types. */ @@ -273,29 +274,6 @@ char *ld_env_prefix = LD_; (dlp)->num_alloc = obj_count, \ (dlp)->num_used = 0) -#define UTRACE_DLOPEN_START 1 -#define UTRACE_DLOPEN_STOP 2 -#define UTRACE_DLCLOSE_START 3 -#define UTRACE_DLCLOSE_STOP 4 -#define UTRACE_LOAD_OBJECT 5 -#define UTRACE_UNLOAD_OBJECT 6 -#define UTRACE_ADD_RUNDEP 7 -#define UTRACE_PRELOAD_FINISHED 8 -#define UTRACE_INIT_CALL 9 -#define UTRACE_FINI_CALL 10 -#define UTRACE_DLSYM_START 11 -#define UTRACE_DLSYM_STOP 12 - -struct utrace_rtld { - char sig[4]; /* 'RTLD' */ - int event; - void *handle; - void *mapbase; /* Used for 'parent' and 'init/fini' */ - size_t mapsize; - int refcnt; /* Used for 'mode' */ - char name[MAXPATHLEN]; -}; - #define LD_UTRACE(e, h, mb, ms, r, n) do { \ if (ld_utrace != NULL) \ ld_utrace_log(e, h, mb, ms, r, n); \ @@ -306,11 +284,9 @@ ld_utrace_log(int event, void *handle, void *mapbase, size_t mapsize, int refcnt, const char *name) { struct utrace_rtld ut; + static const char rtld_utrace_sig[RTLD_UTRACE_SIG_SZ] = RTLD_UTRACE_SIG; - ut.sig[0] = 'R'; - ut.sig[1] = 'T'; - ut.sig[2] = 'L'; - ut.sig[3] = 'D'; + memcpy(ut.sig, rtld_utrace_sig, sizeof(ut.sig)); ut.event = event; ut.handle = handle; ut.mapbase = mapbase; @@ -1916,6 +1892,7 @@ static void init_rtld(caddr_t mapbase, Elf_Auxinfo **aux_info) { Obj_Entry objtmp; /* Temporary rtld object */ + const Elf_Ehdr *ehdr; const Elf_Dyn *dyn_rpath; const Elf_Dyn *dyn_soname; const Elf_Dyn *dyn_runpath; @@ -1954,6 +1931,9 @@ init_rtld(caddr_t mapbase, Elf_Auxinfo **aux_info) 
relocate_objects(&objtmp, true, &objtmp, 0, NULL); } + ehdr = (Elf_Ehdr *)mapbase; + objtmp.phdr = (Elf_Phdr *)((char *)mapbase + ehdr->e_phoff); + objtmp.phsize = ehdr->e_phnum * sizeof(objtmp.phdr[0]); /* Initialize the object list. */ TAILQ_INIT(&obj_list); @@ -2164,8 +2144,7 @@ load_needed_objects(Obj_Entry *first, int flags) { Obj_Entry *obj; - obj = first; - TAILQ_FOREACH_FROM(obj, &obj_list, next) { + for (obj = first; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker) continue; if (process_needed(obj, obj->needed, flags) == -1) @@ -2769,9 +2748,8 @@ relocate_objects(Obj_Entry *first, bool bind_now, Obj_Entry *rtldobj, Obj_Entry *obj; int error; - error = 0; - obj = first; - TAILQ_FOREACH_FROM(obj, &obj_list, next) { + for (error = 0, obj = first; obj != NULL; + obj = TAILQ_NEXT(obj, next)) { if (obj->marker) continue; error = relocate_object(obj, bind_now, rtldobj, flags, @@ -2811,8 +2789,7 @@ resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags, { Obj_Entry *obj; - obj = first; - TAILQ_FOREACH_FROM(obj, &obj_list, next) { + for (obj = first; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker) continue; if (resolve_object_ifunc(obj, bind_now, flags, lockstate) == -1) @@ -4316,7 +4293,7 @@ trace_loaded_objects(Obj_Entry *obj) list_containers = getenv(_LD("TRACE_LOADED_OBJECTS_ALL")); - TAILQ_FOREACH_FROM(obj, &obj_list, next) { + for (; obj != NULL; obj = TAILQ_NEXT(obj, next)) { Needed_Entry *needed; char *name, *path; bool is_lib; @@ -4661,8 +4638,7 @@ allocate_tls(Obj_Entry *objs, void *oldtls, size_t tcbsize, size_t tcbalign) */ free_tls(oldtls, 2*sizeof(Elf_Addr), sizeof(Elf_Addr)); } else { - obj = objs; - TAILQ_FOREACH_FROM(obj, &obj_list, next) { + for (obj = objs; obj != NULL; obj = TAILQ_NEXT(obj, next)) { if (obj->marker || obj->tlsoffset == 0) continue; addr = segbase - obj->tlsoffset; diff --git a/libexec/rtld-elf/rtld_utrace.h b/libexec/rtld-elf/rtld_utrace.h new file mode 100644 index 000000000000..8ac8a02775fd 
--- /dev/null +++ b/libexec/rtld-elf/rtld_utrace.h @@ -0,0 +1,62 @@ +/*- + * Copyright (c) 2007 John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef RTLD_UTRACE_H +#define RTLD_UTRACE_H + +#include + +#define UTRACE_DLOPEN_START 1 +#define UTRACE_DLOPEN_STOP 2 +#define UTRACE_DLCLOSE_START 3 +#define UTRACE_DLCLOSE_STOP 4 +#define UTRACE_LOAD_OBJECT 5 +#define UTRACE_UNLOAD_OBJECT 6 +#define UTRACE_ADD_RUNDEP 7 +#define UTRACE_PRELOAD_FINISHED 8 +#define UTRACE_INIT_CALL 9 +#define UTRACE_FINI_CALL 10 +#define UTRACE_DLSYM_START 11 +#define UTRACE_DLSYM_STOP 12 + +#define RTLD_UTRACE_SIG_SZ 4 +#define RTLD_UTRACE_SIG "RTLD" + +struct utrace_rtld { + char sig[RTLD_UTRACE_SIG_SZ]; + int event; + void *handle; + void *mapbase; /* Used for 'parent' and 'init/fini' */ + size_t mapsize; + int refcnt; /* Used for 'mode' */ + char name[MAXPATHLEN]; +}; + +#endif diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index a02ce39cf3fd..cedefda9468e 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -1583,7 +1583,7 @@ show_static_rule(struct cmdline_opts *co, struct format_opts *fo, break; case O_NAT: - if (cmd->arg1 != 0) + if (cmd->arg1 != IP_FW_NAT44_GLOBAL) bprint_uint_arg(bp, "nat ", cmd->arg1); else bprintf(bp, "nat global"); @@ -3776,7 +3776,7 @@ compile_rule(char *av[], uint32_t *rbuf, int *rbufsize, struct tidx *tstate) action->len = F_INSN_SIZE(ipfw_insn_nat); CHECK_ACTLEN; if (*av != NULL && _substrcmp(*av, "global") == 0) { - action->arg1 = 0; + action->arg1 = IP_FW_NAT44_GLOBAL; av++; break; } else diff --git a/secure/lib/libcrypt/crypt-blowfish.c b/secure/lib/libcrypt/crypt-blowfish.c index acd9057b6e85..eb2d3456b25c 100644 --- a/secure/lib/libcrypt/crypt-blowfish.c +++ b/secure/lib/libcrypt/crypt-blowfish.c @@ -75,8 +75,6 @@ __FBSDID("$FreeBSD$"); static void encode_base64(u_int8_t *, u_int8_t *, u_int16_t); static void decode_base64(u_int8_t *, u_int16_t, const u_int8_t *); -static char encrypted[_PASSWORD_LEN]; - const static u_int8_t Base64Code[] = "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; @@ -135,8 +133,8 @@ decode_base64(u_int8_t *buffer, 
u_int16_t len, const u_int8_t *data) /* We handle $Vers$log2(NumRounds)$salt+passwd$ i.e. $2$04$iwouldntknowwhattosayetKdJ6iFtacBqJdKe6aW7ou */ -char * -crypt_blowfish(const char *key, const char *salt) +int +crypt_blowfish(const char *key, const char *salt, char *buffer) { blf_ctx state; u_int32_t rounds, i, k; @@ -157,10 +155,8 @@ crypt_blowfish(const char *key, const char *salt) /* Discard "$" identifier */ salt++; - if (*salt > BCRYPT_VERSION) { - /* How do I handle errors ? Return NULL */ - return NULL; - } + if (*salt > BCRYPT_VERSION) + return (-1); /* Check for minor versions */ if (salt[1] != '$') { @@ -174,7 +170,7 @@ crypt_blowfish(const char *key, const char *salt) salt++; break; default: - return NULL; + return (-1); } } else minr = 0; @@ -184,15 +180,15 @@ crypt_blowfish(const char *key, const char *salt) if (salt[2] != '$') /* Out of sync with passwd entry */ - return NULL; + return (-1); memcpy(arounds, salt, sizeof(arounds)); if (arounds[sizeof(arounds) - 1] != '$') - return NULL; + return (-1); arounds[sizeof(arounds) - 1] = 0; logr = strtonum(arounds, BCRYPT_MINLOGROUNDS, 31, NULL); if (logr == 0) - return NULL; + return (-1); /* Computer power doesn't increase linearly, 2^x should be fine */ rounds = 1U << logr; @@ -201,7 +197,7 @@ crypt_blowfish(const char *key, const char *salt) } if (strlen(salt) * 3 / 4 < BCRYPT_MAXSALT) - return NULL; + return (-1); /* We dont want the base64 salt but the raw data */ decode_base64(csalt, BCRYPT_MAXSALT, (const u_int8_t *) salt); @@ -248,23 +244,23 @@ crypt_blowfish(const char *key, const char *salt) } - i = 0; - encrypted[i++] = '$'; - encrypted[i++] = BCRYPT_VERSION; + *buffer++ = '$'; + *buffer++ = BCRYPT_VERSION; if (minr) - encrypted[i++] = minr; - encrypted[i++] = '$'; + *buffer++ = minr; + *buffer++ = '$'; - snprintf(encrypted + i, 4, "%2.2u$", logr); + snprintf(buffer, 4, "%2.2u$", logr); + buffer += 3; - encode_base64((u_int8_t *) encrypted + i + 3, csalt, BCRYPT_MAXSALT); - encode_base64((u_int8_t 
*) encrypted + strlen(encrypted), ciphertext, - 4 * BCRYPT_BLOCKS - 1); + encode_base64((u_int8_t *)buffer, csalt, BCRYPT_MAXSALT); + buffer += strlen(buffer); + encode_base64((u_int8_t *)buffer, ciphertext, 4 * BCRYPT_BLOCKS - 1); memset(&state, 0, sizeof(state)); memset(ciphertext, 0, sizeof(ciphertext)); memset(csalt, 0, sizeof(csalt)); memset(cdata, 0, sizeof(cdata)); - return encrypted; + return (0); } static void diff --git a/secure/lib/libcrypt/crypt-des.c b/secure/lib/libcrypt/crypt-des.c index 6bb9bc03c76f..4601e46fe31b 100644 --- a/secure/lib/libcrypt/crypt-des.c +++ b/secure/lib/libcrypt/crypt-des.c @@ -588,13 +588,12 @@ des_cipher(const char *in, char *out, u_long salt, int count) return(retval); } -char * -crypt_des(const char *key, const char *setting) +int +crypt_des(const char *key, const char *setting, char *buffer) { int i; u_int32_t count, salt, l, r0, r1, keybuf[2]; - u_char *p, *q; - static char output[21]; + u_char *q; if (!des_initialised) des_init(); @@ -610,7 +609,7 @@ crypt_des(const char *key, const char *setting) key++; } if (des_setkey((char *)keybuf)) - return(NULL); + return (-1); if (*setting == _PASSWORD_EFMT1) { /* @@ -629,7 +628,7 @@ crypt_des(const char *key, const char *setting) * Encrypt the key with itself. */ if (des_cipher((char *)keybuf, (char *)keybuf, 0L, 1)) - return(NULL); + return (-1); /* * And XOR with the next 8 characters of the key. */ @@ -638,19 +637,9 @@ crypt_des(const char *key, const char *setting) *q++ ^= *key++ << 1; if (des_setkey((char *)keybuf)) - return(NULL); + return (-1); } - strncpy(output, setting, 9); - - /* - * Double check that we weren't given a short setting. - * If we were, the above code will probably have created - * wierd values for count and salt, but we don't really care. - * Just make sure the output string doesn't have an extra - * NUL in it. 
- */ - output[9] = '\0'; - p = (u_char *)output + strlen(output); + buffer = stpncpy(buffer, setting, 9); } else { /* * "old"-style: @@ -662,43 +651,41 @@ crypt_des(const char *key, const char *setting) salt = (ascii_to_bin(setting[1]) << 6) | ascii_to_bin(setting[0]); - output[0] = setting[0]; + *buffer++ = setting[0]; /* * If the encrypted password that the salt was extracted from * is only 1 character long, the salt will be corrupted. We * need to ensure that the output string doesn't have an extra * NUL in it! */ - output[1] = setting[1] ? setting[1] : output[0]; - - p = (u_char *)output + 2; + *buffer++ = setting[1] ? setting[1] : setting[0]; } setup_salt(salt); /* * Do it. */ if (do_des(0L, 0L, &r0, &r1, (int)count)) - return(NULL); + return (-1); /* * Now encode the result... */ l = (r0 >> 8); - *p++ = ascii64[(l >> 18) & 0x3f]; - *p++ = ascii64[(l >> 12) & 0x3f]; - *p++ = ascii64[(l >> 6) & 0x3f]; - *p++ = ascii64[l & 0x3f]; + *buffer++ = ascii64[(l >> 18) & 0x3f]; + *buffer++ = ascii64[(l >> 12) & 0x3f]; + *buffer++ = ascii64[(l >> 6) & 0x3f]; + *buffer++ = ascii64[l & 0x3f]; l = (r0 << 16) | ((r1 >> 16) & 0xffff); - *p++ = ascii64[(l >> 18) & 0x3f]; - *p++ = ascii64[(l >> 12) & 0x3f]; - *p++ = ascii64[(l >> 6) & 0x3f]; - *p++ = ascii64[l & 0x3f]; + *buffer++ = ascii64[(l >> 18) & 0x3f]; + *buffer++ = ascii64[(l >> 12) & 0x3f]; + *buffer++ = ascii64[(l >> 6) & 0x3f]; + *buffer++ = ascii64[l & 0x3f]; l = r1 << 2; - *p++ = ascii64[(l >> 12) & 0x3f]; - *p++ = ascii64[(l >> 6) & 0x3f]; - *p++ = ascii64[l & 0x3f]; - *p = 0; + *buffer++ = ascii64[(l >> 12) & 0x3f]; + *buffer++ = ascii64[(l >> 6) & 0x3f]; + *buffer++ = ascii64[l & 0x3f]; + *buffer = '\0'; - return(output); + return (0); } diff --git a/share/misc/committers-src.dot b/share/misc/committers-src.dot index ef0e0c955512..bab00b0a4852 100644 --- a/share/misc/committers-src.dot +++ b/share/misc/committers-src.dot @@ -312,6 +312,7 @@ theraven [label="David Chisnall\ntheraven@FreeBSD.org\n2011/11/11"] 
thompsa [label="Andrew Thompson\nthompsa@FreeBSD.org\n2005/05/25"] ticso [label="Bernd Walter\nticso@FreeBSD.org\n2002/01/31"] tijl [label="Tijl Coosemans\ntijl@FreeBSD.org\n2010/07/16"] +tsoome [label="Toomas Soome\ntsoome@FreeBSD.org\n2016/08/10"] trasz [label="Edward Tomasz Napierala\ntrasz@FreeBSD.org\n2008/08/22"] trhodes [label="Tom Rhodes\ntrhodes@FreeBSD.org\n2002/05/28"] trociny [label="Mikolaj Golub\ntrociny@FreeBSD.org\n2011/03/10"] @@ -363,6 +364,8 @@ adrian -> sgalabov ae -> melifaro +allanjude -> tsoome + alc -> davide andre -> qingli @@ -520,6 +523,7 @@ imp -> sanpei imp -> shiba imp -> takawata imp -> toshi +imp -> tsoome imp -> uch jake -> bms diff --git a/share/mk/bsd.README b/share/mk/bsd.README index fb5948e708b4..210e6008246c 100644 --- a/share/mk/bsd.README +++ b/share/mk/bsd.README @@ -331,6 +331,7 @@ PROGS_CXX PROG and PROGS_CXX in one Makefile. To define - DEBUG_FLAGS - DPADD - DPSRCS + - INTERNALPROG (no installation) - LDADD - LDFLAGS - LIBADD diff --git a/share/mk/bsd.progs.mk b/share/mk/bsd.progs.mk index d26ba76f85e3..3254bdadc061 100644 --- a/share/mk/bsd.progs.mk +++ b/share/mk/bsd.progs.mk @@ -24,8 +24,8 @@ PROGS += ${PROGS_CXX} # just one of many PROG_OVERRIDE_VARS += BINDIR BINGRP BINOWN BINMODE DPSRCS MAN NO_WERROR \ PROGNAME SRCS STRIP WARNS -PROG_VARS += CFLAGS CXXFLAGS DEBUG_FLAGS DPADD LDADD LIBADD LINKS \ - LDFLAGS MLINKS ${PROG_OVERRIDE_VARS} +PROG_VARS += CFLAGS CXXFLAGS DEBUG_FLAGS DPADD INTERNALPROG LDADD LIBADD \ + LINKS LDFLAGS MLINKS ${PROG_OVERRIDE_VARS} .for v in ${PROG_VARS:O:u} .if empty(${PROG_OVERRIDE_VARS:M$v}) .if defined(${v}.${PROG}) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 52adb660c3d2..f87d3b5c599b 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -561,9 +561,9 @@ pmap_delayed_invl_wait(vm_page_t m) * block to complete before proceeding. * * The function works by setting the DI generation number for m's PV - * list to at least * the number for the current thread. 
This forces - * a caller to pmap_delayed_invl_wait() to spin until current thread - * calls pmap_delayed_invl_finished(). + * list to at least the DI generation number of the current thread. + * This forces a caller of pmap_delayed_invl_wait() to block until + * current thread calls pmap_delayed_invl_finished(). */ static void pmap_delayed_invl_page(vm_page_t m) diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 1b85b3298daa..04c5dcc0eacd 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -443,8 +443,8 @@ trap(struct trapframe *frame) goto out; case T_DNA: - KASSERT(!PCB_USER_FPU(td->td_pcb), - ("Unregistered use of FPU in kernel")); + if (PCB_USER_FPU(td->td_pcb)) + panic("Unregistered use of FPU in kernel"); fpudna(); goto out; diff --git a/sys/amd64/cloudabi64/cloudabi64_sysvec.c b/sys/amd64/cloudabi64/cloudabi64_sysvec.c index 08d85a2d5cb7..cefdef983238 100644 --- a/sys/amd64/cloudabi64/cloudabi64_sysvec.c +++ b/sys/amd64/cloudabi64/cloudabi64_sysvec.c @@ -196,7 +196,6 @@ static struct sysentvec cloudabi64_elf_sysvec = { .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, - .sv_usrstack = USRSTACK, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = cloudabi64_copyout_strings, .sv_setregs = cloudabi64_proc_setregs, diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index a451cb176e3a..d9ea4e329766 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -229,6 +229,13 @@ CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS); #define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) extern pt_entry_t pagetable_dmap[]; +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); + +static int superpages_enabled = 1; +SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0, + "Are large page mappings enabled?"); + /* * Data for the pv entry allocation mechanism */ @@ 
-243,6 +250,13 @@ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); + +static int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode); +static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); +static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va); +static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, + vm_offset_t va, struct rwlock **lockp); +static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, @@ -422,6 +436,13 @@ pmap_pte(pmap_t pmap, vm_offset_t va, int *level) return (l3); } +static inline bool +pmap_superpages_enabled(void) +{ + + return (superpages_enabled != 0); +} + bool pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, pd_entry_t **l2, pt_entry_t **l3) @@ -836,6 +857,11 @@ pmap_init(void) { int i; + /* + * Are large page mappings enabled? + */ + TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled); + /* * Initialize the pv chunk list mutex. 
*/ @@ -1574,7 +1600,6 @@ pmap_release(pmap_t pmap) vm_page_free_zero(m); } -#if 0 static int kvm_size(SYSCTL_HANDLER_ARGS) { @@ -1594,7 +1619,6 @@ kvm_free(SYSCTL_HANDLER_ARGS) } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); -#endif /* 0 */ /* * grow the number of kernel page table entries, if needed @@ -2002,6 +2026,15 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) l3_paddr = pmap_load(l2); + if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { + KASSERT((l3_paddr & ATTR_SW_MANAGED) == 0, + ("%s: TODO: Demote managed pages", __func__)); + if (pmap_demote_l2_locked(pmap, l2, sva & ~L2_OFFSET, + &lock) == NULL) + continue; + l3_paddr = pmap_load(l2); + } + /* * Weed out invalid mappings. */ @@ -2195,6 +2228,99 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) pmap_invalidate_all(pmap); } +/* + * Performs a break-before-make update of a pmap entry. This is needed when + * either promoting or demoting pages to ensure the TLB doesn't get into an + * inconsistent state. + */ +static void +pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, + vm_offset_t va) +{ + register_t intr; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* + * Ensure we don't get switched out with the page table in an + * inconsistent state. We also need to ensure no interrupts fire + * as they may make use of an address we are about to invalidate. + */ + intr = intr_disable(); + critical_enter(); + + /* Clear the old mapping */ + pmap_load_clear(pte); + PTE_SYNC(pte); + pmap_invalidate_page(pmap, va); + + /* Create the new mapping */ + pmap_load_store(pte, newpte); + PTE_SYNC(pte); + + critical_exit(); + intr_restore(intr); +} + +/* + * Tries to promote the 512, contiguous 4KB page mappings that are within a + * single level 2 table entry to a single 2MB page mapping. 
For promotion + * to occur, two conditions must be met: (1) the 4KB page mappings must map + * aligned, contiguous physical memory and (2) the 4KB page mappings must have + * identical characteristics. + */ +static void +pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + pt_entry_t *firstl3, *l3, newl2, oldl3, pa; + register_t intr; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + firstl3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); + newl2 = pmap_load(firstl3); + /* Ignore managed pages for now */ + if ((newl2 & ATTR_SW_MANAGED) != 0) + return; + + /* Check the alignment is valid */ + if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) + return; + + pa = newl2 + L2_SIZE - PAGE_SIZE; + for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) { + oldl3 = pmap_load(l3); + if (oldl3 != pa) + return; + pa -= PAGE_SIZE; + } + + newl2 &= ~ATTR_DESCR_MASK; + newl2 |= L2_BLOCK; + + /* + * Ensure we don't get switched out with the page table in an + * inconsistent state. We also need to ensure no interrupts fire + * as they may make use of an address we are about to invalidate. 
+ */ + intr = intr_disable(); + critical_enter(); + + /* Clear the old mapping */ + pmap_load_clear(l2); + PTE_SYNC(l2); + pmap_invalidate_range(pmap, rounddown2(va, L2_SIZE), + roundup2(va, L2_SIZE)); + + /* Create the new mapping */ + pmap_load_store(l2, newl2); + PTE_SYNC(l2); + + critical_exit(); + intr_restore(intr); +} + /* * Insert the given physical page (p) at * the specified virtual address (v) in the @@ -2214,7 +2340,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock *lock; pd_entry_t *pde; pt_entry_t new_l3, orig_l3; - pt_entry_t *l3; + pt_entry_t *l2, *l3; pv_entry_t pv; vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; vm_page_t mpte, om, l1_m, l2_m, l3_m; @@ -2241,6 +2367,20 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, lock = NULL; PMAP_LOCK(pmap); + pde = pmap_pde(pmap, va, &lvl); + if (pde != NULL && lvl == 1) { + l2 = pmap_l1_to_l2(pde, va); + if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK && + (l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) { + if (va < VM_MAXUSER_ADDRESS) { + mpte = PHYS_TO_VM_PAGE( + pmap_load(l2) & ~ATTR_MASK); + mpte->wire_count++; + } + goto havel3; + } + } + if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); @@ -2322,6 +2462,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, l3 = pmap_l2_to_l3(pde, va); pmap_invalidate_page(pmap, va); } +havel3: om = NULL; orig_l3 = pmap_load(l3); @@ -2402,7 +2543,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if (orig_l3 != 0) { validate: orig_l3 = pmap_load_store(l3, new_l3); - PTE_SYNC(l3); opa = orig_l3 & ~ATTR_MASK; if (opa != pa) { @@ -2421,12 +2561,24 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, } } else { pmap_load_store(l3, new_l3); - PTE_SYNC(l3); } + + PTE_SYNC(l3); pmap_invalidate_page(pmap, va); + if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); + /* XXX: Not yet, not all demotions are handled */ +#if 0 + if ((mpte == NULL || mpte->wire_count == NL3PG) && + pmap_superpages_enabled() && (m->flags & PG_FICTITIOUS) == 0 && + vm_reserv_level_iffullpop(m) == 0) { + KASSERT(lvl == 2, ("Invalid pde level %d", lvl)); + pmap_promote_l2(pmap, pde, va, &lock); + } +#endif + if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); @@ -3342,14 +3494,271 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) m->md.pv_memattr = ma; /* - * ARM64TODO: Implement the below (from the amd64 pmap) * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && - PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) - panic("ARM64TODO: pmap_page_set_memattr"); + pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE, + m->md.pv_memattr) != 0) + panic("memory attribute change on the direct map failed"); +} + +/* + * Changes the specified virtual address range's memory type to that given by + * the parameter "mode". The specified virtual address range must be + * completely contained within either the direct map or the kernel map. 
If + * the virtual address range is contained within the kernel map, then the + * memory type for each of the corresponding ranges of the direct map is also + * changed. (The corresponding ranges of the direct map are those ranges that + * map the same physical pages as the specified virtual address range.) These + * changes to the direct map are necessary because Intel describes the + * behavior of their processors as "undefined" if two or more mappings to the + * same physical page have different memory types. + * + * Returns zero if the change completed successfully, and either EINVAL or + * ENOMEM if the change failed. Specifically, EINVAL is returned if some part + * of the virtual address range was not mapped, and ENOMEM is returned if + * there was insufficient memory available to complete the change. In the + * latter case, the memory type may have been changed on some part of the + * virtual address range or the direct map. + */ +static int +pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) +{ + int error; + + PMAP_LOCK(kernel_pmap); + error = pmap_change_attr_locked(va, size, mode); + PMAP_UNLOCK(kernel_pmap); + return (error); +} + +static int +pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) +{ + vm_offset_t base, offset, tmpva; + pt_entry_t l3, *pte, *newpte; + int lvl; + + PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED); + base = trunc_page(va); + offset = va & PAGE_MASK; + size = round_page(offset + size); + + if (!VIRT_IN_DMAP(base)) + return (EINVAL); + + for (tmpva = base; tmpva < base + size; ) { + pte = pmap_pte(kernel_pmap, va, &lvl); + if (pte == NULL) + return (EINVAL); + + if ((pmap_load(pte) & ATTR_IDX_MASK) == ATTR_IDX(mode)) { + /* + * We already have the correct attribute, + * ignore this entry. 
+ */ + switch (lvl) { + default: + panic("Invalid DMAP table level: %d\n", lvl); + case 1: + tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE; + break; + case 2: + tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE; + break; + case 3: + tmpva += PAGE_SIZE; + break; + } + } else { + /* + * Split the entry to an level 3 table, then + * set the new attribute. + */ + switch (lvl) { + default: + panic("Invalid DMAP table level: %d\n", lvl); + case 1: + newpte = pmap_demote_l1(kernel_pmap, pte, + tmpva & ~L1_OFFSET); + if (newpte == NULL) + return (EINVAL); + pte = pmap_l1_to_l2(pte, tmpva); + case 2: + newpte = pmap_demote_l2(kernel_pmap, pte, + tmpva & ~L2_OFFSET); + if (newpte == NULL) + return (EINVAL); + pte = pmap_l2_to_l3(pte, tmpva); + case 3: + /* Update the entry */ + l3 = pmap_load(pte); + l3 &= ~ATTR_IDX_MASK; + l3 |= ATTR_IDX(mode); + + pmap_update_entry(kernel_pmap, pte, l3, tmpva); + + /* + * If moving to a non-cacheable entry flush + * the cache. + */ + if (mode == VM_MEMATTR_UNCACHEABLE) + cpu_dcache_wbinv_range(tmpva, L3_SIZE); + + break; + } + tmpva += PAGE_SIZE; + } + } + + return (0); +} + +/* + * Create an L2 table to map all addresses within an L1 mapping. 
+ */ +static pt_entry_t * +pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) +{ + pt_entry_t *l2, newl2, oldl1; + vm_offset_t tmpl1; + vm_paddr_t l2phys, phys; + vm_page_t ml2; + int i; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + oldl1 = pmap_load(l1); + KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK, + ("pmap_demote_l1: Demoting a non-block entry")); + KASSERT((va & L1_OFFSET) == 0, + ("pmap_demote_l1: Invalid virtual address %#lx", va)); + + tmpl1 = 0; + if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) { + tmpl1 = kva_alloc(PAGE_SIZE); + if (tmpl1 == 0) + return (NULL); + } + + if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { + CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx" + " in pmap %p", va, pmap); + return (NULL); + } + + l2phys = VM_PAGE_TO_PHYS(ml2); + l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys); + + /* Address the range points at */ + phys = oldl1 & ~ATTR_MASK; + /* The attributes from the old l1 table to be copied */ + newl2 = oldl1 & ATTR_MASK; + + /* Create the new entries */ + for (i = 0; i < Ln_ENTRIES; i++) { + l2[i] = newl2 | phys; + phys += L2_SIZE; + } + cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); + + if (tmpl1 != 0) { + pmap_kenter(tmpl1, PAGE_SIZE, + DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET, CACHED_MEMORY); + l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK)); + } + + pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va); + + if (tmpl1 != 0) { + pmap_kremove(tmpl1); + kva_free(tmpl1, PAGE_SIZE); + } + + return (l2); +} + +/* + * Create an L3 table to map all addresses within an L2 mapping. 
+ */ +static pt_entry_t * +pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + pt_entry_t *l3, newl3, oldl2; + vm_offset_t tmpl2; + vm_paddr_t l3phys, phys; + vm_page_t ml3; + int i; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + oldl2 = pmap_load(l2); + KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_demote_l2: Demoting a non-block entry")); + KASSERT((va & L2_OFFSET) == 0, + ("pmap_demote_l2: Invalid virtual address %#lx", va)); + KASSERT((oldl2 & ATTR_SW_MANAGED) == 0, + ("pmap_demote_l2: TODO: Demote managed pages")); + + tmpl2 = 0; + if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) { + tmpl2 = kva_alloc(PAGE_SIZE); + if (tmpl2 == 0) + return (NULL); + } + + if ((ml3 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { + CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx" + " in pmap %p", va, pmap); + return (NULL); + } + + l3phys = VM_PAGE_TO_PHYS(ml3); + l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys); + + /* Address the range points at */ + phys = oldl2 & ~ATTR_MASK; + /* The attributes from the old l2 table to be copied */ + newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE; + + /* Create the new entries */ + for (i = 0; i < Ln_ENTRIES; i++) { + l3[i] = newl3 | phys; + phys += L3_SIZE; + } + cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE); + + if (tmpl2 != 0) { + pmap_kenter(tmpl2, PAGE_SIZE, + DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY); + l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK)); + } + + pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va); + + if (tmpl2 != 0) { + pmap_kremove(tmpl2); + kva_free(tmpl2, PAGE_SIZE); + } + + return (l3); + +} + +static pt_entry_t * +pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) +{ + struct rwlock *lock; + pt_entry_t *l3; + + lock = NULL; + l3 = pmap_demote_l2_locked(pmap, l2, va, &lock); + if (lock != NULL) + rw_wunlock(lock); + return (l3); } /* @@ -3482,6 +3891,53 @@ 
pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) } } +int +pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far) +{ +#ifdef SMP + uint64_t par; +#endif + + switch (ESR_ELx_EXCEPTION(esr)) { + case EXCP_DATA_ABORT_L: + case EXCP_DATA_ABORT: + break; + default: + return (KERN_FAILURE); + } + +#ifdef SMP + PMAP_LOCK(pmap); + switch (esr & ISS_DATA_DFSC_MASK) { + case ISS_DATA_DFSC_TF_L0: + case ISS_DATA_DFSC_TF_L1: + case ISS_DATA_DFSC_TF_L2: + case ISS_DATA_DFSC_TF_L3: + /* Ask the MMU to check the address */ + if (pmap == kernel_pmap) + par = arm64_address_translate_s1e1r(far); + else + par = arm64_address_translate_s1e0r(far); + + /* + * If the translation was successful the address was invalid + * due to a break-before-make sequence. We can unlock and + * return success to the trap handler. + */ + if (PAR_SUCCESS(par)) { + PMAP_UNLOCK(pmap); + return (KERN_SUCCESS); + } + break; + default: + break; + } + PMAP_UNLOCK(pmap); +#endif + + return (KERN_FAILURE); +} + /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. 
@@ -3490,6 +3946,20 @@ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { + vm_offset_t superpage_offset; + + if (size < L2_SIZE) + return; + if (object != NULL && (object->flags & OBJ_COLORED) != 0) + offset += ptoa(object->pg_color); + superpage_offset = offset & L2_OFFSET; + if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || + (*addr & L2_OFFSET) == superpage_offset) + return; + if ((*addr & L2_OFFSET) < superpage_offset) + *addr = (*addr & ~L2_OFFSET) + superpage_offset; + else + *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; } /** diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c index ea932edca8f6..d9367dfcf742 100644 --- a/sys/arm64/arm64/trap.c +++ b/sys/arm64/arm64/trap.c @@ -179,16 +179,6 @@ data_abort(struct trapframe *frame, uint64_t esr, uint64_t far, int lower) return; } - KASSERT(td->td_md.md_spinlock_count == 0, - ("data abort with spinlock held")); - if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | - WARN_GIANTOK, NULL, "Kernel page fault") != 0) { - print_registers(frame); - printf(" far: %16lx\n", far); - printf(" esr: %.8lx\n", esr); - panic("data abort in critical section or under mutex"); - } - p = td->td_proc; if (lower) map = &p->p_vmspace->vm_map; @@ -200,6 +190,19 @@ data_abort(struct trapframe *frame, uint64_t esr, uint64_t far, int lower) map = &p->p_vmspace->vm_map; } + if (pmap_fault(map->pmap, esr, far) == KERN_SUCCESS) + return; + + KASSERT(td->td_md.md_spinlock_count == 0, + ("data abort with spinlock held")); + if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | + WARN_GIANTOK, NULL, "Kernel page fault") != 0) { + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + panic("data abort in critical section or under mutex"); + } + va = trunc_page(far); ftype = ((esr >> 6) & 1) ? 
VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ; diff --git a/sys/arm64/arm64/vm_machdep.c b/sys/arm64/arm64/vm_machdep.c index 1de942487f47..1f8466d29d96 100644 --- a/sys/arm64/arm64/vm_machdep.c +++ b/sys/arm64/arm64/vm_machdep.c @@ -201,6 +201,8 @@ cpu_set_user_tls(struct thread *td, void *tls_base) pcb = td->td_pcb; pcb->pcb_tpidr_el0 = (register_t)tls_base; + if (td == curthread) + WRITE_SPECIALREG(tpidr_el0, tls_base); return (0); } diff --git a/sys/arm64/cloudabi64/cloudabi64_sysvec.c b/sys/arm64/cloudabi64/cloudabi64_sysvec.c index cf3e594244bb..ebcb54e1a867 100644 --- a/sys/arm64/cloudabi64/cloudabi64_sysvec.c +++ b/sys/arm64/cloudabi64/cloudabi64_sysvec.c @@ -165,7 +165,6 @@ static struct sysentvec cloudabi64_elf_sysvec = { .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, - .sv_usrstack = USRSTACK, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_strings = cloudabi64_copyout_strings, .sv_setregs = cloudabi64_proc_setregs, diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h index 578eb46988f1..1ea61f6fe1cb 100644 --- a/sys/arm64/include/pmap.h +++ b/sys/arm64/include/pmap.h @@ -151,6 +151,8 @@ void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); bool pmap_get_tables(pmap_t, vm_offset_t, pd_entry_t **, pd_entry_t **, pd_entry_t **, pt_entry_t **); +int pmap_fault(pmap_t, uint64_t, uint64_t); + #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) #endif /* _KERNEL */ diff --git a/sys/boot/efi/loader/Makefile b/sys/boot/efi/loader/Makefile index 6b1b4acb342b..2f54032cd059 100644 --- a/sys/boot/efi/loader/Makefile +++ b/sys/boot/efi/loader/Makefile @@ -63,6 +63,18 @@ CFLAGS+= -DNO_PCI -DEFI LIBSTAND= ${.OBJDIR}/../../../../lib/libstand/libstand.a .endif +.if !defined(BOOT_HIDE_SERIAL_NUMBERS) +# Export serial numbers, UUID, and asset tag from loader. 
+CFLAGS+= -DSMBIOS_SERIAL_NUMBERS +.if defined(BOOT_LITTLE_ENDIAN_UUID) +# Use little-endian UUID format as defined in SMBIOS 2.6. +CFLAGS+= -DSMBIOS_LITTLE_ENDIAN_UUID +.elif defined(BOOT_NETWORK_ENDIAN_UUID) +# Use network-endian UUID format for backward compatibility. +CFLAGS+= -DSMBIOS_NETWORK_ENDIAN_UUID +.endif +.endif + .if ${MK_FORTH} != "no" BOOT_FORTH= yes CFLAGS+= -DBOOT_FORTH diff --git a/sys/boot/fdt/dts/arm/pcduino3b.dts b/sys/boot/fdt/dts/arm/pcduino3.dts similarity index 100% rename from sys/boot/fdt/dts/arm/pcduino3b.dts rename to sys/boot/fdt/dts/arm/pcduino3.dts diff --git a/sys/boot/fdt/dts/riscv/qemu.dts b/sys/boot/fdt/dts/riscv/qemu.dts index 067e18fcea8c..8da1ae5b663f 100644 --- a/sys/boot/fdt/dts/riscv/qemu.dts +++ b/sys/boot/fdt/dts/riscv/qemu.dts @@ -72,15 +72,11 @@ clock-frequency = < 400000000 >; }; - htif0: htif@0 { - compatible = "riscv,htif"; - interrupts = < 0 >; + console0: console@0 { + compatible = "riscv,console"; + status = "okay"; + interrupts = < 1 >; interrupt-parent = < &pic0 >; - - console0: console@0 { - compatible = "htif,console"; - status = "okay"; - }; }; }; diff --git a/sys/boot/fdt/dts/riscv/rocket.dts b/sys/boot/fdt/dts/riscv/rocket.dts index b8c139fd67f7..8ce718d8295f 100644 --- a/sys/boot/fdt/dts/riscv/rocket.dts +++ b/sys/boot/fdt/dts/riscv/rocket.dts @@ -83,15 +83,11 @@ clock-frequency = < 1000000 >; }; - htif0: htif@0 { - compatible = "riscv,htif"; - interrupts = < 0 >; + console0: console@0 { + compatible = "riscv,console"; + status = "okay"; + interrupts = < 1 >; interrupt-parent = < &pic0 >; - - console0: console@0 { - compatible = "htif,console"; - status = "okay"; - }; }; }; diff --git a/sys/boot/fdt/dts/riscv/spike.dts b/sys/boot/fdt/dts/riscv/spike.dts index 27d68f034c3b..dfe27f349351 100644 --- a/sys/boot/fdt/dts/riscv/spike.dts +++ b/sys/boot/fdt/dts/riscv/spike.dts @@ -65,6 +65,10 @@ }; memory { + /* + * This is not used currently. + * We take information from sbi_query_memory. 
+ */ device_type = "memory"; reg = <0x80000000 0x40000000>; /* 1GB at 0x80000000 */ }; @@ -90,15 +94,11 @@ clock-frequency = < 1000000 >; }; - htif0: htif@0 { - compatible = "riscv,htif"; + console0: console@0 { + compatible = "riscv,console"; + status = "okay"; interrupts = < 1 >; interrupt-parent = < &pic0 >; - - console0: console@0 { - compatible = "htif,console"; - status = "okay"; - }; }; }; diff --git a/sys/boot/i386/btx/btxldr/btxldr.S b/sys/boot/i386/btx/btxldr/btxldr.S index 848b930b7907..c3e544e3a6fb 100644 --- a/sys/boot/i386/btx/btxldr/btxldr.S +++ b/sys/boot/i386/btx/btxldr/btxldr.S @@ -382,12 +382,12 @@ e_fmt: .asciz "Error: Client format not supported\n" #ifdef BTXLDR_VERBOSE m_mem: .asciz "Starting in protected mode (base mem=\0)\n" m_esp: .asciz "Arguments passed (esp=\0):\n" -m_args: .asciz"\n" +m_args: .asciz "\n" m_rel_bi: .asciz "Relocated bootinfo (size=48) to \0\n" m_rel_args: .asciz "Relocated arguments (size=18) to \0\n" m_rel_btx: .asciz "Relocated kernel (size=\0) to \0\n" diff --git a/sys/boot/i386/libi386/smbios.c b/sys/boot/i386/libi386/smbios.c index 7a7ce4ba4f56..4a8f3bc4ef08 100644 --- a/sys/boot/i386/libi386/smbios.c +++ b/sys/boot/i386/libi386/smbios.c @@ -238,6 +238,10 @@ smbios_parse_table(const caddr_t addr) smbios_setenv("smbios.system.serial", addr, 0x07); smbios_setuuid("smbios.system.uuid", addr + 0x08, smbios.ver); #endif + if (smbios.major >= 2 && smbios.minor >= 4) { + smbios_setenv("smbios.system.sku", addr, 0x19); + smbios_setenv("smbios.system.family", addr, 0x1a); + } break; case 2: /* 3.3.3 Base Board (or Module) Information (Type 2) */ @@ -246,7 +250,9 @@ smbios_parse_table(const caddr_t addr) smbios_setenv("smbios.planar.version", addr, 0x06); #ifdef SMBIOS_SERIAL_NUMBERS smbios_setenv("smbios.planar.serial", addr, 0x07); + smbios_setenv("smbios.planar.tag", addr, 0x08); #endif + smbios_setenv("smbios.planar.location", addr, 0x0a); break; case 3: /* 3.3.4 System Enclosure or Chassis (Type 3) */ diff --git 
a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index b869467d5288..8bf3ed0241a1 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -814,6 +814,14 @@ static struct da_quirk_entry da_quirk_table[] = {T_DIRECT, SIP_MEDIA_REMOVABLE, "JetFlash", "Transcend*", "*"}, /*quirks*/ DA_Q_NO_RC16 }, + { + /* + * I-O Data USB Flash Disk + * PR: usb/211716 + */ + {T_DIRECT, SIP_MEDIA_REMOVABLE, "I-O DATA", "USB Flash Disk*", + "*"}, /*quirks*/ DA_Q_NO_RC16 + }, /* ATA/SATA devices over SAS/USB/... */ { /* Hitachi Advanced Format (4k) drives */ diff --git a/sys/compat/cloudabi/cloudabi_util.h b/sys/compat/cloudabi/cloudabi_util.h index c0a02aa88dcb..6eb65aa87b32 100644 --- a/sys/compat/cloudabi/cloudabi_util.h +++ b/sys/compat/cloudabi/cloudabi_util.h @@ -33,6 +33,7 @@ #include struct file; +struct sysentvec; struct thread; struct timespec; @@ -76,4 +77,8 @@ int cloudabi_futex_lock_wrlock(struct thread *, cloudabi_lock_t *, cloudabi_scope_t, cloudabi_clockid_t, cloudabi_timestamp_t, cloudabi_timestamp_t); +/* vDSO setup and teardown. */ +void cloudabi_vdso_init(struct sysentvec *, char *, char *); +void cloudabi_vdso_destroy(struct sysentvec *); + #endif diff --git a/sys/compat/cloudabi/cloudabi_vdso.c b/sys/compat/cloudabi/cloudabi_vdso.c new file mode 100644 index 000000000000..27c3d365af83 --- /dev/null +++ b/sys/compat/cloudabi/cloudabi_vdso.c @@ -0,0 +1,88 @@ +/*- + * Copyright (c) 2016 Nuxi, https://nuxi.nl/ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +void +cloudabi_vdso_init(struct sysentvec *sv, char *begin, char *end) +{ + vm_page_t m; + vm_object_t obj; + vm_offset_t addr; + size_t i, pages, pages_length, vdso_length; + + /* Determine the number of pages needed to store the vDSO. */ + vdso_length = end - begin; + pages = howmany(vdso_length, PAGE_SIZE); + pages_length = pages * PAGE_SIZE; + + /* Allocate a VM object and fill it with the vDSO. */ + obj = vm_pager_allocate(OBJT_PHYS, 0, pages_length, + VM_PROT_DEFAULT, 0, NULL); + addr = kva_alloc(PAGE_SIZE); + for (i = 0; i < pages; ++i) { + VM_OBJECT_WLOCK(obj); + m = vm_page_grab(obj, i, VM_ALLOC_NOBUSY | VM_ALLOC_ZERO); + m->valid = VM_PAGE_BITS_ALL; + VM_OBJECT_WUNLOCK(obj); + + pmap_qenter(addr, &m, 1); + memcpy((void *)addr, begin + i * PAGE_SIZE, + MIN(vdso_length - i * PAGE_SIZE, PAGE_SIZE)); + pmap_qremove(addr, 1); + } + kva_free(addr, PAGE_SIZE); + + /* + * Place the vDSO at the top of the address space. The user + * stack can start right below it. 
+ */ + sv->sv_shared_page_base = sv->sv_maxuser - pages_length; + sv->sv_shared_page_len = pages_length; + sv->sv_shared_page_obj = obj; + sv->sv_usrstack = sv->sv_shared_page_base; +} + +void +cloudabi_vdso_destroy(struct sysentvec *sv) +{ + + vm_object_deallocate(sv->sv_shared_page_obj); +} diff --git a/sys/compat/cloudabi64/cloudabi64_module.c b/sys/compat/cloudabi64/cloudabi64_module.c index 246a887105cb..da1ea1149a4b 100644 --- a/sys/compat/cloudabi64/cloudabi64_module.c +++ b/sys/compat/cloudabi64/cloudabi64_module.c @@ -38,8 +38,13 @@ __FBSDID("$FreeBSD$"); #include +#include + #include +extern char _binary_cloudabi64_vdso_o_start[]; +extern char _binary_cloudabi64_vdso_o_end[]; + register_t * cloudabi64_copyout_strings(struct image_params *imgp) { @@ -107,6 +112,8 @@ cloudabi64_fixup(register_t **stack_base, struct image_params *imgp) PTR(CLOUDABI_AT_PHDR, args->phdr), VAL(CLOUDABI_AT_PHNUM, args->phnum), VAL(CLOUDABI_AT_TID, td->td_tid), + PTR(CLOUDABI_AT_SYSINFO_EHDR, + imgp->proc->p_sysent->sv_shared_page_base), #undef VAL #undef PTR { .a_type = CLOUDABI_AT_NULL }, @@ -127,6 +134,9 @@ cloudabi64_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: + cloudabi_vdso_init(cloudabi64_brand.sysvec, + _binary_cloudabi64_vdso_o_start, + _binary_cloudabi64_vdso_o_end); if (elf64_insert_brand_entry(&cloudabi64_brand) < 0) { printf("Failed to add CloudABI ELF brand handler\n"); return (EINVAL); @@ -139,6 +149,7 @@ cloudabi64_modevent(module_t mod, int type, void *data) printf("Failed to remove CloudABI ELF brand handler\n"); return (EINVAL); } + cloudabi_vdso_destroy(cloudabi64_brand.sysvec); return (0); default: return (EOPNOTSUPP); diff --git a/sys/compat/cloudabi64/cloudabi64_vdso.lds.s b/sys/compat/cloudabi64/cloudabi64_vdso.lds.s new file mode 100644 index 000000000000..29c94d3cdde8 --- /dev/null +++ b/sys/compat/cloudabi64/cloudabi64_vdso.lds.s @@ -0,0 +1,51 @@ +/* + * Linker script for 64-bit vDSO for CloudABI. 
+ * Based on sys/amd64/linux/linux_vdso.lds.s + * + * $FreeBSD$ + */ + +SECTIONS +{ + . = . + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .altinstructions : { *(.altinstructions) } + .altinstr_replacement : { *(.altinstr_replacement) } + + . = ALIGN(0x100); + .text : { *(.test .text*) } :text =0x90909090 +} + +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} diff --git a/sys/conf/files b/sys/conf/files index 4f5fa71ee9b8..b75d3994a80d 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -284,6 +284,7 @@ compat/cloudabi/cloudabi_proc.c optional compat_cloudabi64 compat/cloudabi/cloudabi_random.c optional compat_cloudabi64 compat/cloudabi/cloudabi_sock.c optional compat_cloudabi64 compat/cloudabi/cloudabi_thread.c optional compat_cloudabi64 +compat/cloudabi/cloudabi_vdso.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_fd.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_module.c optional compat_cloudabi64 compat/cloudabi64/cloudabi64_poll.c optional compat_cloudabi64 @@ -3348,6 +3349,7 @@ kern/subr_disk.c standard kern/subr_eventhandler.c standard kern/subr_fattime.c standard kern/subr_firmware.c optional firmware +kern/subr_gtaskqueue.c standard kern/subr_hash.c standard kern/subr_hints.c standard kern/subr_kdb.c standard diff --git a/sys/conf/files.amd64 
b/sys/conf/files.amd64 index 331b811081f7..b9c6c67f9665 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -8,6 +8,18 @@ # dependency lines other than the first are silently ignored. # # +cloudabi64_vdso.o optional compat_cloudabi64 \ + dependency "$S/contrib/cloudabi/cloudabi_vdso_x86_64.c" \ + compile-with "${CC} -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi64/cloudabi64_vdso.lds.s -D_KERNEL -I. -I$S -I$S/contrib/cloudabi -O2 -fomit-frame-pointer $S/contrib/cloudabi/cloudabi_vdso_x86_64.c -o ${.TARGET}" \ + no-obj no-implicit-rule \ + clean "cloudabi64_vdso.o" +# +cloudabi64_vdso_blob.o optional compat_cloudabi64 \ + dependency "cloudabi64_vdso.o" \ + compile-with "${OBJCOPY} --input-target binary --output-target elf64-x86-64-freebsd --binary-architecture i386 cloudabi64_vdso.o ${.TARGET}" \ + no-implicit-rule \ + clean "cloudabi64_vdso_blob.o" +# linux32_genassym.o optional compat_linux32 \ dependency "$S/amd64/linux32/linux32_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ @@ -282,6 +294,7 @@ dev/hyperv/vmbus/vmbus_br.c optional hyperv dev/hyperv/vmbus/vmbus_chan.c optional hyperv dev/hyperv/vmbus/vmbus_et.c optional hyperv dev/hyperv/vmbus/vmbus_if.m optional hyperv +dev/hyperv/vmbus/vmbus_xact.c optional hyperv dev/hyperv/vmbus/amd64/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/amd64/vmbus_vector.S optional hyperv dev/nfe/if_nfe.c optional nfe pci diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index b4c82085ace0..31acaf9ce32f 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -1,4 +1,16 @@ # $FreeBSD$ +cloudabi64_vdso.o optional compat_cloudabi64 \ + dependency "$S/contrib/cloudabi/cloudabi_vdso_aarch64.c" \ + compile-with "${CC} -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi64/cloudabi64_vdso.lds.s -D_KERNEL -I. 
-I$S -I$S/contrib/cloudabi -O2 -fomit-frame-pointer $S/contrib/cloudabi/cloudabi_vdso_aarch64.c -o ${.TARGET}" \ + no-obj no-implicit-rule \ + clean "cloudabi64_vdso.o" +# +cloudabi64_vdso_blob.o optional compat_cloudabi64 \ + dependency "cloudabi64_vdso.o" \ + compile-with "${OBJCOPY} --input-target binary --output-target elf64-littleaarch64 --binary-architecture aarch64 cloudabi64_vdso.o ${.TARGET}" \ + no-implicit-rule \ + clean "cloudabi64_vdso_blob.o" +# arm/arm/generic_timer.c standard arm/arm/gic.c standard arm/arm/gic_fdt.c optional fdt diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 6ac3783a0fc1..516bf77f7446 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -253,6 +253,7 @@ dev/hyperv/vmbus/vmbus_br.c optional hyperv dev/hyperv/vmbus/vmbus_chan.c optional hyperv dev/hyperv/vmbus/vmbus_et.c optional hyperv dev/hyperv/vmbus/vmbus_if.m optional hyperv +dev/hyperv/vmbus/vmbus_xact.c optional hyperv dev/hyperv/vmbus/i386/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/i386/vmbus_vector.S optional hyperv dev/ichwd/ichwd.c optional ichwd diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv index fe30078f183f..e6908bed8ae2 100644 --- a/sys/conf/files.riscv +++ b/sys/conf/files.riscv @@ -19,9 +19,6 @@ libkern/flsl.c standard libkern/flsll.c standard libkern/memmove.c standard libkern/memset.c standard -riscv/htif/htif.c optional htif -riscv/htif/htif_block.c optional htif -riscv/htif/htif_console.c optional htif riscv/riscv/autoconf.c standard riscv/riscv/bcopy.c standard riscv/riscv/bus_machdep.c standard @@ -36,6 +33,7 @@ riscv/riscv/db_interface.c optional ddb riscv/riscv/db_trace.c optional ddb riscv/riscv/dump_machdep.c standard riscv/riscv/elf_machdep.c standard +riscv/riscv/exception.S standard riscv/riscv/intr_machdep.c standard riscv/riscv/in_cksum.c optional inet | inet6 riscv/riscv/identcpu.c standard @@ -47,6 +45,8 @@ riscv/riscv/mem.c standard riscv/riscv/nexus.c standard riscv/riscv/ofw_machdep.c optional fdt 
riscv/riscv/pmap.c standard +riscv/riscv/riscv_console.c optional rcons +riscv/riscv/sbi.S standard riscv/riscv/stack_machdep.c optional ddb | stack riscv/riscv/support.S standard riscv/riscv/swtch.S standard diff --git a/sys/conf/kern.post.mk b/sys/conf/kern.post.mk index 4cb60c002b62..32ec5b1b445c 100644 --- a/sys/conf/kern.post.mk +++ b/sys/conf/kern.post.mk @@ -65,6 +65,10 @@ OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ # Keep the related ports builds in the obj directory so that they are only rebuilt once per kernel build WRKDIRPREFIX?= ${MAKEOBJDIRPREFIX}${SRC_BASE}/sys/${KERNCONF} PORTSMODULESENV=\ + env \ + -u CC \ + -u CXX \ + -u CPP \ PATH=${PATH}:${LOCALBASE}/bin:${LOCALBASE}/sbin \ SRC_BASE=${SRC_BASE} \ OSVERSION=${OSRELDATE} \ diff --git a/sys/conf/ldscript.riscv b/sys/conf/ldscript.riscv index 152b97eca30d..5fe32aea1782 100644 --- a/sys/conf/ldscript.riscv +++ b/sys/conf/ldscript.riscv @@ -6,7 +6,7 @@ SEARCH_DIR(/usr/lib); SECTIONS { /* Read-only sections, merged into text segment: */ - . = kernbase + 0x80000000 /* KERNENTRY */; + . 
= kernbase; .text : AT(ADDR(.text) - kernbase) { *(.text) diff --git a/sys/dev/filemon/filemon.c b/sys/dev/filemon/filemon.c index 919af9d6dbe6..26e1bc38d071 100644 --- a/sys/dev/filemon/filemon.c +++ b/sys/dev/filemon/filemon.c @@ -137,6 +137,8 @@ filemon_proc_get(struct proc *p) { struct filemon *filemon; + if (p->p_filemon == NULL) + return (NULL); PROC_LOCK(p); filemon = filemon_acquire(p->p_filemon); PROC_UNLOCK(p); diff --git a/sys/dev/hyperv/include/vmbus.h b/sys/dev/hyperv/include/vmbus.h index 8fde5885fdff..0a16e6e8957d 100644 --- a/sys/dev/hyperv/include/vmbus.h +++ b/sys/dev/hyperv/include/vmbus.h @@ -89,6 +89,11 @@ struct vmbus_chanpkt_hdr { (const void *)((const uint8_t *)(pkt) + \ VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)) +/* Include padding */ +#define VMBUS_CHANPKT_DATALEN(pkt) \ + (VMBUS_CHANPKT_GETLEN((pkt)->cph_tlen) -\ + VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)) + struct vmbus_rxbuf_desc { uint32_t rb_len; uint32_t rb_ofs; diff --git a/sys/dev/hyperv/include/vmbus_xact.h b/sys/dev/hyperv/include/vmbus_xact.h new file mode 100644 index 000000000000..c2919aa8c1f2 --- /dev/null +++ b/sys/dev/hyperv/include/vmbus_xact.h @@ -0,0 +1,59 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VMBUS_XACT_H_ +#define _VMBUS_XACT_H_ + +#include +#include + +struct vmbus_xact; +struct vmbus_xact_ctx; + +struct vmbus_xact_ctx *vmbus_xact_ctx_create(bus_dma_tag_t dtag, + size_t req_size, size_t resp_size, + size_t priv_size); +void vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx); +struct vmbus_xact *vmbus_xact_get(struct vmbus_xact_ctx *ctx, + size_t req_len); +void vmbus_xact_put(struct vmbus_xact *xact); + +void *vmbus_xact_req_data(const struct vmbus_xact *xact); +bus_addr_t vmbus_xact_req_paddr(const struct vmbus_xact *xact); +void *vmbus_xact_priv(const struct vmbus_xact *xact, + size_t priv_len); +void vmbus_xact_activate(struct vmbus_xact *xact); +void vmbus_xact_deactivate(struct vmbus_xact *xact); +const void *vmbus_xact_wait(struct vmbus_xact *xact, + size_t *resp_len); +void vmbus_xact_wakeup(struct vmbus_xact *xact, + const void *data, size_t dlen); +void vmbus_xact_ctx_wakeup(struct vmbus_xact_ctx *ctx, + const void *data, size_t dlen); + +#endif /* !_VMBUS_XACT_H_ */ diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index 77fbb50004a0..c1d1c487fa96 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -45,9 +45,11 @@ #include #include -#include "hv_net_vsc.h" -#include "hv_rndis.h" -#include "hv_rndis_filter.h" +#include +#include +#include +#include +#include MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper-V netvsc driver"); @@ -68,6 
+70,15 @@ static void hv_nv_on_receive_completion(struct vmbus_channel *chan, static void hv_nv_on_receive(netvsc_dev *net_dev, struct hn_rx_ring *rxr, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkt); +static void hn_nvs_sent_none(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, + const struct nvsp_msg_ *msg, int); +static void hn_nvs_sent_xact(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, + const struct nvsp_msg_ *msg, int dlen); + +static struct hn_send_ctx hn_send_ctx_none = + HN_SEND_CTX_INITIALIZER(hn_nvs_sent_none, NULL); /* * @@ -141,9 +152,14 @@ hv_nv_get_next_send_section(netvsc_dev *net_dev) static int hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) { + struct vmbus_xact *xact; + struct hn_nvs_rxbuf_conn *conn; + const struct hn_nvs_rxbuf_connresp *resp; + size_t resp_len; + struct hn_send_ctx sndc; netvsc_dev *net_dev; - nvsp_msg *init_pkt; - int ret = 0; + uint32_t status; + int error; net_dev = hv_nv_get_outbound_net_device(sc); if (!net_dev) { @@ -155,7 +171,7 @@ hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) BUS_DMA_WAITOK | BUS_DMA_ZERO); if (net_dev->rx_buf == NULL) { device_printf(sc->hn_dev, "allocate rxbuf failed\n"); - return ENOMEM; + return (ENOMEM); } /* @@ -165,73 +181,76 @@ hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) * Only primary channel has RXBUF connected to it. Sub-channels * just share this RXBUF. 
*/ - ret = vmbus_chan_gpadl_connect(sc->hn_prichan, + error = vmbus_chan_gpadl_connect(sc->hn_prichan, net_dev->rxbuf_dma.hv_paddr, net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle); - if (ret != 0) { - device_printf(sc->hn_dev, "rxbuf gpadl connect failed: %d\n", - ret); + if (error) { + if_printf(sc->hn_ifp, "rxbuf gpadl connect failed: %d\n", + error); goto cleanup; } - - /* sema_wait(&ext->channel_init_sema); KYS CHECK */ - - /* Notify the NetVsp of the gpadl handle */ - init_pkt = &net_dev->channel_init_packet; - - memset(init_pkt, 0, sizeof(nvsp_msg)); - - init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf; - init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = - net_dev->rx_buf_gpadl_handle; - init_pkt->msgs.vers_1_msgs.send_rx_buf.id = - NETVSC_RECEIVE_BUFFER_ID; - - /* Send the gpadl notification request */ - - ret = vmbus_chan_send(sc->hn_prichan, - VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); - if (ret != 0) { - goto cleanup; - } - - sema_wait(&net_dev->channel_init_sema); - - /* Check the response */ - if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status - != nvsp_status_success) { - ret = EINVAL; - goto cleanup; - } - - net_dev->rx_section_count = - init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections; - - net_dev->rx_sections = malloc(net_dev->rx_section_count * - sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_WAITOK); - memcpy(net_dev->rx_sections, - init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections, - net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section)); - /* - * For first release, there should only be 1 section that represents - * the entire receive buffer + * Connect RXBUF to NVS. 
*/ - if (net_dev->rx_section_count != 1 - || net_dev->rx_sections->offset != 0) { - ret = EINVAL; + + xact = vmbus_xact_get(sc->hn_xact, sizeof(*conn)); + if (xact == NULL) { + if_printf(sc->hn_ifp, "no xact for nvs rxbuf conn\n"); + error = ENXIO; goto cleanup; } - goto exit; + conn = vmbus_xact_req_data(xact); + conn->nvs_type = HN_NVS_TYPE_RXBUF_CONN; + conn->nvs_gpadl = net_dev->rx_buf_gpadl_handle; + conn->nvs_sig = HN_NVS_RXBUF_SIG; + + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_xact, xact); + vmbus_xact_activate(xact); + + error = vmbus_chan_send(sc->hn_prichan, + VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, + conn, sizeof(*conn), (uint64_t)(uintptr_t)&sndc); + if (error != 0) { + if_printf(sc->hn_ifp, "send nvs rxbuf conn failed: %d\n", + error); + vmbus_xact_deactivate(xact); + vmbus_xact_put(xact); + goto cleanup; + } + + resp = vmbus_xact_wait(xact, &resp_len); + if (resp_len < sizeof(*resp)) { + if_printf(sc->hn_ifp, "invalid rxbuf conn resp length %zu\n", + resp_len); + vmbus_xact_put(xact); + error = EINVAL; + goto cleanup; + } + if (resp->nvs_type != HN_NVS_TYPE_RXBUF_CONNRESP) { + if_printf(sc->hn_ifp, "not rxbuf conn resp, type %u\n", + resp->nvs_type); + vmbus_xact_put(xact); + error = EINVAL; + goto cleanup; + } + + status = resp->nvs_status; + vmbus_xact_put(xact); + + if (status != HN_NVS_STATUS_OK) { + if_printf(sc->hn_ifp, "rxbuf conn failed: %x\n", status); + error = EIO; + goto cleanup; + } + net_dev->rx_section_count = 1; + + return (0); cleanup: hv_nv_destroy_rx_buffer(net_dev); - -exit: - return (ret); + return (error); } /* @@ -240,9 +259,14 @@ hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) static int hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) { + struct hn_send_ctx sndc; + struct vmbus_xact *xact; + struct hn_nvs_chim_conn *chim; + const struct hn_nvs_chim_connresp *resp; + size_t resp_len; + uint32_t status, sectsz; netvsc_dev *net_dev; - nvsp_msg *init_pkt; - int ret = 0; + int error; net_dev = 
hv_nv_get_outbound_net_device(sc); if (!net_dev) { @@ -254,7 +278,7 @@ hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) BUS_DMA_WAITOK | BUS_DMA_ZERO); if (net_dev->send_buf == NULL) { device_printf(sc->hn_dev, "allocate chimney txbuf failed\n"); - return ENOMEM; + return (ENOMEM); } /* @@ -264,47 +288,77 @@ hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) * Only primary channel has chimney sending buffer connected to it. * Sub-channels just share this chimney sending buffer. */ - ret = vmbus_chan_gpadl_connect(sc->hn_prichan, + error = vmbus_chan_gpadl_connect(sc->hn_prichan, net_dev->txbuf_dma.hv_paddr, net_dev->send_buf_size, &net_dev->send_buf_gpadl_handle); - if (ret != 0) { - device_printf(sc->hn_dev, "chimney sending buffer gpadl " - "connect failed: %d\n", ret); + if (error) { + if_printf(sc->hn_ifp, "chimney sending buffer gpadl " + "connect failed: %d\n", error); goto cleanup; } - /* Notify the NetVsp of the gpadl handle */ + /* + * Connect chimney sending buffer to NVS + */ - init_pkt = &net_dev->channel_init_packet; + xact = vmbus_xact_get(sc->hn_xact, sizeof(*chim)); + if (xact == NULL) { + if_printf(sc->hn_ifp, "no xact for nvs chim conn\n"); + error = ENXIO; + goto cleanup; + } - memset(init_pkt, 0, sizeof(nvsp_msg)); + chim = vmbus_xact_req_data(xact); + chim->nvs_type = HN_NVS_TYPE_CHIM_CONN; + chim->nvs_gpadl = net_dev->send_buf_gpadl_handle; + chim->nvs_sig = HN_NVS_CHIM_SIG; - init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf; - init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = - net_dev->send_buf_gpadl_handle; - init_pkt->msgs.vers_1_msgs.send_rx_buf.id = - NETVSC_SEND_BUFFER_ID; + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_xact, xact); + vmbus_xact_activate(xact); - /* Send the gpadl notification request */ - - ret = vmbus_chan_send(sc->hn_prichan, + error = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)init_pkt); - if (ret != 0) { + chim, 
sizeof(*chim), (uint64_t)(uintptr_t)&sndc); + if (error) { + if_printf(sc->hn_ifp, "send nvs chim conn failed: %d\n", + error); + vmbus_xact_deactivate(xact); + vmbus_xact_put(xact); goto cleanup; } - sema_wait(&net_dev->channel_init_sema); - - /* Check the response */ - if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status - != nvsp_status_success) { - ret = EINVAL; + resp = vmbus_xact_wait(xact, &resp_len); + if (resp_len < sizeof(*resp)) { + if_printf(sc->hn_ifp, "invalid chim conn resp length %zu\n", + resp_len); + vmbus_xact_put(xact); + error = EINVAL; + goto cleanup; + } + if (resp->nvs_type != HN_NVS_TYPE_CHIM_CONNRESP) { + if_printf(sc->hn_ifp, "not chim conn resp, type %u\n", + resp->nvs_type); + vmbus_xact_put(xact); + error = EINVAL; goto cleanup; } - net_dev->send_section_size = - init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size; + status = resp->nvs_status; + sectsz = resp->nvs_sectsz; + vmbus_xact_put(xact); + + if (status != HN_NVS_STATUS_OK) { + if_printf(sc->hn_ifp, "chim conn failed: %x\n", status); + error = EIO; + goto cleanup; + } + if (sectsz == 0) { + if_printf(sc->hn_ifp, "zero chimney sending buffer " + "section size\n"); + return 0; + } + + net_dev->send_section_size = sectsz; net_dev->send_section_count = net_dev->send_buf_size / net_dev->send_section_size; net_dev->bitsmap_words = howmany(net_dev->send_section_count, @@ -313,13 +367,15 @@ hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) malloc(net_dev->bitsmap_words * sizeof(long), M_NETVSC, M_WAITOK | M_ZERO); - goto exit; + if (bootverbose) { + if_printf(sc->hn_ifp, "chimney sending buffer %u/%u\n", + net_dev->send_section_size, net_dev->send_section_count); + } + return 0; cleanup: hv_nv_destroy_send_buffer(net_dev); - -exit: - return (ret); + return (error); } /* @@ -328,35 +384,27 @@ hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) { - nvsp_msg *revoke_pkt; int ret = 0; - /* - * If we got a 
section count, it means we received a - * send_rx_buf_complete msg - * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, - * we need to send a revoke msg here - */ if (net_dev->rx_section_count) { - /* Send the revoke receive buffer */ - revoke_pkt = &net_dev->revoke_packet; - memset(revoke_pkt, 0, sizeof(nvsp_msg)); - - revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf; - revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id = - NETVSC_RECEIVE_BUFFER_ID; - - ret = vmbus_chan_send(net_dev->sc->hn_prichan, - VMBUS_CHANPKT_TYPE_INBAND, 0, revoke_pkt, sizeof(nvsp_msg), - (uint64_t)(uintptr_t)revoke_pkt); + struct hn_nvs_rxbuf_disconn disconn; /* - * If we failed here, we might as well return and have a leak - * rather than continue and a bugchk + * Disconnect RXBUF from NVS. */ + memset(&disconn, 0, sizeof(disconn)); + disconn.nvs_type = HN_NVS_TYPE_RXBUF_DISCONN; + disconn.nvs_sig = HN_NVS_RXBUF_SIG; + + ret = vmbus_chan_send(net_dev->sc->hn_prichan, + VMBUS_CHANPKT_TYPE_INBAND, 0, &disconn, sizeof(disconn), + (uint64_t)(uintptr_t)&hn_send_ctx_none); if (ret != 0) { + if_printf(net_dev->sc->hn_ifp, + "send rxbuf disconn failed: %d\n", ret); return (ret); } + net_dev->rx_section_count = 0; } /* Tear down the gpadl on the vsp end */ @@ -379,12 +427,6 @@ hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) net_dev->rx_buf = NULL; } - if (net_dev->rx_sections) { - free(net_dev->rx_sections, M_NETVSC); - net_dev->rx_sections = NULL; - net_dev->rx_section_count = 0; - } - return (ret); } @@ -414,9 +456,8 @@ hv_nv_destroy_send_buffer(netvsc_dev *net_dev) NETVSC_SEND_BUFFER_ID; ret = vmbus_chan_send(net_dev->sc->hn_prichan, - VMBUS_CHANPKT_TYPE_INBAND, 0, - revoke_pkt, sizeof(nvsp_msg), - (uint64_t)(uintptr_t)revoke_pkt); + VMBUS_CHANPKT_TYPE_INBAND, 0, revoke_pkt, sizeof(nvsp_msg), + (uint64_t)(uintptr_t)&hn_send_ctx_none); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk @@ -454,43 +495,64 @@ hv_nv_destroy_send_buffer(netvsc_dev 
*net_dev) return (ret); } - -/* - * Attempt to negotiate the caller-specified NVSP version - * - * For NVSP v2, Server 2008 R2 does not set - * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers - * to the negotiated version, so we cannot rely on that. - */ static int hv_nv_negotiate_nvsp_protocol(struct hn_softc *sc, netvsc_dev *net_dev, - uint32_t nvsp_ver) + uint32_t nvs_ver) { - nvsp_msg *init_pkt; - int ret; + struct hn_send_ctx sndc; + struct vmbus_xact *xact; + struct hn_nvs_init *init; + const struct hn_nvs_init_resp *resp; + size_t resp_len; + uint32_t status; + int error; - init_pkt = &net_dev->channel_init_packet; - memset(init_pkt, 0, sizeof(nvsp_msg)); - init_pkt->hdr.msg_type = nvsp_msg_type_init; + xact = vmbus_xact_get(sc->hn_xact, sizeof(*init)); + if (xact == NULL) { + if_printf(sc->hn_ifp, "no xact for nvs init\n"); + return (ENXIO); + } - /* - * Specify parameter as the only acceptable protocol version - */ - init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver; - init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver; + init = vmbus_xact_req_data(xact); + init->nvs_type = HN_NVS_TYPE_INIT; + init->nvs_ver_min = nvs_ver; + init->nvs_ver_max = nvs_ver; - /* Send the init request */ - ret = vmbus_chan_send(sc->hn_prichan, + vmbus_xact_activate(xact); + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_xact, xact); + + error = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); - if (ret != 0) - return (-1); + init, sizeof(*init), (uint64_t)(uintptr_t)&sndc); + if (error) { + if_printf(sc->hn_ifp, "send nvs init failed: %d\n", error); + vmbus_xact_deactivate(xact); + vmbus_xact_put(xact); + return (error); + } - sema_wait(&net_dev->channel_init_sema); - - if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success) + resp = vmbus_xact_wait(xact, &resp_len); + if (resp_len < sizeof(*resp)) { + if_printf(sc->hn_ifp, "invalid init resp 
length %zu\n", + resp_len); + vmbus_xact_put(xact); return (EINVAL); + } + if (resp->nvs_type != HN_NVS_TYPE_INIT_RESP) { + if_printf(sc->hn_ifp, "not init resp, type %u\n", + resp->nvs_type); + vmbus_xact_put(xact); + return (EINVAL); + } + status = resp->nvs_status; + vmbus_xact_put(xact); + + if (status != HN_NVS_STATUS_OK) { + if_printf(sc->hn_ifp, "nvs init failed for ver 0x%x\n", + nvs_ver); + return (EINVAL); + } return (0); } @@ -502,33 +564,19 @@ hv_nv_negotiate_nvsp_protocol(struct hn_softc *sc, netvsc_dev *net_dev, static int hv_nv_send_ndis_config(struct hn_softc *sc, uint32_t mtu) { - netvsc_dev *net_dev; - nvsp_msg *init_pkt; - int ret; + struct hn_nvs_ndis_conf conf; + int error; - net_dev = hv_nv_get_outbound_net_device(sc); - if (!net_dev) - return (-ENODEV); + memset(&conf, 0, sizeof(conf)); + conf.nvs_type = HN_NVS_TYPE_NDIS_CONF; + conf.nvs_mtu = mtu; + conf.nvs_caps = HN_NVS_NDIS_CONF_VLAN; - /* - * Set up configuration packet, write MTU - * Indicate we are capable of handling VLAN tags - */ - init_pkt = &net_dev->channel_init_packet; - memset(init_pkt, 0, sizeof(nvsp_msg)); - init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config; - init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu; - init_pkt-> - msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q - = 1; - - /* Send the configuration packet */ - ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); - if (ret != 0) - return (-EINVAL); - - return (0); + error = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, + &conf, sizeof(conf), (uint64_t)(uintptr_t)&hn_send_ctx_none); + if (error) + if_printf(sc->hn_ifp, "send nvs ndis conf failed: %d\n", error); + return (error); } /* @@ -538,8 +586,6 @@ static int hv_nv_connect_to_vsp(struct hn_softc *sc) { netvsc_dev *net_dev; - nvsp_msg *init_pkt; - uint32_t ndis_version; uint32_t protocol_list[] = { NVSP_PROTOCOL_VERSION_1, 
NVSP_PROTOCOL_VERSION_2, NVSP_PROTOCOL_VERSION_4, @@ -549,6 +595,7 @@ hv_nv_connect_to_vsp(struct hn_softc *sc) int ret = 0; device_t dev = sc->hn_dev; struct ifnet *ifp = sc->hn_ifp; + struct hn_nvs_ndis_init ndis; net_dev = hv_nv_get_outbound_net_device(sc); @@ -581,37 +628,23 @@ hv_nv_connect_to_vsp(struct hn_softc *sc) ret = hv_nv_send_ndis_config(sc, ifp->if_mtu); /* - * Send the NDIS version + * Initialize NDIS. */ - init_pkt = &net_dev->channel_init_packet; - memset(init_pkt, 0, sizeof(nvsp_msg)); - - if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_4) { - ndis_version = NDIS_VERSION_6_1; - } else { - ndis_version = NDIS_VERSION_6_30; - } - - init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers; - init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers = - (ndis_version & 0xFFFF0000) >> 16; - init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers = - ndis_version & 0xFFFF; - - /* Send the init request */ + memset(&ndis, 0, sizeof(ndis)); + ndis.nvs_type = HN_NVS_TYPE_NDIS_INIT; + ndis.nvs_ndis_major = NDIS_VERSION_MAJOR_6; + if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_4) + ndis.nvs_ndis_minor = NDIS_VERSION_MINOR_1; + else + ndis.nvs_ndis_minor = NDIS_VERSION_MINOR_30; ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); + &ndis, sizeof(ndis), (uint64_t)(uintptr_t)&hn_send_ctx_none); if (ret != 0) { + if_printf(sc->hn_ifp, "send nvs ndis init failed: %d\n", ret); goto cleanup; } - /* - * TODO: BUGBUG - We have to wait for the above msg since the netvsp - * uses KMCL which acknowledges packet (completion packet) - * since our Vmbus always set the VMBUS_CHANPKT_FLAG_RC flag - */ - /* sema_wait(&NetVscChannel->channel_init_sema); */ /* Post the big receive buffer to NetVSP */ if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) @@ -731,6 +764,52 @@ hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel) return (0); } +void +hn_nvs_sent_wakeup(struct 
hn_send_ctx *sndc __unused, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan __unused, + const struct nvsp_msg_ *msg, int dlen __unused) +{ + /* Copy the response back */ + memcpy(&net_dev->channel_init_packet, msg, sizeof(nvsp_msg)); + sema_post(&net_dev->channel_init_sema); +} + +static void +hn_nvs_sent_xact(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev __unused, struct vmbus_channel *chan __unused, + const struct nvsp_msg_ *msg, int dlen) +{ + + vmbus_xact_wakeup(sndc->hn_cbarg, msg, dlen); +} + +static void +hn_nvs_sent_none(struct hn_send_ctx *sndc __unused, + struct netvsc_dev_ *net_dev __unused, struct vmbus_channel *chan __unused, + const struct nvsp_msg_ *msg __unused, int dlen __unused) +{ + /* EMPTY */ +} + +void +hn_chim_free(struct netvsc_dev_ *net_dev, uint32_t chim_idx) +{ + u_long mask; + uint32_t idx; + + idx = chim_idx / BITS_PER_LONG; + KASSERT(idx < net_dev->bitsmap_words, + ("invalid chimney index 0x%x", chim_idx)); + + mask = 1UL << (chim_idx % BITS_PER_LONG); + KASSERT(net_dev->send_section_bitsmap[idx] & mask, + ("index bitmap 0x%lx, chimney index %u, " + "bitmap idx %d, bitmask 0x%lx", + net_dev->send_section_bitsmap[idx], chim_idx, idx, mask)); + + atomic_clear_long(&net_dev->send_section_bitsmap[idx], mask); +} + /* * Net VSC on send completion */ @@ -738,59 +817,16 @@ static void hv_nv_on_send_completion(netvsc_dev *net_dev, struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkt) { - const nvsp_msg *nvsp_msg_pkt; - netvsc_packet *net_vsc_pkt; + struct hn_send_ctx *sndc; - nvsp_msg_pkt = VMBUS_CHANPKT_CONST_DATA(pkt); - - if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete - || nvsp_msg_pkt->hdr.msg_type - == nvsp_msg_1_type_send_rx_buf_complete - || nvsp_msg_pkt->hdr.msg_type - == nvsp_msg_1_type_send_send_buf_complete - || nvsp_msg_pkt->hdr.msg_type - == nvsp_msg5_type_subchannel) { - /* Copy the response back */ - memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt, - sizeof(nvsp_msg)); - 
sema_post(&net_dev->channel_init_sema); - } else if (nvsp_msg_pkt->hdr.msg_type == - nvsp_msg_1_type_send_rndis_pkt_complete) { - /* Get the send context */ - net_vsc_pkt = - (netvsc_packet *)(unsigned long)pkt->cph_xactid; - if (NULL != net_vsc_pkt) { - if (net_vsc_pkt->send_buf_section_idx != - NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { - u_long mask; - int idx; - - idx = net_vsc_pkt->send_buf_section_idx / - BITS_PER_LONG; - KASSERT(idx < net_dev->bitsmap_words, - ("invalid section index %u", - net_vsc_pkt->send_buf_section_idx)); - mask = 1UL << - (net_vsc_pkt->send_buf_section_idx % - BITS_PER_LONG); - - KASSERT(net_dev->send_section_bitsmap[idx] & - mask, - ("index bitmap 0x%lx, section index %u, " - "bitmap idx %d, bitmask 0x%lx", - net_dev->send_section_bitsmap[idx], - net_vsc_pkt->send_buf_section_idx, - idx, mask)); - atomic_clear_long( - &net_dev->send_section_bitsmap[idx], mask); - } - - /* Notify the layer above us */ - net_vsc_pkt->compl.send.on_send_completion(chan, - net_vsc_pkt->compl.send.send_completion_context); - - } - } + sndc = (struct hn_send_ctx *)(uintptr_t)pkt->cph_xactid; + sndc->hn_cb(sndc, net_dev, chan, VMBUS_CHANPKT_CONST_DATA(pkt), + VMBUS_CHANPKT_DATALEN(pkt)); + /* + * NOTE: + * 'sndc' CAN NOT be accessed anymore, since it can be freed by + * its callback. + */ } /* @@ -799,14 +835,14 @@ hv_nv_on_send_completion(netvsc_dev *net_dev, struct vmbus_channel *chan, * Returns 0 on success, non-zero on failure. 
*/ int -hv_nv_on_send(struct vmbus_channel *chan, - netvsc_packet *pkt, struct vmbus_gpa *gpa, int gpa_cnt) +hv_nv_on_send(struct vmbus_channel *chan, bool is_data_pkt, + struct hn_send_ctx *sndc, struct vmbus_gpa *gpa, int gpa_cnt) { nvsp_msg send_msg; int ret; send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt; - if (pkt->is_data_pkt) { + if (is_data_pkt) { /* 0 is RMC_DATA */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0; } else { @@ -815,17 +851,17 @@ hv_nv_on_send(struct vmbus_channel *chan, } send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx = - pkt->send_buf_section_idx; + sndc->hn_chim_idx; send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = - pkt->send_buf_section_size; + sndc->hn_chim_sz; if (gpa_cnt) { ret = vmbus_chan_send_sglist(chan, gpa, gpa_cnt, - &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt); + &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)sndc); } else { ret = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt); + &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)sndc); } return (ret); diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.h b/sys/dev/hyperv/netvsc/hv_net_vsc.h index e1e1b1634d52..a861c3e18d53 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.h +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.h @@ -1060,7 +1060,6 @@ typedef struct netvsc_dev_ { uint32_t rx_buf_size; uint32_t rx_buf_gpadl_handle; uint32_t rx_section_count; - nvsp_1_rx_buf_section *rx_sections; /* Used for NetVSP initialization protocol */ struct sema channel_init_sema; @@ -1112,29 +1111,8 @@ typedef void (*pfn_on_send_rx_completion)(struct vmbus_channel *, void *); #endif typedef struct netvsc_packet_ { - uint8_t is_data_pkt; /* One byte */ - uint16_t vlan_tci; - uint32_t status; - - /* Completion */ - union { - struct { - uint64_t rx_completion_tid; - void *rx_completion_context; - /* This is no longer used */ - pfn_on_send_rx_completion on_rx_completion; - } 
rx; - struct { - uint64_t send_completion_tid; - void *send_completion_context; - /* Still used in netvsc and filter code */ - pfn_on_send_rx_completion on_send_completion; - } send; - } compl; - uint32_t send_buf_section_idx; - uint32_t send_buf_section_size; - - void *rndis_mesg; + uint16_t vlan_tci; + uint32_t status; uint32_t tot_data_buf_len; void *data; } netvsc_packet; @@ -1264,20 +1242,22 @@ typedef struct hn_softc { struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; + struct vmbus_xact_ctx *hn_xact; } hn_softc_t; /* * Externs */ extern int hv_promisc_mode; +struct hn_send_ctx; void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status); netvsc_dev *hv_nv_on_device_add(struct hn_softc *sc, void *additional_info, struct hn_rx_ring *rxr); int hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel); -int hv_nv_on_send(struct vmbus_channel *chan, netvsc_packet *pkt, - struct vmbus_gpa *gpa, int gpa_cnt); +int hv_nv_on_send(struct vmbus_channel *chan, bool is_data_pkt, + struct hn_send_ctx *sndc, struct vmbus_gpa *gpa, int gpa_cnt); int hv_nv_get_next_send_section(netvsc_dev *net_dev); void hv_nv_subchan_attach(struct vmbus_channel *chan, struct hn_rx_ring *rxr); diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c index 38390d702c18..4653e776da92 100644 --- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -115,6 +115,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include "hv_net_vsc.h" #include "hv_rndis.h" @@ -124,6 +125,9 @@ __FBSDID("$FreeBSD$"); /* Short for Hyper-V network interface */ #define NETVSC_DEVNAME "hn" +#define HN_XACT_REQ_SIZE (2 * PAGE_SIZE) +#define HN_XACT_RESP_SIZE (2 * PAGE_SIZE) + /* * It looks like offset 0 of buf is reserved to hold the softc pointer. * The sc pointer evidently not needed, and is not presently populated. 
@@ -166,7 +170,7 @@ struct hn_txdesc { struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ - netvsc_packet netvsc_pkt; /* XXX to be removed */ + struct hn_send_ctx send_ctx; bus_dmamap_t data_dmap; @@ -542,6 +546,11 @@ netvsc_attach(device_t dev) IFCAP_LRO; ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; + sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), + HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); + if (sc->hn_xact == NULL) + goto failed; + error = hv_rf_on_device_add(sc, &device_info, ring_cnt, &sc->hn_rx_ring[0]); if (error) @@ -643,6 +652,7 @@ netvsc_detach(device_t dev) if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); + vmbus_xact_ctx_destroy(sc->hn_xact); return (0); } @@ -781,14 +791,15 @@ hn_txeof(struct hn_tx_ring *txr) } static void -hn_tx_done(struct vmbus_channel *chan, void *xpkt) +hn_tx_done(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, + struct vmbus_channel *chan, const struct nvsp_msg_ *msg __unused, + int dlen __unused) { - netvsc_packet *packet = xpkt; - struct hn_txdesc *txd; + struct hn_txdesc *txd = sndc->hn_cbarg; struct hn_tx_ring *txr; - txd = (struct hn_txdesc *)(uintptr_t) - packet->compl.send.send_completion_tid; + if (sndc->hn_chim_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) + hn_chim_free(net_dev, sndc->hn_chim_idx); txr = txd->txr; KASSERT(txr->hn_chan == chan, @@ -835,16 +846,14 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; - netvsc_packet *packet; rndis_msg *rndis_mesg; rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; struct rndis_hash_value *hash_value; - uint32_t rndis_msg_size; + uint32_t rndis_msg_size, tot_data_buf_len, send_buf_section_idx; + int send_buf_section_size; - packet = &txd->netvsc_pkt; - packet->is_data_pkt = TRUE; - packet->tot_data_buf_len = m_head->m_pkthdr.len; + tot_data_buf_len = 
m_head->m_pkthdr.len; /* * extension points to the area reserved for the @@ -859,7 +868,7 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset = sizeof(rndis_packet); - rndis_pkt->data_length = packet->tot_data_buf_len; + rndis_pkt->data_length = tot_data_buf_len; rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); @@ -967,15 +976,14 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) } } - rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; - packet->tot_data_buf_len = rndis_mesg->msg_len; + rndis_mesg->msg_len = tot_data_buf_len + rndis_msg_size; + tot_data_buf_len = rndis_mesg->msg_len; /* * Chimney send, if the packet could fit into one chimney buffer. */ - if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) { + if (tot_data_buf_len < txr->hn_tx_chimney_size) { netvsc_dev *net_dev = txr->hn_sc->net_dev; - uint32_t send_buf_section_idx; txr->hn_tx_chimney_tried++; send_buf_section_idx = @@ -990,9 +998,7 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) dest += rndis_msg_size; m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); - packet->send_buf_section_idx = send_buf_section_idx; - packet->send_buf_section_size = - packet->tot_data_buf_len; + send_buf_section_size = tot_data_buf_len; txr->hn_gpa_cnt = 0; txr->hn_tx_chimney++; goto done; @@ -1039,16 +1045,14 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) gpa->gpa_len = segs[i].ds_len; } - packet->send_buf_section_idx = - NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; - packet->send_buf_section_size = 0; + send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; + send_buf_section_size = 0; done: txd->m = m_head; /* Set the completion routine */ - packet->compl.send.on_send_completion = hn_tx_done; - packet->compl.send.send_completion_context = packet; - 
packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)txd; + hn_send_ctx_init(&txd->send_ctx, hn_tx_done, txd, + send_buf_section_idx, send_buf_section_size); return 0; } @@ -1068,7 +1072,7 @@ hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) * Make sure that txd is not freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); - error = hv_nv_on_send(txr->hn_chan, &txd->netvsc_pkt, + error = hv_nv_on_send(txr->hn_chan, true, &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt); if (!error) { ETHER_BPF_MTAP(ifp, txd->m); diff --git a/sys/dev/hyperv/netvsc/hv_rndis.h b/sys/dev/hyperv/netvsc/hv_rndis.h index da2b408c6494..6527668de9fe 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis.h +++ b/sys/dev/hyperv/netvsc/hv_rndis.h @@ -41,6 +41,10 @@ #define NDIS_VERSION_6_1 0x00060001 #define NDIS_VERSION_6_30 0x0006001e +#define NDIS_VERSION_MAJOR_6 6 +#define NDIS_VERSION_MINOR_1 1 +#define NDIS_VERSION_MINOR_30 30 + #define NDIS_VERSION (NDIS_VERSION_5_1) /* diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.c b/sys/dev/hyperv/netvsc/hv_rndis_filter.c index 22acfd61031d..7daf7b282f5f 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis_filter.c +++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.c @@ -85,11 +85,17 @@ static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter); static int hv_rf_init_device(rndis_device *device); static int hv_rf_open_device(rndis_device *device); static int hv_rf_close_device(rndis_device *device); -static void hv_rf_on_send_request_completion(struct vmbus_channel *, void *context); -static void hv_rf_on_send_request_halt_completion(struct vmbus_channel *, void *context); int hv_rf_send_offload_request(struct hn_softc *sc, rndis_offload_params *offloads); + +static void hn_rndis_sent_halt(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, + const struct nvsp_msg_ *msg, int dlen); +static void hn_rndis_sent_cb(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev, struct vmbus_channel 
*chan, + const struct nvsp_msg_ *msg, int dlen); + /* * Set the Per-Packet-Info with the specified type */ @@ -238,17 +244,14 @@ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type) { - netvsc_packet *packet; netvsc_dev *net_dev = device->net_dev; - int send_buf_section_idx; + uint32_t send_buf_section_idx, tot_data_buf_len; struct vmbus_gpa gpa[2]; - int gpa_cnt; + int gpa_cnt, send_buf_section_size; + hn_sent_callback_t cb; /* Set up the packet to send it */ - packet = &request->pkt; - - packet->is_data_pkt = FALSE; - packet->tot_data_buf_len = request->request_msg.msg_len; + tot_data_buf_len = request->request_msg.msg_len; gpa_cnt = 1; gpa[0].gpa_page = hv_get_phys_addr(&request->request_msg) >> PAGE_SHIFT; @@ -265,16 +268,12 @@ hv_rf_send_request(rndis_device *device, rndis_request *request, gpa[1].gpa_len = request->request_msg.msg_len - gpa[0].gpa_len; } - packet->compl.send.send_completion_context = request; /* packet */ - if (message_type != REMOTE_NDIS_HALT_MSG) { - packet->compl.send.on_send_completion = - hv_rf_on_send_request_completion; - } else { - packet->compl.send.on_send_completion = - hv_rf_on_send_request_halt_completion; - } - packet->compl.send.send_completion_tid = (unsigned long)device; - if (packet->tot_data_buf_len < net_dev->send_section_size) { + if (message_type != REMOTE_NDIS_HALT_MSG) + cb = hn_rndis_sent_cb; + else + cb = hn_rndis_sent_halt; + + if (tot_data_buf_len < net_dev->send_section_size) { send_buf_section_idx = hv_nv_get_next_send_section(net_dev); if (send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { @@ -282,19 +281,20 @@ hv_rf_send_request(rndis_device *device, rndis_request *request, send_buf_section_idx * net_dev->send_section_size); memcpy(dest, &request->request_msg, request->request_msg.msg_len); - packet->send_buf_section_idx = send_buf_section_idx; - packet->send_buf_section_size = packet->tot_data_buf_len; + send_buf_section_size = tot_data_buf_len; 
gpa_cnt = 0; goto sendit; } /* Failed to allocate chimney send buffer; move on */ } - packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; - packet->send_buf_section_size = 0; + send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; + send_buf_section_size = 0; sendit: - return hv_nv_on_send(device->net_dev->sc->hn_prichan, packet, - gpa, gpa_cnt); + hn_send_ctx_init(&request->send_ctx, cb, request, + send_buf_section_idx, send_buf_section_size); + return hv_nv_on_send(device->net_dev->sc->hn_prichan, false, + &request->send_ctx, gpa, gpa_cnt); } /* @@ -1056,6 +1056,7 @@ int hv_rf_on_device_add(struct hn_softc *sc, void *additl_info, int nchan, struct hn_rx_ring *rxr) { + struct hn_send_ctx sndc; int ret; netvsc_dev *net_dev; rndis_device *rndis_dev; @@ -1162,9 +1163,10 @@ hv_rf_on_device_add(struct hn_softc *sc, void *additl_info, init_pkt->msgs.vers_5_msgs.subchannel_request.num_subchannels = net_dev->num_channel - 1; + hn_send_ctx_init_simple(&sndc, hn_nvs_sent_wakeup, NULL); ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, - init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); + init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)&sndc); if (ret != 0) { device_printf(dev, "Fail to allocate subchannel\n"); goto out; @@ -1235,23 +1237,24 @@ hv_rf_on_close(struct hn_softc *sc) return (hv_rf_close_device((rndis_device *)net_dev->extension)); } -/* - * RNDIS filter on send request completion callback - */ -static void -hv_rf_on_send_request_completion(struct vmbus_channel *chan __unused, - void *context __unused) +static void +hn_rndis_sent_cb(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, + struct vmbus_channel *chan __unused, const struct nvsp_msg_ *msg __unused, + int dlen __unused) { + if (sndc->hn_chim_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) + hn_chim_free(net_dev, sndc->hn_chim_idx); } -/* - * RNDIS filter on send request (halt only) completion callback - */ -static 
void -hv_rf_on_send_request_halt_completion(struct vmbus_channel *chan __unused, - void *context) +static void +hn_rndis_sent_halt(struct hn_send_ctx *sndc, struct netvsc_dev_ *net_dev, + struct vmbus_channel *chan __unused, const struct nvsp_msg_ *msg __unused, + int dlen __unused) { - rndis_request *request = context; + rndis_request *request = sndc->hn_cbarg; + + if (sndc->hn_chim_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) + hn_chim_free(net_dev, sndc->hn_chim_idx); /* * Notify hv_rf_halt_device() about halt completion. diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.h b/sys/dev/hyperv/netvsc/hv_rndis_filter.h index 2f940db33e89..ebfda20b94db 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis_filter.h +++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.h @@ -33,6 +33,7 @@ #include #include +#include /* * Defines @@ -75,7 +76,7 @@ typedef struct rndis_request_ { uint8_t buf_resp[PAGE_SIZE]; /* Simplify allocation by having a netvsc packet inline */ - netvsc_packet pkt; + struct hn_send_ctx send_ctx; /* * The max request size is sizeof(rndis_msg) + PAGE_SIZE. diff --git a/sys/dev/hyperv/netvsc/if_hnreg.h b/sys/dev/hyperv/netvsc/if_hnreg.h new file mode 100644 index 000000000000..abe371a36915 --- /dev/null +++ b/sys/dev/hyperv/netvsc/if_hnreg.h @@ -0,0 +1,138 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IF_HNREG_H_ +#define _IF_HNREG_H_ + +#include +#include + +#define HN_NVS_RXBUF_SIG 0xcafe +#define HN_NVS_CHIM_SIG 0xface + +#define HN_NVS_STATUS_OK 1 + +#define HN_NVS_TYPE_INIT 1 +#define HN_NVS_TYPE_INIT_RESP 2 +#define HN_NVS_TYPE_NDIS_INIT 100 +#define HN_NVS_TYPE_RXBUF_CONN 101 +#define HN_NVS_TYPE_RXBUF_CONNRESP 102 +#define HN_NVS_TYPE_RXBUF_DISCONN 103 +#define HN_NVS_TYPE_CHIM_CONN 104 +#define HN_NVS_TYPE_CHIM_CONNRESP 105 +#define HN_NVS_TYPE_NDIS_CONF 125 + +/* + * Any size less than this one will _not_ work, e.g. hn_nvs_init + * only has 12B valid data, however, if only 12B data were sent, + * Hypervisor would never reply. 
+ */ +#define HN_NVS_REQSIZE_MIN 32 + +struct hn_nvs_init { + uint32_t nvs_type; /* HN_NVS_TYPE_INIT */ + uint32_t nvs_ver_min; + uint32_t nvs_ver_max; + uint8_t nvs_rsvd[20]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_init) >= HN_NVS_REQSIZE_MIN); + +struct hn_nvs_init_resp { + uint32_t nvs_type; /* HN_NVS_TYPE_INIT_RESP */ + uint32_t nvs_ver; /* deprecated */ + uint32_t nvs_rsvd; + uint32_t nvs_status; /* HN_NVS_STATUS_ */ +} __packed; + +/* No response */ +struct hn_nvs_ndis_conf { + uint32_t nvs_type; /* HN_NVS_TYPE_NDIS_CONF */ + uint32_t nvs_mtu; + uint32_t nvs_rsvd; + uint64_t nvs_caps; /* HN_NVS_NDIS_CONF_ */ + uint8_t nvs_rsvd1[12]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_ndis_conf) >= HN_NVS_REQSIZE_MIN); + +#define HN_NVS_NDIS_CONF_SRIOV 0x0004 +#define HN_NVS_NDIS_CONF_VLAN 0x0008 + +/* No response */ +struct hn_nvs_ndis_init { + uint32_t nvs_type; /* HN_NVS_TYPE_NDIS_INIT */ + uint32_t nvs_ndis_major; /* NDIS_VERSION_MAJOR_ */ + uint32_t nvs_ndis_minor; /* NDIS_VERSION_MINOR_ */ + uint8_t nvs_rsvd[20]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_ndis_init) >= HN_NVS_REQSIZE_MIN); + +struct hn_nvs_rxbuf_conn { + uint32_t nvs_type; /* HN_NVS_TYPE_RXBUF_CONN */ + uint32_t nvs_gpadl; /* RXBUF vmbus GPADL */ + uint16_t nvs_sig; /* HN_NVS_RXBUF_SIG */ + uint8_t nvs_rsvd[22]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_rxbuf_conn) >= HN_NVS_REQSIZE_MIN); + +struct hn_nvs_rxbuf_sect { + uint32_t nvs_start; + uint32_t nvs_slotsz; + uint32_t nvs_slotcnt; + uint32_t nvs_end; +} __packed; + +struct hn_nvs_rxbuf_connresp { + uint32_t nvs_type; /* HN_NVS_TYPE_RXBUF_CONNRESP */ + uint32_t nvs_status; /* HN_NVS_STATUS_ */ + uint32_t nvs_nsect; /* # of elem in nvs_sect */ + struct hn_nvs_rxbuf_sect nvs_sect[]; +} __packed; + +/* No response */ +struct hn_nvs_rxbuf_disconn { + uint32_t nvs_type; /* HN_NVS_TYPE_RXBUF_DISCONN */ + uint16_t nvs_sig; /* HN_NVS_RXBUF_SIG */ + uint8_t nvs_rsvd[26]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_rxbuf_disconn) >=
HN_NVS_REQSIZE_MIN); + +struct hn_nvs_chim_conn { + uint32_t nvs_type; /* HN_NVS_TYPE_CHIM_CONN */ + uint32_t nvs_gpadl; /* chimney buf vmbus GPADL */ + uint16_t nvs_sig; /* HN_NVS_CHIM_SIG */ + uint8_t nvs_rsvd[22]; +} __packed; +CTASSERT(sizeof(struct hn_nvs_chim_conn) >= HN_NVS_REQSIZE_MIN); + +struct hn_nvs_chim_connresp { + uint32_t nvs_type; /* HN_NVS_TYPE_CHIM_CONNRESP */ + uint32_t nvs_status; /* HN_NVS_STATUS_ */ + uint32_t nvs_sectsz; /* section size */ +} __packed; + +#endif /* !_IF_HNREG_H_ */ diff --git a/sys/dev/hyperv/netvsc/if_hnvar.h b/sys/dev/hyperv/netvsc/if_hnvar.h new file mode 100644 index 000000000000..9d2c1267ff73 --- /dev/null +++ b/sys/dev/hyperv/netvsc/if_hnvar.h @@ -0,0 +1,83 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IF_HNVAR_H_ +#define _IF_HNVAR_H_ + +#include +#include + +struct netvsc_dev_; +struct nvsp_msg_; + +struct vmbus_channel; +struct hn_send_ctx; + +typedef void (*hn_sent_callback_t) + (struct hn_send_ctx *, struct netvsc_dev_ *, + struct vmbus_channel *, const struct nvsp_msg_ *, int); + +struct hn_send_ctx { + hn_sent_callback_t hn_cb; + void *hn_cbarg; + uint32_t hn_chim_idx; + int hn_chim_sz; +}; + +#define HN_SEND_CTX_INITIALIZER(cb, cbarg) \ +{ \ + .hn_cb = cb, \ + .hn_cbarg = cbarg, \ + .hn_chim_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX, \ + .hn_chim_sz = 0 \ +} + +static __inline void +hn_send_ctx_init(struct hn_send_ctx *sndc, hn_sent_callback_t cb, + void *cbarg, uint32_t chim_idx, int chim_sz) +{ + sndc->hn_cb = cb; + sndc->hn_cbarg = cbarg; + sndc->hn_chim_idx = chim_idx; + sndc->hn_chim_sz = chim_sz; +} + +static __inline void +hn_send_ctx_init_simple(struct hn_send_ctx *sndc, hn_sent_callback_t cb, + void *cbarg) +{ + hn_send_ctx_init(sndc, cb, cbarg, + NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX, 0); +} + +void hn_nvs_sent_wakeup(struct hn_send_ctx *sndc, + struct netvsc_dev_ *net_dev, struct vmbus_channel *chan, + const struct nvsp_msg_ *msg, int dlen); +void hn_chim_free(struct netvsc_dev_ *net_dev, uint32_t chim_idx); + +#endif /* !_IF_HNVAR_H_ */ diff --git a/sys/dev/hyperv/vmbus/vmbus.c b/sys/dev/hyperv/vmbus/vmbus.c index 05c9c5b2d90c..6753c9e9b524 100644 --- a/sys/dev/hyperv/vmbus/vmbus.c +++ 
b/sys/dev/hyperv/vmbus/vmbus.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -62,25 +63,10 @@ __FBSDID("$FreeBSD$"); #define VMBUS_GPADL_START 0xe1e10 struct vmbus_msghc { - struct hypercall_postmsg_in *mh_inprm; + struct vmbus_xact *mh_xact; struct hypercall_postmsg_in mh_inprm_save; - struct hyperv_dma mh_inprm_dma; - - struct vmbus_message *mh_resp; - struct vmbus_message mh_resp0; }; -struct vmbus_msghc_ctx { - struct vmbus_msghc *mhc_free; - struct mtx mhc_free_lock; - uint32_t mhc_flags; - - struct vmbus_msghc *mhc_active; - struct mtx mhc_active_lock; -}; - -#define VMBUS_MSGHC_CTXF_DESTROY 0x0001 - static int vmbus_probe(device_t); static int vmbus_attach(device_t); static int vmbus_detach(device_t); @@ -116,15 +102,6 @@ static int vmbus_doattach(struct vmbus_softc *); static void vmbus_event_proc_dummy(struct vmbus_softc *, int); -static struct vmbus_msghc_ctx *vmbus_msghc_ctx_create(bus_dma_tag_t); -static void vmbus_msghc_ctx_destroy( - struct vmbus_msghc_ctx *); -static void vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *); -static struct vmbus_msghc *vmbus_msghc_alloc(bus_dma_tag_t); -static void vmbus_msghc_free(struct vmbus_msghc *); -static struct vmbus_msghc *vmbus_msghc_get1(struct vmbus_msghc_ctx *, - uint32_t); - static struct vmbus_softc *vmbus_sc; extern inthand_t IDTVEC(vmbus_isr); @@ -182,85 +159,6 @@ vmbus_get_softc(void) return vmbus_sc; } -static struct vmbus_msghc * -vmbus_msghc_alloc(bus_dma_tag_t parent_dtag) -{ - struct vmbus_msghc *mh; - - mh = malloc(sizeof(*mh), M_DEVBUF, M_WAITOK | M_ZERO); - - mh->mh_inprm = hyperv_dmamem_alloc(parent_dtag, - HYPERCALL_PARAM_ALIGN, 0, HYPERCALL_POSTMSGIN_SIZE, - &mh->mh_inprm_dma, BUS_DMA_WAITOK); - if (mh->mh_inprm == NULL) { - free(mh, M_DEVBUF); - return NULL; - } - return mh; -} - -static void -vmbus_msghc_free(struct vmbus_msghc *mh) -{ - hyperv_dmamem_free(&mh->mh_inprm_dma, mh->mh_inprm); - free(mh, M_DEVBUF); -} - -static void 
-vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *mhc) -{ - KASSERT(mhc->mhc_active == NULL, ("still have active msg hypercall")); - KASSERT(mhc->mhc_free == NULL, ("still have hypercall msg")); - - mtx_destroy(&mhc->mhc_free_lock); - mtx_destroy(&mhc->mhc_active_lock); - free(mhc, M_DEVBUF); -} - -static struct vmbus_msghc_ctx * -vmbus_msghc_ctx_create(bus_dma_tag_t parent_dtag) -{ - struct vmbus_msghc_ctx *mhc; - - mhc = malloc(sizeof(*mhc), M_DEVBUF, M_WAITOK | M_ZERO); - mtx_init(&mhc->mhc_free_lock, "vmbus msghc free", NULL, MTX_DEF); - mtx_init(&mhc->mhc_active_lock, "vmbus msghc act", NULL, MTX_DEF); - - mhc->mhc_free = vmbus_msghc_alloc(parent_dtag); - if (mhc->mhc_free == NULL) { - vmbus_msghc_ctx_free(mhc); - return NULL; - } - return mhc; -} - -static struct vmbus_msghc * -vmbus_msghc_get1(struct vmbus_msghc_ctx *mhc, uint32_t dtor_flag) -{ - struct vmbus_msghc *mh; - - mtx_lock(&mhc->mhc_free_lock); - - while ((mhc->mhc_flags & dtor_flag) == 0 && mhc->mhc_free == NULL) { - mtx_sleep(&mhc->mhc_free, &mhc->mhc_free_lock, 0, - "gmsghc", 0); - } - if (mhc->mhc_flags & dtor_flag) { - /* Being destroyed */ - mh = NULL; - } else { - mh = mhc->mhc_free; - KASSERT(mh != NULL, ("no free hypercall msg")); - KASSERT(mh->mh_resp == NULL, - ("hypercall msg has pending response")); - mhc->mhc_free = NULL; - } - - mtx_unlock(&mhc->mhc_free_lock); - - return mh; -} - void vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize) { @@ -269,7 +167,7 @@ vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize) if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); - inprm = mh->mh_inprm; + inprm = vmbus_xact_req_data(mh->mh_xact); memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE); inprm->hc_connid = VMBUS_CONNID_MESSAGE; inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL; @@ -280,63 +178,50 @@ struct vmbus_msghc * vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize) { struct vmbus_msghc *mh; + struct vmbus_xact *xact; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) 
panic("invalid data size %zu", dsize); - mh = vmbus_msghc_get1(sc->vmbus_msg_hc, VMBUS_MSGHC_CTXF_DESTROY); - if (mh == NULL) - return NULL; + xact = vmbus_xact_get(sc->vmbus_xc, + dsize + __offsetof(struct hypercall_postmsg_in, hc_data[0])); + if (xact == NULL) + return (NULL); + + mh = vmbus_xact_priv(xact, sizeof(*mh)); + mh->mh_xact = xact; vmbus_msghc_reset(mh, dsize); - return mh; + return (mh); } void -vmbus_msghc_put(struct vmbus_softc *sc, struct vmbus_msghc *mh) +vmbus_msghc_put(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { - struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; - KASSERT(mhc->mhc_active == NULL, ("msg hypercall is active")); - mh->mh_resp = NULL; - - mtx_lock(&mhc->mhc_free_lock); - KASSERT(mhc->mhc_free == NULL, ("has free hypercall msg")); - mhc->mhc_free = mh; - mtx_unlock(&mhc->mhc_free_lock); - wakeup(&mhc->mhc_free); + vmbus_xact_put(mh->mh_xact); } void * vmbus_msghc_dataptr(struct vmbus_msghc *mh) { - return mh->mh_inprm->hc_data; -} + struct hypercall_postmsg_in *inprm; -static void -vmbus_msghc_ctx_destroy(struct vmbus_msghc_ctx *mhc) -{ - struct vmbus_msghc *mh; - - mtx_lock(&mhc->mhc_free_lock); - mhc->mhc_flags |= VMBUS_MSGHC_CTXF_DESTROY; - mtx_unlock(&mhc->mhc_free_lock); - wakeup(&mhc->mhc_free); - - mh = vmbus_msghc_get1(mhc, 0); - if (mh == NULL) - panic("can't get msghc"); - - vmbus_msghc_free(mh); - vmbus_msghc_ctx_free(mhc); + inprm = vmbus_xact_req_data(mh->mh_xact); + return (inprm->hc_data); } int vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) { sbintime_t time = SBT_1MS; + struct hypercall_postmsg_in *inprm; + bus_addr_t inprm_paddr; int i; + inprm = vmbus_xact_req_data(mh->mh_xact); + inprm_paddr = vmbus_xact_req_paddr(mh->mh_xact); + /* * Save the input parameter so that we could restore the input * parameter if the Hypercall failed. @@ -345,7 +230,7 @@ vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) * Is this really necessary?! i.e. Will the Hypercall ever * overwrite the input parameter? 
*/ - memcpy(&mh->mh_inprm_save, mh->mh_inprm, HYPERCALL_POSTMSGIN_SIZE); + memcpy(&mh->mh_inprm_save, inprm, HYPERCALL_POSTMSGIN_SIZE); /* * In order to cope with transient failures, e.g. insufficient @@ -357,7 +242,7 @@ vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) for (i = 0; i < HC_RETRY_MAX; ++i) { uint64_t status; - status = hypercall_post_message(mh->mh_inprm_dma.hv_paddr); + status = hypercall_post_message(inprm_paddr); if (status == HYPERCALL_STATUS_SUCCESS) return 0; @@ -366,8 +251,7 @@ vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) time *= 2; /* Restore input parameter and try again */ - memcpy(mh->mh_inprm, &mh->mh_inprm_save, - HYPERCALL_POSTMSGIN_SIZE); + memcpy(inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE); } #undef HC_RETRY_MAX @@ -376,62 +260,30 @@ vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) } int -vmbus_msghc_exec(struct vmbus_softc *sc, struct vmbus_msghc *mh) +vmbus_msghc_exec(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { - struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; int error; - KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response")); - - mtx_lock(&mhc->mhc_active_lock); - KASSERT(mhc->mhc_active == NULL, ("pending active msg hypercall")); - mhc->mhc_active = mh; - mtx_unlock(&mhc->mhc_active_lock); - + vmbus_xact_activate(mh->mh_xact); error = vmbus_msghc_exec_noresult(mh); - if (error) { - mtx_lock(&mhc->mhc_active_lock); - KASSERT(mhc->mhc_active == mh, ("msghc mismatch")); - mhc->mhc_active = NULL; - mtx_unlock(&mhc->mhc_active_lock); - } + if (error) + vmbus_xact_deactivate(mh->mh_xact); return error; } const struct vmbus_message * -vmbus_msghc_wait_result(struct vmbus_softc *sc, struct vmbus_msghc *mh) +vmbus_msghc_wait_result(struct vmbus_softc *sc __unused, struct vmbus_msghc *mh) { - struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; + size_t resp_len; - mtx_lock(&mhc->mhc_active_lock); - - KASSERT(mhc->mhc_active == mh, ("msghc mismatch")); - while (mh->mh_resp == NULL) { - 
mtx_sleep(&mhc->mhc_active, &mhc->mhc_active_lock, 0, - "wmsghc", 0); - } - mhc->mhc_active = NULL; - - mtx_unlock(&mhc->mhc_active_lock); - - return mh->mh_resp; + return (vmbus_xact_wait(mh->mh_xact, &resp_len)); } void vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg) { - struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; - struct vmbus_msghc *mh; - mtx_lock(&mhc->mhc_active_lock); - - mh = mhc->mhc_active; - KASSERT(mh != NULL, ("no pending msg hypercall")); - memcpy(&mh->mh_resp0, msg, sizeof(mh->mh_resp0)); - mh->mh_resp = &mh->mh_resp0; - - mtx_unlock(&mhc->mhc_active_lock); - wakeup(&mhc->mhc_active); + vmbus_xact_ctx_wakeup(sc->vmbus_xc, msg, sizeof(*msg)); } uint32_t @@ -1187,9 +1039,10 @@ vmbus_doattach(struct vmbus_softc *sc) /* * Create context for "post message" Hypercalls */ - sc->vmbus_msg_hc = vmbus_msghc_ctx_create( - bus_get_dma_tag(sc->vmbus_dev)); - if (sc->vmbus_msg_hc == NULL) { + sc->vmbus_xc = vmbus_xact_ctx_create(bus_get_dma_tag(sc->vmbus_dev), + HYPERCALL_POSTMSGIN_SIZE, VMBUS_MSG_SIZE, + sizeof(struct vmbus_msghc)); + if (sc->vmbus_xc == NULL) { ret = ENXIO; goto cleanup; } @@ -1244,9 +1097,9 @@ vmbus_doattach(struct vmbus_softc *sc) cleanup: vmbus_intr_teardown(sc); vmbus_dma_free(sc); - if (sc->vmbus_msg_hc != NULL) { - vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc); - sc->vmbus_msg_hc = NULL; + if (sc->vmbus_xc != NULL) { + vmbus_xact_ctx_destroy(sc->vmbus_xc); + sc->vmbus_xc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_scan_lock); @@ -1305,9 +1158,9 @@ vmbus_detach(device_t dev) vmbus_intr_teardown(sc); vmbus_dma_free(sc); - if (sc->vmbus_msg_hc != NULL) { - vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc); - sc->vmbus_msg_hc = NULL; + if (sc->vmbus_xc != NULL) { + vmbus_xact_ctx_destroy(sc->vmbus_xc); + sc->vmbus_xc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); diff --git a/sys/dev/hyperv/vmbus/vmbus_var.h b/sys/dev/hyperv/vmbus/vmbus_var.h index c278c1512607..47d9004e5719 100644 --- 
a/sys/dev/hyperv/vmbus/vmbus_var.h +++ b/sys/dev/hyperv/vmbus/vmbus_var.h @@ -86,7 +86,7 @@ struct vmbus_softc { u_long *vmbus_rx_evtflags; /* compat evtflgs from host */ struct vmbus_channel **vmbus_chmap; - struct vmbus_msghc_ctx *vmbus_msg_hc; + struct vmbus_xact_ctx *vmbus_xc; struct vmbus_pcpu_data vmbus_pcpu[MAXCPU]; /* diff --git a/sys/dev/hyperv/vmbus/vmbus_xact.c b/sys/dev/hyperv/vmbus/vmbus_xact.c new file mode 100644 index 000000000000..642c165bc293 --- /dev/null +++ b/sys/dev/hyperv/vmbus/vmbus_xact.c @@ -0,0 +1,313 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include + +struct vmbus_xact { + struct vmbus_xact_ctx *x_ctx; + void *x_priv; + + void *x_req; + struct hyperv_dma x_req_dma; + + const void *x_resp; + size_t x_resp_len; + void *x_resp0; +}; + +struct vmbus_xact_ctx { + uint32_t xc_flags; + size_t xc_req_size; + size_t xc_resp_size; + size_t xc_priv_size; + + struct vmbus_xact *xc_free; + struct mtx xc_free_lock; + + struct vmbus_xact *xc_active; + struct mtx xc_active_lock; +}; + +#define VMBUS_XACT_CTXF_DESTROY 0x0001 + +static struct vmbus_xact *vmbus_xact_alloc(struct vmbus_xact_ctx *, + bus_dma_tag_t); +static void vmbus_xact_free(struct vmbus_xact *); +static struct vmbus_xact *vmbus_xact_get1(struct vmbus_xact_ctx *, + uint32_t); + +static struct vmbus_xact * +vmbus_xact_alloc(struct vmbus_xact_ctx *ctx, bus_dma_tag_t parent_dtag) +{ + struct vmbus_xact *xact; + + xact = malloc(sizeof(*xact), M_DEVBUF, M_WAITOK | M_ZERO); + xact->x_ctx = ctx; + + /* XXX assume that page aligned is enough */ + xact->x_req = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + ctx->xc_req_size, &xact->x_req_dma, BUS_DMA_WAITOK); + if (xact->x_req == NULL) { + free(xact, M_DEVBUF); + return (NULL); + } + if (ctx->xc_priv_size != 0) + xact->x_priv = malloc(ctx->xc_priv_size, M_DEVBUF, M_WAITOK); + xact->x_resp0 = malloc(ctx->xc_resp_size, M_DEVBUF, M_WAITOK); + + return (xact); +} + +static void +vmbus_xact_free(struct vmbus_xact *xact) +{ + + hyperv_dmamem_free(&xact->x_req_dma, xact->x_req); + free(xact->x_resp0, M_DEVBUF); + if (xact->x_priv != NULL) + free(xact->x_priv, M_DEVBUF); + free(xact, M_DEVBUF); +} + +static struct vmbus_xact * +vmbus_xact_get1(struct vmbus_xact_ctx *ctx, uint32_t dtor_flag) +{ + struct vmbus_xact *xact; + + mtx_lock(&ctx->xc_free_lock); + + while ((ctx->xc_flags & dtor_flag) == 0 && ctx->xc_free == NULL) + mtx_sleep(&ctx->xc_free, &ctx->xc_free_lock, 0, "gxact", 0); + if (ctx->xc_flags 
& dtor_flag) { + /* Being destroyed */ + xact = NULL; + } else { + xact = ctx->xc_free; + KASSERT(xact != NULL, ("no free xact")); + KASSERT(xact->x_resp == NULL, ("xact has pending response")); + ctx->xc_free = NULL; + } + + mtx_unlock(&ctx->xc_free_lock); + + return (xact); +} + +struct vmbus_xact_ctx * +vmbus_xact_ctx_create(bus_dma_tag_t dtag, size_t req_size, size_t resp_size, + size_t priv_size) +{ + struct vmbus_xact_ctx *ctx; + + ctx = malloc(sizeof(*ctx), M_DEVBUF, M_WAITOK | M_ZERO); + ctx->xc_req_size = req_size; + ctx->xc_resp_size = resp_size; + ctx->xc_priv_size = priv_size; + + ctx->xc_free = vmbus_xact_alloc(ctx, dtag); + if (ctx->xc_free == NULL) { + free(ctx, M_DEVBUF); + return (NULL); + } + + mtx_init(&ctx->xc_free_lock, "vmbus xact free", NULL, MTX_DEF); + mtx_init(&ctx->xc_active_lock, "vmbus xact active", NULL, MTX_DEF); + + return (ctx); +} + +void +vmbus_xact_ctx_destroy(struct vmbus_xact_ctx *ctx) +{ + struct vmbus_xact *xact; + + mtx_lock(&ctx->xc_free_lock); + ctx->xc_flags |= VMBUS_XACT_CTXF_DESTROY; + mtx_unlock(&ctx->xc_free_lock); + wakeup(&ctx->xc_free); + + xact = vmbus_xact_get1(ctx, 0); + if (xact == NULL) + panic("can't get xact"); + + vmbus_xact_free(xact); + mtx_destroy(&ctx->xc_free_lock); + mtx_destroy(&ctx->xc_active_lock); + free(ctx, M_DEVBUF); +} + +struct vmbus_xact * +vmbus_xact_get(struct vmbus_xact_ctx *ctx, size_t req_len) +{ + struct vmbus_xact *xact; + + if (req_len > ctx->xc_req_size) + panic("invalid request size %zu", req_len); + + xact = vmbus_xact_get1(ctx, VMBUS_XACT_CTXF_DESTROY); + if (xact == NULL) + return (NULL); + + memset(xact->x_req, 0, req_len); + return (xact); +} + +void +vmbus_xact_put(struct vmbus_xact *xact) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + + KASSERT(ctx->xc_active == NULL, ("pending active xact")); + xact->x_resp = NULL; + + mtx_lock(&ctx->xc_free_lock); + KASSERT(ctx->xc_free == NULL, ("has free xact")); + ctx->xc_free = xact; + mtx_unlock(&ctx->xc_free_lock); + 
wakeup(&ctx->xc_free); +} + +void * +vmbus_xact_req_data(const struct vmbus_xact *xact) +{ + + return (xact->x_req); +} + +bus_addr_t +vmbus_xact_req_paddr(const struct vmbus_xact *xact) +{ + + return (xact->x_req_dma.hv_paddr); +} + +void * +vmbus_xact_priv(const struct vmbus_xact *xact, size_t priv_len) +{ + + if (priv_len > xact->x_ctx->xc_priv_size) + panic("invalid priv size %zu", priv_len); + return (xact->x_priv); +} + +void +vmbus_xact_activate(struct vmbus_xact *xact) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + + KASSERT(xact->x_resp == NULL, ("xact has pending response")); + + mtx_lock(&ctx->xc_active_lock); + KASSERT(ctx->xc_active == NULL, ("pending active xact")); + ctx->xc_active = xact; + mtx_unlock(&ctx->xc_active_lock); +} + +void +vmbus_xact_deactivate(struct vmbus_xact *xact) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + + mtx_lock(&ctx->xc_active_lock); + KASSERT(ctx->xc_active == xact, ("xact mismatch")); + ctx->xc_active = NULL; + mtx_unlock(&ctx->xc_active_lock); +} + +const void * +vmbus_xact_wait(struct vmbus_xact *xact, size_t *resp_len) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + const void *resp; + + mtx_lock(&ctx->xc_active_lock); + + KASSERT(ctx->xc_active == xact, ("xact mismatch")); + while (xact->x_resp == NULL) { + mtx_sleep(&ctx->xc_active, &ctx->xc_active_lock, 0, + "wxact", 0); + } + ctx->xc_active = NULL; + + resp = xact->x_resp; + *resp_len = xact->x_resp_len; + + mtx_unlock(&ctx->xc_active_lock); + + return (resp); +} + +static void +vmbus_xact_save_resp(struct vmbus_xact *xact, const void *data, size_t dlen) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + size_t cplen = dlen; + + mtx_assert(&ctx->xc_active_lock, MA_OWNED); + + if (cplen > ctx->xc_resp_size) { + printf("vmbus: xact response truncated %zu -> %zu\n", + cplen, ctx->xc_resp_size); + cplen = ctx->xc_resp_size; + } + + KASSERT(ctx->xc_active == xact, ("xact mismatch")); + memcpy(xact->x_resp0, data, cplen); + xact->x_resp_len = cplen; + xact->x_resp = 
xact->x_resp0; +} + +void +vmbus_xact_wakeup(struct vmbus_xact *xact, const void *data, size_t dlen) +{ + struct vmbus_xact_ctx *ctx = xact->x_ctx; + + mtx_lock(&ctx->xc_active_lock); + vmbus_xact_save_resp(xact, data, dlen); + mtx_unlock(&ctx->xc_active_lock); + wakeup(&ctx->xc_active); +} + +void +vmbus_xact_ctx_wakeup(struct vmbus_xact_ctx *ctx, const void *data, size_t dlen) +{ + mtx_lock(&ctx->xc_active_lock); + KASSERT(ctx->xc_active != NULL, ("no pending xact")); + vmbus_xact_save_resp(ctx->xc_active, data, dlen); + mtx_unlock(&ctx->xc_active_lock); + wakeup(&ctx->xc_active); +} diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 6b89b869242e..37189753012e 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -230,18 +230,32 @@ static void vtnet_disable_interrupts(struct vtnet_softc *); static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); /* Tunables. */ +static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters"); static int vtnet_csum_disable = 0; TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable); +SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN, + &vtnet_csum_disable, 0, "Disables receive and send checksum offload"); static int vtnet_tso_disable = 0; TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); +SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable, + 0, "Disables TCP Segmentation Offload"); static int vtnet_lro_disable = 0; TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); +SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable, + 0, "Disables TCP Large Receive Offload"); static int vtnet_mq_disable = 0; TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); -static int vtnet_mq_max_pairs = 0; +SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable, + 0, "Disables Multi Queue support"); +static int vtnet_mq_max_pairs = 
VTNET_MAX_QUEUE_PAIRS; TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); +SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN, + &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs"); static int vtnet_rx_process_limit = 512; TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); +SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, + &vtnet_rx_process_limit, 0, + "Limits the number RX segments processed in a single pass"); static uma_zone_t vtnet_tx_header_zone; @@ -597,7 +611,6 @@ static void vtnet_setup_features(struct vtnet_softc *sc) { device_t dev; - int max_pairs, max; dev = sc->vtnet_dev; @@ -646,32 +659,31 @@ vtnet_setup_features(struct vtnet_softc *sc) if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { - max_pairs = virtio_read_dev_config_2(dev, + sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); - if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || - max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) - max_pairs = 1; } else - max_pairs = 1; + sc->vtnet_max_vq_pairs = 1; - if (max_pairs > 1) { + if (sc->vtnet_max_vq_pairs > 1) { /* - * Limit the maximum number of queue pairs to the number of - * CPUs or the configured maximum. The actual number of - * queues that get used may be less. + * Limit the maximum number of queue pairs to the lower of + * the number of CPUs and the configured maximum. + * The actual number of queues that get used may be less. 
*/ - max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); - if (max > 0 && max_pairs > max) - max_pairs = max; - if (max_pairs > mp_ncpus) - max_pairs = mp_ncpus; - if (max_pairs > VTNET_MAX_QUEUE_PAIRS) - max_pairs = VTNET_MAX_QUEUE_PAIRS; - if (max_pairs > 1) - sc->vtnet_flags |= VTNET_FLAG_MULTIQ; - } + int max; - sc->vtnet_max_vq_pairs = max_pairs; + max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); + if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) { + if (max > mp_ncpus) + max = mp_ncpus; + if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) + max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX; + if (max > 1) { + sc->vtnet_requested_vq_pairs = max; + sc->vtnet_flags |= VTNET_FLAG_MULTIQ; + } + } + } } static int @@ -2982,13 +2994,11 @@ vtnet_set_active_vq_pairs(struct vtnet_softc *sc) dev = sc->vtnet_dev; if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { - MPASS(sc->vtnet_max_vq_pairs == 1); sc->vtnet_act_vq_pairs = 1; return; } - /* BMV: Just use the maximum configured for now. */ - npairs = sc->vtnet_max_vq_pairs; + npairs = sc->vtnet_requested_vq_pairs; if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { device_printf(dev, @@ -3852,6 +3862,9 @@ vtnet_setup_sysctl(struct vtnet_softc *sc) SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, "Maximum number of supported virtqueue pairs"); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs", + CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0, + "Requested number of virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, "Number of active virtqueue pairs"); diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h index f89f6b11fb21..15436d983ca9 100644 --- a/sys/dev/virtio/network/if_vtnetvar.h +++ b/sys/dev/virtio/network/if_vtnetvar.h @@ -155,6 +155,7 @@ struct vtnet_softc { int vtnet_if_flags; int vtnet_act_vq_pairs; int vtnet_max_vq_pairs; + int vtnet_requested_vq_pairs; struct virtqueue 
*vtnet_ctrl_vq; struct vtnet_mac_filter *vtnet_mac_filter; diff --git a/sys/fs/autofs/autofs_vnops.c b/sys/fs/autofs/autofs_vnops.c index e53e38e55323..3dfdb559e9b3 100644 --- a/sys/fs/autofs/autofs_vnops.c +++ b/sys/fs/autofs/autofs_vnops.c @@ -329,6 +329,21 @@ autofs_mkdir(struct vop_mkdir_args *ap) return (error); } +static int +autofs_print(struct vop_print_args *ap) +{ + struct vnode *vp; + struct autofs_node *anp; + + vp = ap->a_vp; + anp = vp->v_data; + + printf(" name \"%s\", fileno %d, cached %d, wildcards %d\n", + anp->an_name, anp->an_fileno, anp->an_cached, anp->an_wildcards); + + return (0); +} + /* * Write out a single 'struct dirent', based on 'name' and 'fileno' arguments. */ @@ -529,6 +544,7 @@ struct vop_vector autofs_vnodeops = { .vop_link = VOP_EOPNOTSUPP, .vop_mkdir = autofs_mkdir, .vop_mknod = VOP_EOPNOTSUPP, + .vop_print = autofs_print, .vop_read = VOP_EOPNOTSUPP, .vop_readdir = autofs_readdir, .vop_remove = VOP_EOPNOTSUPP, diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c index 1a50ec8685fa..ad77f5edbf82 100644 --- a/sys/fs/nfs/nfs_commonkrpc.c +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -89,7 +89,7 @@ uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; NFSSTATESPINLOCK; NFSREQSPINLOCK; NFSDLOCKMUTEX; -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct nfsreqhead nfsd_reqq; extern int nfscl_ticks; extern void (*ncl_call_invalcaches)(struct vnode *); @@ -642,7 +642,7 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, procnum = NFSV4PROC_COMPOUND; if (nmp != NULL) { - NFSINCRGLOBAL(newnfsstats.rpcrequests); + NFSINCRGLOBAL(nfsstatsv1.rpcrequests); /* Map the procnum to the old NFSv2 one, as required. 
*/ if ((nd->nd_flag & ND_NFSV2) != 0) { @@ -762,18 +762,18 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, if (stat == RPC_SUCCESS) { error = 0; } else if (stat == RPC_TIMEDOUT) { - NFSINCRGLOBAL(newnfsstats.rpctimeouts); + NFSINCRGLOBAL(nfsstatsv1.rpctimeouts); error = ETIMEDOUT; } else if (stat == RPC_VERSMISMATCH) { - NFSINCRGLOBAL(newnfsstats.rpcinvalid); + NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EOPNOTSUPP; } else if (stat == RPC_PROGVERSMISMATCH) { - NFSINCRGLOBAL(newnfsstats.rpcinvalid); + NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EPROTONOSUPPORT; } else if (stat == RPC_INTR) { error = EINTR; } else { - NFSINCRGLOBAL(newnfsstats.rpcinvalid); + NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EACCES; } if (error) { diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index a22073f87c20..23a861d4a9b1 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -58,7 +58,7 @@ extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *, extern int nfsrv_useacl; struct mount nfsv4root_mnt; int newnfs_numnfsd = 0; -struct nfsstats newnfsstats; +struct nfsstatsv1 nfsstatsv1; int nfs_numnfscbd = 0; int nfscl_debuglevel = 0; char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; @@ -69,6 +69,7 @@ void (*ncl_call_invalcaches)(struct vnode *) = NULL; static int nfs_realign_test; static int nfs_realign_count; +static struct ext_nfsstats oldnfsstats; SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, @@ -446,9 +447,12 @@ nfssvc_nfscommon(struct thread *td, struct nfssvc_args *uap) static int nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { - int error = EINVAL; + int error = EINVAL, i, j; struct nfsd_idargs nid; struct nfsd_oidargs onid; + struct { + int vers; /* Just the first field of nfsstats. 
*/ + } nfsstatver; if (uap->flag & NFSSVC_IDNAME) { if ((uap->flag & NFSSVC_NEWSTRUCT) != 0) @@ -472,63 +476,157 @@ nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) error = nfssvc_idname(&nid); goto out; } else if (uap->flag & NFSSVC_GETSTATS) { - error = copyout(&newnfsstats, - CAST_USER_ADDR_T(uap->argp), sizeof (newnfsstats)); + if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { + /* Copy fields to the old ext_nfsstat structure. */ + oldnfsstats.attrcache_hits = + nfsstatsv1.attrcache_hits; + oldnfsstats.attrcache_misses = + nfsstatsv1.attrcache_misses; + oldnfsstats.lookupcache_hits = + nfsstatsv1.lookupcache_hits; + oldnfsstats.lookupcache_misses = + nfsstatsv1.lookupcache_misses; + oldnfsstats.direofcache_hits = + nfsstatsv1.direofcache_hits; + oldnfsstats.direofcache_misses = + nfsstatsv1.direofcache_misses; + oldnfsstats.accesscache_hits = + nfsstatsv1.accesscache_hits; + oldnfsstats.accesscache_misses = + nfsstatsv1.accesscache_misses; + oldnfsstats.biocache_reads = + nfsstatsv1.biocache_reads; + oldnfsstats.read_bios = + nfsstatsv1.read_bios; + oldnfsstats.read_physios = + nfsstatsv1.read_physios; + oldnfsstats.biocache_writes = + nfsstatsv1.biocache_writes; + oldnfsstats.write_bios = + nfsstatsv1.write_bios; + oldnfsstats.write_physios = + nfsstatsv1.write_physios; + oldnfsstats.biocache_readlinks = + nfsstatsv1.biocache_readlinks; + oldnfsstats.readlink_bios = + nfsstatsv1.readlink_bios; + oldnfsstats.biocache_readdirs = + nfsstatsv1.biocache_readdirs; + oldnfsstats.readdir_bios = + nfsstatsv1.readdir_bios; + for (i = 0; i < NFSV4_NPROCS; i++) + oldnfsstats.rpccnt[i] = nfsstatsv1.rpccnt[i]; + oldnfsstats.rpcretries = nfsstatsv1.rpcretries; + for (i = 0; i < NFSV4OP_NOPS; i++) + oldnfsstats.srvrpccnt[i] = + nfsstatsv1.srvrpccnt[i]; + for (i = NFSV42_NOPS, j = NFSV4OP_NOPS; + i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) + oldnfsstats.srvrpccnt[j] = + nfsstatsv1.srvrpccnt[i]; + oldnfsstats.srvrpc_errs = nfsstatsv1.srvrpc_errs; + 
oldnfsstats.srv_errs = nfsstatsv1.srv_errs; + oldnfsstats.rpcrequests = nfsstatsv1.rpcrequests; + oldnfsstats.rpctimeouts = nfsstatsv1.rpctimeouts; + oldnfsstats.rpcunexpected = nfsstatsv1.rpcunexpected; + oldnfsstats.rpcinvalid = nfsstatsv1.rpcinvalid; + oldnfsstats.srvcache_inproghits = + nfsstatsv1.srvcache_inproghits; + oldnfsstats.srvcache_idemdonehits = + nfsstatsv1.srvcache_idemdonehits; + oldnfsstats.srvcache_nonidemdonehits = + nfsstatsv1.srvcache_nonidemdonehits; + oldnfsstats.srvcache_misses = + nfsstatsv1.srvcache_misses; + oldnfsstats.srvcache_tcppeak = + nfsstatsv1.srvcache_tcppeak; + oldnfsstats.srvcache_size = nfsstatsv1.srvcache_size; + oldnfsstats.srvclients = nfsstatsv1.srvclients; + oldnfsstats.srvopenowners = nfsstatsv1.srvopenowners; + oldnfsstats.srvopens = nfsstatsv1.srvopens; + oldnfsstats.srvlockowners = nfsstatsv1.srvlockowners; + oldnfsstats.srvlocks = nfsstatsv1.srvlocks; + oldnfsstats.srvdelegates = nfsstatsv1.srvdelegates; + for (i = 0; i < NFSV4OP_CBNOPS; i++) + oldnfsstats.cbrpccnt[i] = + nfsstatsv1.cbrpccnt[i]; + oldnfsstats.clopenowners = nfsstatsv1.clopenowners; + oldnfsstats.clopens = nfsstatsv1.clopens; + oldnfsstats.cllockowners = nfsstatsv1.cllockowners; + oldnfsstats.cllocks = nfsstatsv1.cllocks; + oldnfsstats.cldelegates = nfsstatsv1.cldelegates; + oldnfsstats.cllocalopenowners = + nfsstatsv1.cllocalopenowners; + oldnfsstats.cllocalopens = nfsstatsv1.cllocalopens; + oldnfsstats.cllocallockowners = + nfsstatsv1.cllocallockowners; + oldnfsstats.cllocallocks = nfsstatsv1.cllocallocks; + error = copyout(&oldnfsstats, uap->argp, + sizeof (oldnfsstats)); + } else { + error = copyin(uap->argp, &nfsstatver, + sizeof(nfsstatver)); + if (error == 0 && nfsstatver.vers != NFSSTATS_V1) + error = EPERM; + if (error == 0) + error = copyout(&nfsstatsv1, uap->argp, + sizeof (nfsstatsv1)); + } if (error == 0) { if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) { - newnfsstats.attrcache_hits = 0; - newnfsstats.attrcache_misses = 0; - 
newnfsstats.lookupcache_hits = 0; - newnfsstats.lookupcache_misses = 0; - newnfsstats.direofcache_hits = 0; - newnfsstats.direofcache_misses = 0; - newnfsstats.accesscache_hits = 0; - newnfsstats.accesscache_misses = 0; - newnfsstats.biocache_reads = 0; - newnfsstats.read_bios = 0; - newnfsstats.read_physios = 0; - newnfsstats.biocache_writes = 0; - newnfsstats.write_bios = 0; - newnfsstats.write_physios = 0; - newnfsstats.biocache_readlinks = 0; - newnfsstats.readlink_bios = 0; - newnfsstats.biocache_readdirs = 0; - newnfsstats.readdir_bios = 0; - newnfsstats.rpcretries = 0; - newnfsstats.rpcrequests = 0; - newnfsstats.rpctimeouts = 0; - newnfsstats.rpcunexpected = 0; - newnfsstats.rpcinvalid = 0; - bzero(newnfsstats.rpccnt, - sizeof(newnfsstats.rpccnt)); + nfsstatsv1.attrcache_hits = 0; + nfsstatsv1.attrcache_misses = 0; + nfsstatsv1.lookupcache_hits = 0; + nfsstatsv1.lookupcache_misses = 0; + nfsstatsv1.direofcache_hits = 0; + nfsstatsv1.direofcache_misses = 0; + nfsstatsv1.accesscache_hits = 0; + nfsstatsv1.accesscache_misses = 0; + nfsstatsv1.biocache_reads = 0; + nfsstatsv1.read_bios = 0; + nfsstatsv1.read_physios = 0; + nfsstatsv1.biocache_writes = 0; + nfsstatsv1.write_bios = 0; + nfsstatsv1.write_physios = 0; + nfsstatsv1.biocache_readlinks = 0; + nfsstatsv1.readlink_bios = 0; + nfsstatsv1.biocache_readdirs = 0; + nfsstatsv1.readdir_bios = 0; + nfsstatsv1.rpcretries = 0; + nfsstatsv1.rpcrequests = 0; + nfsstatsv1.rpctimeouts = 0; + nfsstatsv1.rpcunexpected = 0; + nfsstatsv1.rpcinvalid = 0; + bzero(nfsstatsv1.rpccnt, + sizeof(nfsstatsv1.rpccnt)); } if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) { - newnfsstats.srvrpc_errs = 0; - newnfsstats.srv_errs = 0; - newnfsstats.srvcache_inproghits = 0; - newnfsstats.srvcache_idemdonehits = 0; - newnfsstats.srvcache_nonidemdonehits = 0; - newnfsstats.srvcache_misses = 0; - newnfsstats.srvcache_tcppeak = 0; - newnfsstats.srvclients = 0; - newnfsstats.srvopenowners = 0; - newnfsstats.srvopens = 0; - 
newnfsstats.srvlockowners = 0; - newnfsstats.srvlocks = 0; - newnfsstats.srvdelegates = 0; - newnfsstats.clopenowners = 0; - newnfsstats.clopens = 0; - newnfsstats.cllockowners = 0; - newnfsstats.cllocks = 0; - newnfsstats.cldelegates = 0; - newnfsstats.cllocalopenowners = 0; - newnfsstats.cllocalopens = 0; - newnfsstats.cllocallockowners = 0; - newnfsstats.cllocallocks = 0; - bzero(newnfsstats.srvrpccnt, - sizeof(newnfsstats.srvrpccnt)); - bzero(newnfsstats.cbrpccnt, - sizeof(newnfsstats.cbrpccnt)); + nfsstatsv1.srvrpc_errs = 0; + nfsstatsv1.srv_errs = 0; + nfsstatsv1.srvcache_inproghits = 0; + nfsstatsv1.srvcache_idemdonehits = 0; + nfsstatsv1.srvcache_nonidemdonehits = 0; + nfsstatsv1.srvcache_misses = 0; + nfsstatsv1.srvcache_tcppeak = 0; + nfsstatsv1.srvclients = 0; + nfsstatsv1.srvopenowners = 0; + nfsstatsv1.srvopens = 0; + nfsstatsv1.srvlockowners = 0; + nfsstatsv1.srvlocks = 0; + nfsstatsv1.srvdelegates = 0; + nfsstatsv1.clopenowners = 0; + nfsstatsv1.clopens = 0; + nfsstatsv1.cllockowners = 0; + nfsstatsv1.cllocks = 0; + nfsstatsv1.cldelegates = 0; + nfsstatsv1.cllocalopenowners = 0; + nfsstatsv1.cllocalopens = 0; + nfsstatsv1.cllocallockowners = 0; + nfsstatsv1.cllocallocks = 0; + bzero(nfsstatsv1.srvrpccnt, + sizeof(nfsstatsv1.srvrpccnt)); + bzero(nfsstatsv1.cbrpccnt, + sizeof(nfsstatsv1.cbrpccnt)); } } goto out; diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index 6b41e2fbd915..3bfbb1c01f68 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -254,24 +255,26 @@ /* * Must be one more than last op#. + * NFSv4.2 isn't implemented yet, but define the op# limit for it. */ #define NFSV41_NOPS 59 +#define NFSV42_NOPS 72 /* Quirky case if the illegal op code */ #define NFSV4OP_OPILLEGAL 10044 /* - * Fake NFSV4OP_xxx used for nfsstat. Start at NFSV4OP_NOPS. + * Fake NFSV4OP_xxx used for nfsstat. Start at NFSV42_NOPS. 
*/ -#define NFSV4OP_SYMLINK (NFSV4OP_NOPS) -#define NFSV4OP_MKDIR (NFSV4OP_NOPS + 1) -#define NFSV4OP_RMDIR (NFSV4OP_NOPS + 2) -#define NFSV4OP_READDIRPLUS (NFSV4OP_NOPS + 3) -#define NFSV4OP_MKNOD (NFSV4OP_NOPS + 4) -#define NFSV4OP_FSSTAT (NFSV4OP_NOPS + 5) -#define NFSV4OP_FSINFO (NFSV4OP_NOPS + 6) -#define NFSV4OP_PATHCONF (NFSV4OP_NOPS + 7) -#define NFSV4OP_V3CREATE (NFSV4OP_NOPS + 8) +#define NFSV4OP_SYMLINK (NFSV42_NOPS) +#define NFSV4OP_MKDIR (NFSV42_NOPS + 1) +#define NFSV4OP_RMDIR (NFSV42_NOPS + 2) +#define NFSV4OP_READDIRPLUS (NFSV42_NOPS + 3) +#define NFSV4OP_MKNOD (NFSV42_NOPS + 4) +#define NFSV4OP_FSSTAT (NFSV42_NOPS + 5) +#define NFSV4OP_FSINFO (NFSV42_NOPS + 6) +#define NFSV4OP_PATHCONF (NFSV42_NOPS + 7) +#define NFSV4OP_V3CREATE (NFSV42_NOPS + 8) /* * This is the count of the fake operations listed above. @@ -285,12 +288,12 @@ #define NFSV4OP_CBRECALL 4 /* - * Must be one greater than the last Callback Operation#. + * Must be one greater than the last Callback Operation# for NFSv4.0. */ #define NFSV4OP_CBNOPS 5 /* - * Additional Callback Ops for NFSv4.1 only. Not yet in nfsstats. + * Additional Callback Ops for NFSv4.1 only. */ #define NFSV4OP_CBLAYOUTRECALL 5 #define NFSV4OP_CBNOTIFY 6 @@ -303,6 +306,9 @@ #define NFSV4OP_CBNOTIFYLOCK 13 #define NFSV4OP_CBNOTIFYDEVID 14 +#define NFSV41_CBNOPS 15 +#define NFSV42_CBNOPS 16 + /* * The lower numbers -> 21 are used by NFSv2 and v3. These define higher * numbers used by NFSv4. @@ -360,7 +366,72 @@ #endif /* NFS_V3NPROCS */ /* - * Stats structure + * New stats structure. + * The vers field will be set to NFSSTATS_V1 by the caller. + */ +#define NFSSTATS_V1 1 +struct nfsstatsv1 { + int vers; /* Set to version requested by caller. 
*/ + uint64_t attrcache_hits; + uint64_t attrcache_misses; + uint64_t lookupcache_hits; + uint64_t lookupcache_misses; + uint64_t direofcache_hits; + uint64_t direofcache_misses; + uint64_t accesscache_hits; + uint64_t accesscache_misses; + uint64_t biocache_reads; + uint64_t read_bios; + uint64_t read_physios; + uint64_t biocache_writes; + uint64_t write_bios; + uint64_t write_physios; + uint64_t biocache_readlinks; + uint64_t readlink_bios; + uint64_t biocache_readdirs; + uint64_t readdir_bios; + uint64_t rpccnt[NFSV41_NPROCS + 15]; + uint64_t rpcretries; + uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + uint64_t srvrpc_errs; + uint64_t srv_errs; + uint64_t rpcrequests; + uint64_t rpctimeouts; + uint64_t rpcunexpected; + uint64_t rpcinvalid; + uint64_t srvcache_inproghits; + uint64_t srvcache_idemdonehits; + uint64_t srvcache_nonidemdonehits; + uint64_t srvcache_misses; + uint64_t srvcache_tcppeak; + int srvcache_size; /* Updated by atomic_xx_int(). */ + uint64_t srvclients; + uint64_t srvopenowners; + uint64_t srvopens; + uint64_t srvlockowners; + uint64_t srvlocks; + uint64_t srvdelegates; + uint64_t cbrpccnt[NFSV42_CBNOPS]; + uint64_t clopenowners; + uint64_t clopens; + uint64_t cllockowners; + uint64_t cllocks; + uint64_t cldelegates; + uint64_t cllocalopenowners; + uint64_t cllocalopens; + uint64_t cllocallockowners; + uint64_t cllocallocks; + uint64_t srvstartcnt; + uint64_t srvdonecnt; + uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + uint64_t srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + struct bintime srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS]; + struct bintime busyfrom; + struct bintime busytime; +}; + +/* + * Old stats structure. */ struct ext_nfsstats { int attrcache_hits; @@ -415,11 +486,6 @@ struct ext_nfsstats { }; #ifdef _KERNEL -/* - * Define the ext_nfsstats as nfsstats for the kernel code. - */ -#define nfsstats ext_nfsstats - /* * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. 
*/ diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index 08e7b24b5b1d..daa49a09eec1 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -345,10 +345,10 @@ /* * NFSPROC_NOOP is a fake op# that can't be the same as any V2/3/4 Procedure - * or Operation#. Since the NFS V4 Op #s go higher, use NFSV41_NOPS, which + * or Operation#. Since the NFS V4 Op #s go higher, use NFSV42_NOPS, which * is one greater than the highest Op#. */ -#define NFSPROC_NOOP NFSV41_NOPS +#define NFSPROC_NOOP NFSV42_NOPS /* Actual Version 2 procedure numbers */ #define NFSV2PROC_NULL 0 diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c index c804393ed8ed..492a072f4bdc 100644 --- a/sys/fs/nfsclient/nfs_clbio.c +++ b/sys/fs/nfsclient/nfs_clbio.c @@ -60,7 +60,7 @@ __FBSDID("$FreeBSD$"); #include extern int newnfs_directio_allow_mmap; -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct mtx ncl_iod_mutex; extern int ncl_numasync; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; @@ -466,7 +466,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) switch (vp->v_type) { case VREG: - NFSINCRGLOBAL(newnfsstats.biocache_reads); + NFSINCRGLOBAL(nfsstatsv1.biocache_reads); lbn = uio->uio_offset / biosize; on = uio->uio_offset - (lbn * biosize); @@ -543,7 +543,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) n = MIN((unsigned)(bcount - on), uio->uio_resid); break; case VLNK: - NFSINCRGLOBAL(newnfsstats.biocache_readlinks); + NFSINCRGLOBAL(nfsstatsv1.biocache_readlinks); bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, td); if (!bp) { error = newnfs_sigintr(nmp, td); @@ -563,7 +563,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) on = 0; break; case VDIR: - NFSINCRGLOBAL(newnfsstats.biocache_readdirs); + NFSINCRGLOBAL(nfsstatsv1.biocache_readdirs); if (np->n_direofoffset && uio->uio_offset >= np->n_direofoffset) { 
return (0); @@ -992,7 +992,7 @@ ncl_write(struct vop_write_args *ap) } } - NFSINCRGLOBAL(newnfsstats.biocache_writes); + NFSINCRGLOBAL(nfsstatsv1.biocache_writes); lbn = uio->uio_offset / biosize; on = uio->uio_offset - (lbn * biosize); n = MIN((unsigned)(biosize - on), uio->uio_resid); @@ -1606,7 +1606,7 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td, switch (vp->v_type) { case VREG: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; - NFSINCRGLOBAL(newnfsstats.read_bios); + NFSINCRGLOBAL(nfsstatsv1.read_bios); error = ncl_readrpc(vp, uiop, cr); if (!error) { @@ -1641,11 +1641,11 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td, break; case VLNK: uiop->uio_offset = (off_t)0; - NFSINCRGLOBAL(newnfsstats.readlink_bios); + NFSINCRGLOBAL(nfsstatsv1.readlink_bios); error = ncl_readlinkrpc(vp, uiop, cr); break; case VDIR: - NFSINCRGLOBAL(newnfsstats.readdir_bios); + NFSINCRGLOBAL(nfsstatsv1.readdir_bios); uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ; if ((nmp->nm_flag & NFSMNT_RDIRPLUS) != 0) { error = ncl_readdirplusrpc(vp, uiop, cr, td); @@ -1707,7 +1707,7 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td, + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; - NFSINCRGLOBAL(newnfsstats.write_bios); + NFSINCRGLOBAL(nfsstatsv1.write_bios); if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC) iomode = NFSWRITE_UNSTABLE; diff --git a/sys/fs/nfsclient/nfs_clcomsubs.c b/sys/fs/nfsclient/nfs_clcomsubs.c index 1fc7d1b7ce41..e108b4bf9553 100644 --- a/sys/fs/nfsclient/nfs_clcomsubs.c +++ b/sys/fs/nfsclient/nfs_clcomsubs.c @@ -42,7 +42,7 @@ __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; extern int ncl_mbuf_mlen; extern enum vtype newnv2tov_type[8]; @@ -241,8 +241,8 
@@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } - if (procnum < NFSV4_NPROCS) - NFSINCRGLOBAL(newnfsstats.rpccnt[procnum]); + if (procnum < NFSV41_NPROCS) + NFSINCRGLOBAL(nfsstatsv1.rpccnt[procnum]); } #ifndef APPLE diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index 20adaf1cddd7..e5b871f99579 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -84,7 +84,7 @@ __FBSDID("$FreeBSD$"); /* * Global variables */ -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct nfsreqhead nfsd_reqq; extern u_int32_t newnfs_false, newnfs_true; extern int nfscl_debuglevel; @@ -343,10 +343,10 @@ nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp, nowp->nfsow_defunct = 0; nfscl_lockinit(&nowp->nfsow_rwlock); if (dp != NULL) { - newnfsstats.cllocalopenowners++; + nfsstatsv1.cllocalopenowners++; LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list); } else { - newnfsstats.clopenowners++; + nfsstatsv1.clopenowners++; LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list); } owp = *owpp = nowp; @@ -380,9 +380,9 @@ nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp, TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; - newnfsstats.cllocalopens++; + nfsstatsv1.cllocalopens++; } else { - newnfsstats.clopens++; + nfsstatsv1.clopens++; } LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list); *opp = nop; @@ -430,7 +430,7 @@ nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp, LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp, nfsdl_hash); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; - newnfsstats.cldelegates++; + nfsstatsv1.cldelegates++; nfscl_delegcnt++; } else { /* @@ -1071,10 +1071,10 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, LIST_INIT(&nlp->nfsl_lock); if (donelocally) { nlp->nfsl_open = NULL; - 
newnfsstats.cllocallockowners++; + nfsstatsv1.cllocallockowners++; } else { nlp->nfsl_open = op; - newnfsstats.cllockowners++; + nfsstatsv1.cllockowners++; } LIST_INSERT_HEAD(lhp, nlp, nfsl_list); lp = nlp; @@ -1402,9 +1402,9 @@ nfscl_freeopen(struct nfsclopen *op, int local) nfscl_freealllocks(&op->nfso_lock, local); FREE((caddr_t)op, M_NFSCLOPEN); if (local) - newnfsstats.cllocalopens--; + nfsstatsv1.cllocalopens--; else - newnfsstats.clopens--; + nfsstatsv1.clopens--; } /* @@ -1483,9 +1483,9 @@ nfscl_freeopenowner(struct nfsclowner *owp, int local) LIST_REMOVE(owp, nfsow_list); FREE((caddr_t)owp, M_NFSCLOWNER); if (local) - newnfsstats.cllocalopenowners--; + nfsstatsv1.cllocalopenowners--; else - newnfsstats.clopenowners--; + nfsstatsv1.clopenowners--; } /* @@ -1502,9 +1502,9 @@ nfscl_freelockowner(struct nfscllockowner *lp, int local) } FREE((caddr_t)lp, M_NFSCLLOCKOWNER); if (local) - newnfsstats.cllocallockowners--; + nfsstatsv1.cllocallockowners--; else - newnfsstats.cllockowners--; + nfsstatsv1.cllockowners--; } /* @@ -1517,9 +1517,9 @@ nfscl_freelock(struct nfscllock *lop, int local) LIST_REMOVE(lop, nfslo_list); FREE((caddr_t)lop, M_NFSCLLOCK); if (local) - newnfsstats.cllocallocks--; + nfsstatsv1.cllocallocks--; else - newnfsstats.cllocks--; + nfsstatsv1.cllocks--; } /* @@ -1553,7 +1553,7 @@ nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp) TAILQ_REMOVE(hdp, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); FREE((caddr_t)dp, M_NFSCLDELEG); - newnfsstats.cldelegates--; + nfsstatsv1.cldelegates--; nfscl_delegcnt--; } @@ -1621,18 +1621,18 @@ nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp, LIST_REMOVE(op, nfso_list); op->nfso_own = towp; LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list); - newnfsstats.cllocalopens--; - newnfsstats.clopens++; + nfsstatsv1.cllocalopens--; + nfsstatsv1.clopens++; } } else { /* Just add the openowner to the client list */ LIST_REMOVE(owp, nfsow_list); owp->nfsow_clp = clp; 
LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list); - newnfsstats.cllocalopenowners--; - newnfsstats.clopenowners++; - newnfsstats.cllocalopens--; - newnfsstats.clopens++; + nfsstatsv1.cllocalopenowners--; + nfsstatsv1.clopenowners++; + nfsstatsv1.cllocalopens--; + nfsstatsv1.clopens++; } } owp = nowp; @@ -2282,9 +2282,9 @@ nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop, else LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list); if (local) - newnfsstats.cllocallocks++; + nfsstatsv1.cllocallocks++; else - newnfsstats.cllocks++; + nfsstatsv1.cllocks++; } /* @@ -2571,7 +2571,7 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) LIST_REMOVE(dp, nfsdl_hash); TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list); nfscl_delegcnt--; - newnfsstats.cldelegates--; + nfsstatsv1.cldelegates--; } NFSLOCKCLSTATE(); } @@ -2612,7 +2612,7 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) LIST_REMOVE(dp, nfsdl_hash); TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list); nfscl_delegcnt--; - newnfsstats.cldelegates--; + nfsstatsv1.cldelegates--; } } dp = ndp; @@ -3215,8 +3215,8 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) break; } nd->nd_procnum = op; - if (op < NFSV4OP_CBNOPS) - newnfsstats.cbrpccnt[nd->nd_procnum]++; + if (op < NFSV41_CBNOPS) + nfsstatsv1.cbrpccnt[nd->nd_procnum]++; switch (op) { case NFSV4OP_CBGETATTR: NFSCL_DEBUG(4, "cbgetattr\n"); diff --git a/sys/fs/nfsclient/nfs_clsubs.c b/sys/fs/nfsclient/nfs_clsubs.c index 836a183f29e2..a8e94fc2872e 100644 --- a/sys/fs/nfsclient/nfs_clsubs.c +++ b/sys/fs/nfsclient/nfs_clsubs.c @@ -83,7 +83,7 @@ extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; extern int ncl_numasync; extern unsigned int ncl_iodmax; -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; struct task ncl_nfsiodnew_task; @@ -219,12 +219,12 @@ ncl_getattrcache(struct vnode *vp, struct vattr *vaper) if ((time_second - np->n_attrstamp) >= timeo && (mustflush != 
0 || np->n_attrstamp == 0)) { - newnfsstats.attrcache_misses++; + nfsstatsv1.attrcache_misses++; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_GET_MISS(vp); return( ENOENT); } - newnfsstats.attrcache_hits++; + nfsstatsv1.attrcache_hits++; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index a3e2c7096d10..524a372d897a 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -78,7 +78,6 @@ FEATURE(nfscl, "NFSv4 client"); extern int nfscl_ticks; extern struct timeval nfsboottime; -extern struct nfsstats newnfsstats; extern int nfsrv_useacl; extern int nfscl_debuglevel; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 55e514befaa4..69c1fe7848c5 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -100,7 +100,7 @@ uint32_t nfscl_accesscache_load_done_id; #define TRUE 1 #define FALSE 0 -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_useacl; extern int nfscl_debuglevel; MALLOC_DECLARE(M_NEWNFSREQ); @@ -258,14 +258,6 @@ int newnfs_directio_allow_mmap = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); -#if 0 -SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, - &newnfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); - -SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, - &newnfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count"); -#endif - #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ | NFSACCESS_DELETE | NFSACCESS_LOOKUP) @@ -418,7 +410,7 @@ nfs_access(struct vop_access_args *ap) if (time_second < (np->n_accesscache[i].stamp + nfsaccess_cache_timeout) && 
(np->n_accesscache[i].mode & mode) == mode) { - NFSINCRGLOBAL(newnfsstats.accesscache_hits); + NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); gotahit = 1; } break; @@ -437,7 +429,7 @@ nfs_access(struct vop_access_args *ap) /* * Either a no, or a don't know. Go to the wire. */ - NFSINCRGLOBAL(newnfsstats.accesscache_misses); + NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); error = nfs34_access_otw(vp, wmode, ap->a_td, ap->a_cred, &rmode); if (!error && @@ -857,7 +849,7 @@ nfs_getattr(struct vop_getattr_args *ap) if (NFS_ISV34(vp) && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) { - NFSINCRGLOBAL(newnfsstats.accesscache_misses); + NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); if (ncl_getattrcache(vp, ap->a_vap) == 0) { nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); @@ -1114,7 +1106,7 @@ nfs_lookup(struct vop_lookup_args *ap) ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_ctime, &nctime, ==))) { - NFSINCRGLOBAL(newnfsstats.lookupcache_hits); + NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; @@ -1141,7 +1133,7 @@ nfs_lookup(struct vop_lookup_args *ap) if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_mtime, &nctime, ==)) { - NFSINCRGLOBAL(newnfsstats.lookupcache_hits); + NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); return (ENOENT); } cache_purge_negative(dvp); @@ -1149,7 +1141,7 @@ nfs_lookup(struct vop_lookup_args *ap) error = 0; newvp = NULLVP; - NFSINCRGLOBAL(newnfsstats.lookupcache_misses); + NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses); error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); @@ -2227,7 +2219,7 @@ nfs_readdir(struct vop_readdir_args *ap) if ((NFS_ISV4(vp) && np->n_change == 
vattr.va_filerev) || !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { mtx_unlock(&np->n_mtx); - NFSINCRGLOBAL(newnfsstats.direofcache_hits); + NFSINCRGLOBAL(nfsstatsv1.direofcache_hits); if (ap->a_eofflag != NULL) *ap->a_eofflag = 1; return (0); @@ -2254,7 +2246,7 @@ nfs_readdir(struct vop_readdir_args *ap) error = ncl_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) { - NFSINCRGLOBAL(newnfsstats.direofcache_misses); + NFSINCRGLOBAL(nfsstatsv1.direofcache_misses); if (ap->a_eofflag != NULL) *ap->a_eofflag = 1; } diff --git a/sys/fs/nfsserver/nfs_nfsdcache.c b/sys/fs/nfsserver/nfs_nfsdcache.c index 0f78b3f06ec1..0b7bf8f3e1fb 100644 --- a/sys/fs/nfsserver/nfs_nfsdcache.c +++ b/sys/fs/nfsserver/nfs_nfsdcache.c @@ -159,7 +159,7 @@ __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct mtx nfsrc_udpmtx; extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; extern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; @@ -318,8 +318,8 @@ nfsrvd_initcache(void) TAILQ_INIT(&nfsrvudplru); nfsrc_tcpsavedreplies = 0; nfsrc_udpcachesize = 0; - newnfsstats.srvcache_tcppeak = 0; - newnfsstats.srvcache_size = 0; + nfsstatsv1.srvcache_tcppeak = 0; + nfsstatsv1.srvcache_size = 0; } /* @@ -395,14 +395,14 @@ nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); if (rp->rc_flag & RC_INPROG) { - newnfsstats.srvcache_inproghits++; + nfsstatsv1.srvcache_inproghits++; mtx_unlock(mutex); ret = RC_DROPIT; } else if (rp->rc_flag & RC_REPSTATUS) { /* * V2 only. 
*/ - newnfsstats.srvcache_nonidemdonehits++; + nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); nfsrvd_rephead(nd); *(nd->nd_errp) = rp->rc_status; @@ -410,7 +410,7 @@ nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_UDPTIMEOUT; } else if (rp->rc_flag & RC_REPMBUF) { - newnfsstats.srvcache_nonidemdonehits++; + nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); nd->nd_mreq = m_copym(rp->rc_reply, 0, M_COPYALL, M_WAITOK); @@ -425,8 +425,8 @@ nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) goto out; } } - newnfsstats.srvcache_misses++; - atomic_add_int(&newnfsstats.srvcache_size, 1); + nfsstatsv1.srvcache_misses++; + atomic_add_int(&nfsstatsv1.srvcache_size, 1); nfsrc_udpcachesize++; newrp->rc_flag |= RC_INPROG; @@ -480,7 +480,7 @@ nfsrvd_updatecache(struct nfsrv_descript *nd) * Reply from cache is a special case returned by nfsrv_checkseqid(). */ if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) { - newnfsstats.srvcache_nonidemdonehits++; + nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); nd->nd_repstat = 0; if (nd->nd_mreq) @@ -519,8 +519,8 @@ nfsrvd_updatecache(struct nfsrv_descript *nd) if (!(rp->rc_flag & RC_UDP)) { atomic_add_int(&nfsrc_tcpsavedreplies, 1); if (nfsrc_tcpsavedreplies > - newnfsstats.srvcache_tcppeak) - newnfsstats.srvcache_tcppeak = + nfsstatsv1.srvcache_tcppeak) + nfsstatsv1.srvcache_tcppeak = nfsrc_tcpsavedreplies; } mtx_unlock(mutex); @@ -678,7 +678,7 @@ nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) panic("nfs tcp cache0"); rp->rc_flag |= RC_LOCKED; if (rp->rc_flag & RC_INPROG) { - newnfsstats.srvcache_inproghits++; + nfsstatsv1.srvcache_inproghits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -687,7 +687,7 @@ nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) /* * V2 only. 
*/ - newnfsstats.srvcache_nonidemdonehits++; + nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -696,7 +696,7 @@ nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) *(nd->nd_errp) = rp->rc_status; rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; } else if (rp->rc_flag & RC_REPMBUF) { - newnfsstats.srvcache_nonidemdonehits++; + nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -711,8 +711,8 @@ nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) free((caddr_t)newrp, M_NFSRVCACHE); goto out; } - newnfsstats.srvcache_misses++; - atomic_add_int(&newnfsstats.srvcache_size, 1); + nfsstatsv1.srvcache_misses++; + atomic_add_int(&nfsstatsv1.srvcache_size, 1); /* * For TCP, multiple entries for a key are allowed, so don't @@ -801,7 +801,7 @@ nfsrc_freecache(struct nfsrvcache *rp) atomic_add_int(&nfsrc_tcpsavedreplies, -1); } FREE((caddr_t)rp, M_NFSRVCACHE); - atomic_add_int(&newnfsstats.srvcache_size, -1); + atomic_add_int(&nfsstatsv1.srvcache_size, -1); } /* @@ -825,7 +825,7 @@ nfsrvd_cleancache(void) nfsrc_freecache(rp); } } - newnfsstats.srvcache_size = 0; + nfsstatsv1.srvcache_size = 0; mtx_unlock(&nfsrc_udpmtx); nfsrc_tcpsavedreplies = 0; } diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 2f6782994c33..52896a844708 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -62,6 +62,7 @@ extern struct nfsclienthashhead *nfsclienthash; extern struct nfslockhashhead *nfslockhash; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; +extern struct nfsstatsv1 nfsstatsv1; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; @@ -686,6 +687,8 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct 
ucred *cred, uiop->uio_td = NULL; nh = nfsrv_sequential_heuristic(uiop, vp); ioflag |= nh->nh_seqcount << IO_SEQSHIFT; + /* XXX KDM make this more systematic? */ + nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); FREE((caddr_t)iv2, M_TEMP); if (error) { @@ -758,6 +761,8 @@ nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable, uiop->uio_offset = off; nh = nfsrv_sequential_heuristic(uiop, vp); ioflags |= nh->nh_seqcount << IO_SEQSHIFT; + /* XXX KDM make this more systematic? */ + nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error == 0) nh->nh_nextoff = uiop->uio_offset; diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c index e09116cf2caa..f45bba4986d3 100644 --- a/sys/fs/nfsserver/nfs_nfsdsocket.c +++ b/sys/fs/nfsserver/nfs_nfsdsocket.c @@ -41,7 +41,7 @@ __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern struct nfsrvfh nfs_pubfh, nfs_rootfh; extern int nfs_pubfhset, nfs_rootfhset; extern struct nfsv4lock nfsv4rootfs_lock; @@ -400,6 +400,68 @@ static int nfsv3to4op[NFS_V3NPROCS] = { NFSV4OP_COMMIT, }; +static struct mtx nfsrvd_statmtx; +MTX_SYSINIT(nfsst, &nfsrvd_statmtx, "NFSstat", MTX_DEF); + +static void +nfsrvd_statstart(int op, struct bintime *now) +{ + if (op > (NFSV42_NOPS + NFSV4OP_FAKENOPS)) { + printf("%s: op %d invalid\n", __func__, op); + return; + } + + mtx_lock(&nfsrvd_statmtx); + if (nfsstatsv1.srvstartcnt == nfsstatsv1.srvdonecnt) { + if (now != NULL) + nfsstatsv1.busyfrom = *now; + else + binuptime(&nfsstatsv1.busyfrom); + + } + nfsstatsv1.srvrpccnt[op]++; + nfsstatsv1.srvstartcnt++; + mtx_unlock(&nfsrvd_statmtx); + +} + +static void +nfsrvd_statend(int op, uint64_t bytes, struct bintime *now, + struct bintime *then) +{ + struct bintime dt, lnow; + + if (op > (NFSV42_NOPS + NFSV4OP_FAKENOPS)) { + 
printf("%s: op %d invalid\n", __func__, op); + return; + } + + if (now == NULL) { + now = &lnow; + binuptime(now); + } + + mtx_lock(&nfsrvd_statmtx); + + nfsstatsv1.srvbytes[op] += bytes; + nfsstatsv1.srvops[op]++; + + if (then != NULL) { + dt = *now; + bintime_sub(&dt, then); + bintime_add(&nfsstatsv1.srvduration[op], &dt); + } + + dt = *now; + bintime_sub(&dt, &nfsstatsv1.busyfrom); + bintime_add(&nfsstatsv1.busytime, &dt); + nfsstatsv1.busyfrom = *now; + + nfsstatsv1.srvdonecnt++; + + mtx_unlock(&nfsrvd_statmtx); +} + /* * Do an RPC. Basically, get the file handles translated to vnode pointers * and then call the appropriate server routine. The server routines are @@ -476,7 +538,9 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, */ if (nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) { *nd->nd_errp = nfsd_errmap(nd); - NFSINCRGLOBAL(newnfsstats.srvrpccnt[nfsv3to4op[nd->nd_procnum]]); + nfsrvd_statstart(nfsv3to4op[nd->nd_procnum], /*now*/ NULL); + nfsrvd_statend(nfsv3to4op[nd->nd_procnum], /*bytes*/ 0, + /*now*/ NULL, /*then*/ NULL); if (mp != NULL && nfs_writerpc[nd->nd_procnum] != 0) vn_finished_write(mp); goto out; @@ -491,6 +555,11 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, if (nd->nd_flag & ND_NFSV4) { nfsrvd_compound(nd, isdgram, tag, taglen, minorvers, p); } else { + struct bintime start_time; + + binuptime(&start_time); + nfsrvd_statstart(nfsv3to4op[nd->nd_procnum], &start_time); + if (nfs_retfh[nd->nd_procnum] == 1) { if (vp) NFSVOPUNLOCK(vp, 0); @@ -505,7 +574,9 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, } if (mp != NULL && nfs_writerpc[nd->nd_procnum] != 0) vn_finished_write(mp); - NFSINCRGLOBAL(newnfsstats.srvrpccnt[nfsv3to4op[nd->nd_procnum]]); + + nfsrvd_statend(nfsv3to4op[nd->nd_procnum], /*bytes*/ 0, + /*now*/ NULL, /*then*/ &start_time); } if (error) { if (error != EBADRPC) @@ -547,7 +618,7 @@ static void nfsrvd_compound(struct nfsrv_descript 
*nd, int isdgram, u_char *tag, int taglen, u_int32_t minorvers, NFSPROC_T *p) { - int i, lktype, op, op0 = 0; + int i, lktype, op, op0 = 0, statsinprog = 0; u_int32_t *tl; struct nfsclient *clp, *nclp; int numops, error = 0, igotlock; @@ -559,6 +630,7 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, struct nfsexstuff nes, vpnes, savevpnes; fsid_t cur_fsid, save_fsid; static u_int64_t compref = 0; + struct bintime start_time; NFSVNO_EXINIT(&vpnes); NFSVNO_EXINIT(&savevpnes); @@ -686,6 +758,11 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, *repp = *tl; op = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "op=%d\n", op); + + binuptime(&start_time); + nfsrvd_statstart(op, &start_time); + statsinprog = 1; + if (op < NFSV4OP_ACCESS || (op >= NFSV4OP_NOPS && (nd->nd_flag & ND_NFSV41) == 0) || (op >= NFSV41_NOPS && (nd->nd_flag & ND_NFSV41) != 0)) { @@ -771,12 +848,6 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, } if (nfsv4_opflag[op].savereply) nd->nd_flag |= ND_SAVEREPLY; - /* - * For now, newnfsstats.srvrpccnt[] doesn't have entries - * for the NFSv4.1 operations. 
- */ - if (nd->nd_procnum < NFSV4OP_NOPS) - NFSINCRGLOBAL(newnfsstats.srvrpccnt[nd->nd_procnum]); switch (op) { case NFSV4OP_PUTFH: error = nfsrv_mtofh(nd, &fh); @@ -1007,6 +1078,13 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, } error = 0; } + + if (statsinprog != 0) { + nfsrvd_statend(op, /*bytes*/ 0, /*now*/ NULL, + /*then*/ &start_time); + statsinprog = 0; + } + retops++; if (nd->nd_repstat) { *repp = nfsd_errmap(nd); @@ -1016,6 +1094,11 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, } } nfsmout: + if (statsinprog != 0) { + nfsrvd_statend(op, /*bytes*/ 0, /*now*/ NULL, + /*then*/ &start_time); + statsinprog = 0; + } if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) nd->nd_repstat = NFSERR_BADXDR; diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index b0965017847d..1b90d021d6ca 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -37,7 +37,7 @@ int nfsrv_dolocallocks = 0; struct nfsv4lock nfsv4rootfs_lock; extern int newnfs_numnfsd; -extern struct nfsstats newnfsstats; +extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_lease; extern struct timeval nfsboottime; extern u_int32_t newnfs_true, newnfs_false; @@ -273,7 +273,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, LIST_INIT(&new_clp->lc_stateid[i]); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - newnfsstats.srvclients++; + nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); @@ -377,7 +377,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, } LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - newnfsstats.srvclients++; + nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); @@ -441,7 +441,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, } 
LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - newnfsstats.srvclients++; + nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; } @@ -815,7 +815,7 @@ nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p) /* * Dump out stats for all clients. Called from nfssvc(2), that is used - * newnfsstats. + * nfsstatsv1. */ APPLESTATIC void nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt) @@ -1219,7 +1219,7 @@ nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p) free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); NFSLOCKSTATE(); - newnfsstats.srvclients--; + nfsstatsv1.srvclients--; nfsrv_openpluslock--; nfsrv_clients--; NFSUNLOCKSTATE(); @@ -1260,7 +1260,7 @@ nfsrv_freedeleg(struct nfsstate *stp) nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); FREE((caddr_t)stp, M_NFSDSTATE); - newnfsstats.srvdelegates--; + nfsstatsv1.srvdelegates--; nfsrv_openpluslock--; nfsrv_delegatecnt--; } @@ -1286,7 +1286,7 @@ nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p) if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); FREE((caddr_t)stp, M_NFSDSTATE); - newnfsstats.srvopenowners--; + nfsstatsv1.srvopenowners--; nfsrv_openpluslock--; } @@ -1336,7 +1336,7 @@ nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) if (cansleep != 0) NFSUNLOCKSTATE(); FREE((caddr_t)stp, M_NFSDSTATE); - newnfsstats.srvopens--; + nfsstatsv1.srvopens--; nfsrv_openpluslock--; return (ret); } @@ -1355,7 +1355,7 @@ nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); FREE((caddr_t)stp, M_NFSDSTATE); - newnfsstats.srvlockowners--; + nfsstatsv1.srvlockowners--; nfsrv_openpluslock--; } @@ -1430,7 +1430,7 @@ nfsrv_freenfslock(struct nfslock *lop) if (lop->lo_lckfile.le_prev != NULL) { LIST_REMOVE(lop, lo_lckfile); - newnfsstats.srvlocks--; + nfsstatsv1.srvlocks--; nfsrv_openpluslock--; } LIST_REMOVE(lop, lo_lckowner); @@ -2200,7 +2200,7 @@ 
nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp, LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list); *new_lopp = NULL; *new_stpp = NULL; - newnfsstats.srvlockowners++; + nfsstatsv1.srvlockowners++; nfsrv_openpluslock++; } if (filestruct_locked != 0) { @@ -2849,12 +2849,12 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; - newnfsstats.srvopenowners++; + nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; - newnfsstats.srvopens++; + nfsstatsv1.srvopens++; nfsrv_openpluslock++; break; } @@ -2913,7 +2913,7 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; - newnfsstats.srvdelegates++; + nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; @@ -2953,12 +2953,12 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; - newnfsstats.srvopenowners++; + nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; - newnfsstats.srvopens++; + nfsstatsv1.srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; @@ -3027,7 +3027,7 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; - newnfsstats.srvdelegates++; + nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } @@ -3049,7 +3049,7 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, new_open, ls_hash); openstp = new_open; new_open = NULL; - newnfsstats.srvopens++; + nfsstatsv1.srvopens++; nfsrv_openpluslock++; /* @@ -3094,7 +3094,7 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, new_deleg->ls_stateid), new_deleg, ls_hash); 
LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; - newnfsstats.srvdelegates++; + nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } @@ -3173,7 +3173,7 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; - newnfsstats.srvdelegates++; + nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } @@ -3191,9 +3191,9 @@ nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, openstp = new_open; new_open = NULL; *new_stpp = NULL; - newnfsstats.srvopens++; + nfsstatsv1.srvopens++; nfsrv_openpluslock++; - newnfsstats.srvopenowners++; + nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } if (!error) { @@ -3645,7 +3645,7 @@ nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { - newnfsstats.srvlocks++; + nfsstatsv1.srvlocks++; nfsrv_openpluslock++; } } @@ -3843,7 +3843,7 @@ nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, * just set lc_program to 0 to indicate no callbacks are possible. * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set * the address to the client's transport address. This won't be used - * for callbacks, but can be printed out by newnfsstats for info.) + * for callbacks, but can be printed out by nfsstats for info.) * Return error if the xdr can't be parsed, 0 otherwise. */ APPLESTATIC int diff --git a/sys/fs/smbfs/smbfs_node.c b/sys/fs/smbfs/smbfs_node.c index 05d19e98ef97..bf4223336f59 100644 --- a/sys/fs/smbfs/smbfs_node.c +++ b/sys/fs/smbfs/smbfs_node.c @@ -132,7 +132,7 @@ smbfs_node_alloc(struct mount *mp, struct vnode *dvp, const char *dirnm, } dnp = dvp ? 
VTOSMB(dvp) : NULL; if (dnp == NULL && dvp != NULL) { - vprint("smbfs_node_alloc: dead parent vnode", dvp); + vn_printf(dvp, "smbfs_node_alloc: dead parent vnode "); return EINVAL; } error = vfs_hash_get(mp, smbfs_hash(name, nmlen), LK_EXCLUSIVE, td, diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index 72e879205581..7ce4789c72ab 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -819,10 +819,13 @@ tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie, goto out; } - MPASS((cookie & TMPFS_DIRCOOKIE_MASK) == cookie); - dekey.td_hash = cookie; - /* Recover if direntry for cookie was removed */ - de = RB_NFIND(tmpfs_dir, dirhead, &dekey); + if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) { + de = NULL; + } else { + dekey.td_hash = cookie; + /* Recover if direntry for cookie was removed */ + de = RB_NFIND(tmpfs_dir, dirhead, &dekey); + } dc->tdc_tree = de; dc->tdc_current = de; if (de != NULL && tmpfs_dirent_duphead(de)) { diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c index 6b60dbd47fb5..16cc438a2205 100644 --- a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -1753,9 +1753,9 @@ unionfs_print(struct vop_print_args *ap) */ if (unp->un_uppervp != NULLVP) - vprint("unionfs: upper", unp->un_uppervp); + vn_printf(unp->un_uppervp, "unionfs: upper "); if (unp->un_lowervp != NULLVP) - vprint("unionfs: lower", unp->un_lowervp); + vn_printf(unp->un_lowervp, "unionfs: lower "); return (0); } diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 40f72042d27f..c540a49a00e5 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -540,8 +540,8 @@ trap(struct trapframe *frame) case T_DNA: #ifdef DEV_NPX - KASSERT(!PCB_USER_FPU(td->td_pcb), - ("Unregistered use of FPU in kernel")); + if (PCB_USER_FPU(td->td_pcb)) + panic("Unregistered use of FPU in kernel"); if (npxdna()) goto out; #endif diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 
1005830df416..f3959acfbd5c 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -942,6 +942,8 @@ funsetown(struct sigio **sigiop) { struct sigio *sigio; + if (*sigiop == NULL) + return; SIGIO_LOCK(); sigio = *sigiop; if (sigio == NULL) { diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index 72c01f0f0d40..04f3423fb958 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -320,11 +320,13 @@ void kthread_exit(void) { struct proc *p; + struct thread *td; - p = curthread->td_proc; + td = curthread; + p = td->td_proc; /* A module may be waiting for us to exit. */ - wakeup(curthread); + wakeup(td); /* * The last exiting thread in a kernel process must tear down @@ -337,9 +339,10 @@ kthread_exit(void) rw_wunlock(&tidhash_lock); kproc_exit(0); } - LIST_REMOVE(curthread, td_hash); + LIST_REMOVE(td, td_hash); rw_wunlock(&tidhash_lock); - umtx_thread_exit(curthread); + umtx_thread_exit(td); + tdsigcleanup(td); PROC_SLOCK(p); thread_exit(); } diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index 0e30fc9cd9c8..676cd438420b 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -572,9 +572,14 @@ void ktrprocfork(struct proc *p1, struct proc *p2) { + MPASS(p2->p_tracevp == NULL); + MPASS(p2->p_traceflag == 0); + + if (p1->p_traceflag == 0) + return; + PROC_LOCK(p1); mtx_lock(&ktrace_mtx); - KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode")); if (p1->p_traceflag & KTRFAC_INHERIT) { p2->p_traceflag = p1->p_traceflag; if ((p2->p_tracevp = p1->p_tracevp) != NULL) { diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index e6d6d4491e64..942a78a44e03 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -984,7 +984,7 @@ callout_when(sbintime_t sbt, sbintime_t precision, int flags, if ((flags & C_HARDCLOCK) == 0) to_sbt += tick_sbt; } else - to_sbt = sbinuptime(); + to_sbt = sbinuptime(); if (SBT_MAX - to_sbt < sbt) to_sbt = SBT_MAX; else diff --git 
a/sys/kern/subr_gtaskqueue.c b/sys/kern/subr_gtaskqueue.c new file mode 100644 index 000000000000..2d655bde0b25 --- /dev/null +++ b/sys/kern/subr_gtaskqueue.c @@ -0,0 +1,864 @@ +/*- + * Copyright (c) 2000 Doug Rabson + * Copyright (c) 2014 Jeff Roberson + * Copyright (c) 2016 Matthew Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues"); +static void gtaskqueue_thread_enqueue(void *); +static void gtaskqueue_thread_loop(void *arg); + + +struct gtaskqueue_busy { + struct gtask *tb_running; + TAILQ_ENTRY(gtaskqueue_busy) tb_link; +}; + +static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1; + +struct gtaskqueue { + STAILQ_HEAD(, gtask) tq_queue; + gtaskqueue_enqueue_fn tq_enqueue; + void *tq_context; + char *tq_name; + TAILQ_HEAD(, gtaskqueue_busy) tq_active; + struct mtx tq_mutex; + struct thread **tq_threads; + int tq_tcount; + int tq_spin; + int tq_flags; + int tq_callouts; + taskqueue_callback_fn tq_callbacks[TASKQUEUE_NUM_CALLBACKS]; + void *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS]; +}; + +#define TQ_FLAGS_ACTIVE (1 << 0) +#define TQ_FLAGS_BLOCKED (1 << 1) +#define TQ_FLAGS_UNLOCKED_ENQUEUE (1 << 2) + +#define DT_CALLOUT_ARMED (1 << 0) + +#define TQ_LOCK(tq) \ + do { \ + if ((tq)->tq_spin) \ + mtx_lock_spin(&(tq)->tq_mutex); \ + else \ + mtx_lock(&(tq)->tq_mutex); \ + } while (0) +#define TQ_ASSERT_LOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_OWNED) + +#define TQ_UNLOCK(tq) \ + do { \ + if ((tq)->tq_spin) \ + mtx_unlock_spin(&(tq)->tq_mutex); \ + else \ + mtx_unlock(&(tq)->tq_mutex); \ + } while (0) +#define TQ_ASSERT_UNLOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED) + +static __inline int +TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm, + int t) +{ + if (tq->tq_spin) + return (msleep_spin(p, m, wm, t)); + return (msleep(p, m, pri, wm, t)); +} + +static struct gtaskqueue * +_gtaskqueue_create(const char *name, int mflags, + taskqueue_enqueue_fn enqueue, void *context, + int mtxflags, const char *mtxname __unused) +{ + struct gtaskqueue *queue; + char *tq_name; + + 
tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO); + if (!tq_name) + return (NULL); + + snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue"); + + queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO); + if (!queue) + return (NULL); + + STAILQ_INIT(&queue->tq_queue); + TAILQ_INIT(&queue->tq_active); + queue->tq_enqueue = enqueue; + queue->tq_context = context; + queue->tq_name = tq_name; + queue->tq_spin = (mtxflags & MTX_SPIN) != 0; + queue->tq_flags |= TQ_FLAGS_ACTIVE; + if (enqueue == gtaskqueue_thread_enqueue) + queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE; + mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags); + + return (queue); +} + + +/* + * Signal a taskqueue thread to terminate. + */ +static void +gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq) +{ + + while (tq->tq_tcount > 0 || tq->tq_callouts > 0) { + wakeup(tq); + TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0); + } +} + +static void +gtaskqueue_free(struct gtaskqueue *queue) +{ + + TQ_LOCK(queue); + queue->tq_flags &= ~TQ_FLAGS_ACTIVE; + gtaskqueue_terminate(queue->tq_threads, queue); + KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?")); + KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks")); + mtx_destroy(&queue->tq_mutex); + free(queue->tq_threads, M_GTASKQUEUE); + free(queue->tq_name, M_GTASKQUEUE); + free(queue, M_GTASKQUEUE); +} + +int +grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) +{ + TQ_LOCK(queue); + if (gtask->ta_flags & TASK_ENQUEUED) { + TQ_UNLOCK(queue); + return (0); + } + STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link); + gtask->ta_flags |= TASK_ENQUEUED; + TQ_UNLOCK(queue); + if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) + queue->tq_enqueue(queue->tq_context); + return (0); +} + +static void +gtaskqueue_task_nop_fn(void *context) +{ +} + +/* + * Block until all currently queued tasks in this taskqueue + * have begun execution. 
Tasks queued during execution of + * this function are ignored. + */ +static void +gtaskqueue_drain_tq_queue(struct gtaskqueue *queue) +{ + struct gtask t_barrier; + + if (STAILQ_EMPTY(&queue->tq_queue)) + return; + + /* + * Enqueue our barrier after all current tasks, but with + * the highest priority so that newly queued tasks cannot + * pass it. Because of the high priority, we can not use + * taskqueue_enqueue_locked directly (which drops the lock + * anyway) so just insert it at tail while we have the + * queue lock. + */ + GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier); + STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link); + t_barrier.ta_flags |= TASK_ENQUEUED; + + /* + * Once the barrier has executed, all previously queued tasks + * have completed or are currently executing. + */ + while (t_barrier.ta_flags & TASK_ENQUEUED) + TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0); +} + +/* + * Block until all currently executing tasks for this taskqueue + * complete. Tasks that begin execution during the execution + * of this function are ignored. + */ +static void +gtaskqueue_drain_tq_active(struct gtaskqueue *queue) +{ + struct gtaskqueue_busy tb_marker, *tb_first; + + if (TAILQ_EMPTY(&queue->tq_active)) + return; + + /* Block taskq_terminate().*/ + queue->tq_callouts++; + + /* + * Wait for all currently executing taskqueue threads + * to go idle. + */ + tb_marker.tb_running = TB_DRAIN_WAITER; + TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link); + while (TAILQ_FIRST(&queue->tq_active) != &tb_marker) + TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0); + TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link); + + /* + * Wakeup any other drain waiter that happened to queue up + * without any intervening active thread. + */ + tb_first = TAILQ_FIRST(&queue->tq_active); + if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER) + wakeup(tb_first); + + /* Release taskqueue_terminate(). 
*/ + queue->tq_callouts--; + if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0) + wakeup_one(queue->tq_threads); +} + +void +gtaskqueue_block(struct gtaskqueue *queue) +{ + + TQ_LOCK(queue); + queue->tq_flags |= TQ_FLAGS_BLOCKED; + TQ_UNLOCK(queue); +} + +void +gtaskqueue_unblock(struct gtaskqueue *queue) +{ + + TQ_LOCK(queue); + queue->tq_flags &= ~TQ_FLAGS_BLOCKED; + if (!STAILQ_EMPTY(&queue->tq_queue)) + queue->tq_enqueue(queue->tq_context); + TQ_UNLOCK(queue); +} + +static void +gtaskqueue_run_locked(struct gtaskqueue *queue) +{ + struct gtaskqueue_busy tb; + struct gtaskqueue_busy *tb_first; + struct gtask *gtask; + + KASSERT(queue != NULL, ("tq is NULL")); + TQ_ASSERT_LOCKED(queue); + tb.tb_running = NULL; + + while (STAILQ_FIRST(&queue->tq_queue)) { + TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link); + + /* + * Carefully remove the first task from the queue and + * clear its TASK_ENQUEUED flag + */ + gtask = STAILQ_FIRST(&queue->tq_queue); + KASSERT(gtask != NULL, ("task is NULL")); + STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link); + gtask->ta_flags &= ~TASK_ENQUEUED; + tb.tb_running = gtask; + TQ_UNLOCK(queue); + + KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL")); + gtask->ta_func(gtask->ta_context); + + TQ_LOCK(queue); + tb.tb_running = NULL; + wakeup(gtask); + + TAILQ_REMOVE(&queue->tq_active, &tb, tb_link); + tb_first = TAILQ_FIRST(&queue->tq_active); + if (tb_first != NULL && + tb_first->tb_running == TB_DRAIN_WAITER) + wakeup(tb_first); + } +} + +static int +task_is_running(struct gtaskqueue *queue, struct gtask *gtask) +{ + struct gtaskqueue_busy *tb; + + TQ_ASSERT_LOCKED(queue); + TAILQ_FOREACH(tb, &queue->tq_active, tb_link) { + if (tb->tb_running == gtask) + return (1); + } + return (0); +} + +static int +gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask) +{ + + if (gtask->ta_flags & TASK_ENQUEUED) + STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link); + gtask->ta_flags &= ~TASK_ENQUEUED; + return 
(task_is_running(queue, gtask) ? EBUSY : 0); +} + +int +gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask) +{ + int error; + + TQ_LOCK(queue); + error = gtaskqueue_cancel_locked(queue, gtask); + TQ_UNLOCK(queue); + + return (error); +} + +void +gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask) +{ + + if (!queue->tq_spin) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); + + TQ_LOCK(queue); + while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask)) + TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0); + TQ_UNLOCK(queue); +} + +void +gtaskqueue_drain_all(struct gtaskqueue *queue) +{ + + if (!queue->tq_spin) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); + + TQ_LOCK(queue); + gtaskqueue_drain_tq_queue(queue); + gtaskqueue_drain_tq_active(queue); + TQ_UNLOCK(queue); +} + +static int +_gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, + cpuset_t *mask, const char *name, va_list ap) +{ + char ktname[MAXCOMLEN + 1]; + struct thread *td; + struct gtaskqueue *tq; + int i, error; + + if (count <= 0) + return (EINVAL); + + vsnprintf(ktname, sizeof(ktname), name, ap); + tq = *tqp; + + tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE, + M_NOWAIT | M_ZERO); + if (tq->tq_threads == NULL) { + printf("%s: no memory for %s threads\n", __func__, ktname); + return (ENOMEM); + } + + for (i = 0; i < count; i++) { + if (count == 1) + error = kthread_add(gtaskqueue_thread_loop, tqp, NULL, + &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname); + else + error = kthread_add(gtaskqueue_thread_loop, tqp, NULL, + &tq->tq_threads[i], RFSTOPPED, 0, + "%s_%d", ktname, i); + if (error) { + /* should be ok to continue, taskqueue_free will dtrt */ + printf("%s: kthread_add(%s): error %d", __func__, + ktname, error); + tq->tq_threads[i] = NULL; /* paranoid */ + } else + tq->tq_tcount++; + } + for (i = 0; i < count; i++) { + if (tq->tq_threads[i] == NULL) + continue; + td = 
tq->tq_threads[i]; + if (mask) { + error = cpuset_setthread(td->td_tid, mask); + /* + * Failing to pin is rarely an actual fatal error; + * it'll just affect performance. + */ + if (error) + printf("%s: curthread=%llu: can't pin; " + "error=%d\n", + __func__, + (unsigned long long) td->td_tid, + error); + } + thread_lock(td); + sched_prio(td, pri); + sched_add(td, SRQ_BORING); + thread_unlock(td); + } + + return (0); +} + +static int +gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, + const char *name, ...) +{ + va_list ap; + int error; + + va_start(ap, name); + error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap); + va_end(ap); + return (error); +} + +static inline void +gtaskqueue_run_callback(struct gtaskqueue *tq, + enum taskqueue_callback_type cb_type) +{ + taskqueue_callback_fn tq_callback; + + TQ_ASSERT_UNLOCKED(tq); + tq_callback = tq->tq_callbacks[cb_type]; + if (tq_callback != NULL) + tq_callback(tq->tq_cb_contexts[cb_type]); +} + +static void +gtaskqueue_thread_loop(void *arg) +{ + struct gtaskqueue **tqp, *tq; + + tqp = arg; + tq = *tqp; + gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); + TQ_LOCK(tq); + while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { + /* XXX ? */ + gtaskqueue_run_locked(tq); + /* + * Because taskqueue_run() can drop tq_mutex, we need to + * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the + * meantime, which means we missed a wakeup. + */ + if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) + break; + TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0); + } + gtaskqueue_run_locked(tq); + /* + * This thread is on its way out, so just drop the lock temporarily + * in order to call the shutdown callback. This allows the callback + * to look at the taskqueue, even just before it dies. 
+ */ + TQ_UNLOCK(tq); + gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN); + TQ_LOCK(tq); + + /* rendezvous with thread that asked us to terminate */ + tq->tq_tcount--; + wakeup_one(tq->tq_threads); + TQ_UNLOCK(tq); + kthread_exit(); +} + +static void +gtaskqueue_thread_enqueue(void *context) +{ + struct gtaskqueue **tqp, *tq; + + tqp = context; + tq = *tqp; + wakeup_one(tq); +} + + +static struct gtaskqueue * +gtaskqueue_create_fast(const char *name, int mflags, + taskqueue_enqueue_fn enqueue, void *context) +{ + return _gtaskqueue_create(name, mflags, enqueue, context, + MTX_SPIN, "fast_taskqueue"); +} + + +struct taskqgroup_cpu { + LIST_HEAD(, grouptask) tgc_tasks; + struct gtaskqueue *tgc_taskq; + int tgc_cnt; + int tgc_cpu; +}; + +struct taskqgroup { + struct taskqgroup_cpu tqg_queue[MAXCPU]; + struct mtx tqg_lock; + char * tqg_name; + int tqg_adjusting; + int tqg_stride; + int tqg_cnt; +}; + +struct taskq_bind_task { + struct gtask bt_task; + int bt_cpuid; +}; + +static void +taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx) +{ + struct taskqgroup_cpu *qcpu; + + qcpu = &qgroup->tqg_queue[idx]; + LIST_INIT(&qcpu->tgc_tasks); + qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK, + taskqueue_thread_enqueue, &qcpu->tgc_taskq); + gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT, + "%s_%d", qgroup->tqg_name, idx); + qcpu->tgc_cpu = idx * qgroup->tqg_stride; +} + +static void +taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx) +{ + + gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq); +} + +/* + * Find the taskq with least # of tasks that doesn't currently have any + * other queues from the uniq identifier. 
+ */ +static int +taskqgroup_find(struct taskqgroup *qgroup, void *uniq) +{ + struct grouptask *n; + int i, idx, mincnt; + int strict; + + mtx_assert(&qgroup->tqg_lock, MA_OWNED); + if (qgroup->tqg_cnt == 0) + return (0); + idx = -1; + mincnt = INT_MAX; + /* + * Two passes; First scan for a queue with the least tasks that + * does not already service this uniq id. If that fails simply find + * the queue with the least total tasks; + */ + for (strict = 1; mincnt == INT_MAX; strict = 0) { + for (i = 0; i < qgroup->tqg_cnt; i++) { + if (qgroup->tqg_queue[i].tgc_cnt > mincnt) + continue; + if (strict) { + LIST_FOREACH(n, + &qgroup->tqg_queue[i].tgc_tasks, gt_list) + if (n->gt_uniq == uniq) + break; + if (n != NULL) + continue; + } + mincnt = qgroup->tqg_queue[i].tgc_cnt; + idx = i; + } + } + if (idx == -1) + panic("taskqgroup_find: Failed to pick a qid."); + + return (idx); +} + +void +taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, + void *uniq, int irq, char *name) +{ + cpuset_t mask; + int qid; + + gtask->gt_uniq = uniq; + gtask->gt_name = name; + gtask->gt_irq = irq; + gtask->gt_cpu = -1; + mtx_lock(&qgroup->tqg_lock); + qid = taskqgroup_find(qgroup, uniq); + qgroup->tqg_queue[qid].tgc_cnt++; + LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); + gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; + if (irq != -1 && smp_started) { + CPU_ZERO(&mask); + CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); + mtx_unlock(&qgroup->tqg_lock); + intr_setaffinity(irq, &mask); + } else + mtx_unlock(&qgroup->tqg_lock); +} + +int +taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, + void *uniq, int cpu, int irq, char *name) +{ + cpuset_t mask; + int i, qid; + + qid = -1; + gtask->gt_uniq = uniq; + gtask->gt_name = name; + gtask->gt_irq = irq; + gtask->gt_cpu = cpu; + mtx_lock(&qgroup->tqg_lock); + if (smp_started) { + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_cpu == cpu) { + qid = i; + 
break; + } + if (qid == -1) { + mtx_unlock(&qgroup->tqg_lock); + return (EINVAL); + } + } else + qid = 0; + qgroup->tqg_queue[qid].tgc_cnt++; + LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); + gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; + if (irq != -1 && smp_started) { + CPU_ZERO(&mask); + CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); + mtx_unlock(&qgroup->tqg_lock); + intr_setaffinity(irq, &mask); + } else + mtx_unlock(&qgroup->tqg_lock); + return (0); +} + +void +taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask) +{ + int i; + + mtx_lock(&qgroup->tqg_lock); + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue) + break; + if (i == qgroup->tqg_cnt) + panic("taskqgroup_detach: task not in group\n"); + qgroup->tqg_queue[i].tgc_cnt--; + LIST_REMOVE(gtask, gt_list); + mtx_unlock(&qgroup->tqg_lock); + gtask->gt_taskqueue = NULL; +} + +static void +taskqgroup_binder(void *ctx) +{ + struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; + cpuset_t mask; + int error; + + CPU_ZERO(&mask); + CPU_SET(gtask->bt_cpuid, &mask); + error = cpuset_setthread(curthread->td_tid, &mask); + thread_lock(curthread); + sched_bind(curthread, gtask->bt_cpuid); + thread_unlock(curthread); + + if (error) + printf("taskqgroup_binder: setaffinity failed: %d\n", + error); + free(gtask, M_DEVBUF); +} + +static void +taskqgroup_bind(struct taskqgroup *qgroup) +{ + struct taskq_bind_task *gtask; + int i; + + /* + * Bind taskqueue threads to specific CPUs, if they have been assigned + * one. 
+ */ + for (i = 0; i < qgroup->tqg_cnt; i++) { + gtask = malloc(sizeof (*gtask), M_DEVBUF, M_NOWAIT); + GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask); + gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; + grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, + &gtask->bt_task); + } +} + +static int +_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) +{ + LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); + cpuset_t mask; + struct grouptask *gtask; + int i, old_cnt, qid; + + mtx_assert(&qgroup->tqg_lock, MA_OWNED); + + if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) { + printf("taskqgroup_adjust failed cnt: %d stride: %d mp_ncpus: %d smp_started: %d\n", + cnt, stride, mp_ncpus, smp_started); + return (EINVAL); + } + if (qgroup->tqg_adjusting) { + printf("taskqgroup_adjust failed: adjusting\n"); + return (EBUSY); + } + qgroup->tqg_adjusting = 1; + old_cnt = qgroup->tqg_cnt; + mtx_unlock(&qgroup->tqg_lock); + /* + * Set up queue for tasks added before boot. + */ + if (old_cnt == 0) { + LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks, + grouptask, gt_list); + qgroup->tqg_queue[0].tgc_cnt = 0; + } + + /* + * If new taskq threads have been added. + */ + for (i = old_cnt; i < cnt; i++) + taskqgroup_cpu_create(qgroup, i); + mtx_lock(&qgroup->tqg_lock); + qgroup->tqg_cnt = cnt; + qgroup->tqg_stride = stride; + + /* + * Adjust drivers to use new taskqs. 
+ */ + for (i = 0; i < old_cnt; i++) { + while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) { + LIST_REMOVE(gtask, gt_list); + qgroup->tqg_queue[i].tgc_cnt--; + LIST_INSERT_HEAD(&gtask_head, gtask, gt_list); + } + } + + while ((gtask = LIST_FIRST(&gtask_head))) { + LIST_REMOVE(gtask, gt_list); + if (gtask->gt_cpu == -1) + qid = taskqgroup_find(qgroup, gtask->gt_uniq); + else { + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) { + qid = i; + break; + } + } + qgroup->tqg_queue[qid].tgc_cnt++; + LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, + gt_list); + gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; + } + /* + * Set new CPU and IRQ affinity + */ + for (i = 0; i < cnt; i++) { + qgroup->tqg_queue[i].tgc_cpu = i * qgroup->tqg_stride; + CPU_ZERO(&mask); + CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask); + LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) { + if (gtask->gt_irq == -1) + continue; + intr_setaffinity(gtask->gt_irq, &mask); + } + } + mtx_unlock(&qgroup->tqg_lock); + + /* + * If taskq thread count has been reduced. 
+ */ + for (i = cnt; i < old_cnt; i++) + taskqgroup_cpu_remove(qgroup, i); + + mtx_lock(&qgroup->tqg_lock); + qgroup->tqg_adjusting = 0; + + taskqgroup_bind(qgroup); + + return (0); +} + +int +taskqgroup_adjust(struct taskqgroup *qgroup, int cpu, int stride) +{ + int error; + + mtx_lock(&qgroup->tqg_lock); + error = _taskqgroup_adjust(qgroup, cpu, stride); + mtx_unlock(&qgroup->tqg_lock); + + return (error); +} + +struct taskqgroup * +taskqgroup_create(char *name) +{ + struct taskqgroup *qgroup; + + qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO); + mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); + qgroup->tqg_name = name; + LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); + + return (qgroup); +} + +void +taskqgroup_destroy(struct taskqgroup *qgroup) +{ + +} diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c index 12124b8aa005..5a20148f573c 100644 --- a/sys/kern/subr_taskqueue.c +++ b/sys/kern/subr_taskqueue.c @@ -260,22 +260,6 @@ taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task) return (0); } -int -grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task) -{ - TQ_LOCK(queue); - if (task->ta_pending) { - TQ_UNLOCK(queue); - return (0); - } - STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link); - task->ta_pending = 1; - TQ_UNLOCK(queue); - if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) - queue->tq_enqueue(queue->tq_context); - return (0); -} - int taskqueue_enqueue(struct taskqueue *queue, struct task *task) { @@ -806,347 +790,3 @@ taskqueue_member(struct taskqueue *queue, struct thread *td) } return (ret); } - -struct taskqgroup_cpu { - LIST_HEAD(, grouptask) tgc_tasks; - struct taskqueue *tgc_taskq; - int tgc_cnt; - int tgc_cpu; -}; - -struct taskqgroup { - struct taskqgroup_cpu tqg_queue[MAXCPU]; - struct mtx tqg_lock; - char * tqg_name; - int tqg_adjusting; - int tqg_stride; - int tqg_cnt; -}; - -struct taskq_bind_task { - struct task bt_task; - int bt_cpuid; -}; - -static void 
-taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx) -{ - struct taskqgroup_cpu *qcpu; - int i, j; - - qcpu = &qgroup->tqg_queue[idx]; - LIST_INIT(&qcpu->tgc_tasks); - qcpu->tgc_taskq = taskqueue_create_fast(NULL, M_WAITOK, - taskqueue_thread_enqueue, &qcpu->tgc_taskq); - taskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT, - "%s_%d", qgroup->tqg_name, idx); - - for (i = CPU_FIRST(), j = 0; j < idx * qgroup->tqg_stride; - j++, i = CPU_NEXT(i)) { - /* - * Wait: evaluate the idx * qgroup->tqg_stride'th CPU, - * potentially wrapping the actual count - */ - } - qcpu->tgc_cpu = i; -} - -static void -taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx) -{ - - taskqueue_free(qgroup->tqg_queue[idx].tgc_taskq); -} - -/* - * Find the taskq with least # of tasks that doesn't currently have any - * other queues from the uniq identifier. - */ -static int -taskqgroup_find(struct taskqgroup *qgroup, void *uniq) -{ - struct grouptask *n; - int i, idx, mincnt; - int strict; - - mtx_assert(&qgroup->tqg_lock, MA_OWNED); - if (qgroup->tqg_cnt == 0) - return (0); - idx = -1; - mincnt = INT_MAX; - /* - * Two passes; First scan for a queue with the least tasks that - * does not already service this uniq id. 
If that fails simply find - * the queue with the least total tasks; - */ - for (strict = 1; mincnt == INT_MAX; strict = 0) { - for (i = 0; i < qgroup->tqg_cnt; i++) { - if (qgroup->tqg_queue[i].tgc_cnt > mincnt) - continue; - if (strict) { - LIST_FOREACH(n, - &qgroup->tqg_queue[i].tgc_tasks, gt_list) - if (n->gt_uniq == uniq) - break; - if (n != NULL) - continue; - } - mincnt = qgroup->tqg_queue[i].tgc_cnt; - idx = i; - } - } - if (idx == -1) - panic("taskqgroup_find: Failed to pick a qid."); - - return (idx); -} - -void -taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, - void *uniq, int irq, char *name) -{ - cpuset_t mask; - int qid; - - gtask->gt_uniq = uniq; - gtask->gt_name = name; - gtask->gt_irq = irq; - gtask->gt_cpu = -1; - mtx_lock(&qgroup->tqg_lock); - qid = taskqgroup_find(qgroup, uniq); - qgroup->tqg_queue[qid].tgc_cnt++; - LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); - gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; - if (irq != -1 && smp_started) { - CPU_ZERO(&mask); - CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); - mtx_unlock(&qgroup->tqg_lock); - intr_setaffinity(irq, &mask); - } else - mtx_unlock(&qgroup->tqg_lock); -} - -int -taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, - void *uniq, int cpu, int irq, char *name) -{ - cpuset_t mask; - int i, qid; - - qid = -1; - gtask->gt_uniq = uniq; - gtask->gt_name = name; - gtask->gt_irq = irq; - gtask->gt_cpu = cpu; - mtx_lock(&qgroup->tqg_lock); - if (smp_started) { - for (i = 0; i < qgroup->tqg_cnt; i++) - if (qgroup->tqg_queue[i].tgc_cpu == cpu) { - qid = i; - break; - } - if (qid == -1) { - mtx_unlock(&qgroup->tqg_lock); - return (EINVAL); - } - } else - qid = 0; - qgroup->tqg_queue[qid].tgc_cnt++; - LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); - gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; - if (irq != -1 && smp_started) { - CPU_ZERO(&mask); - CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, 
&mask); - mtx_unlock(&qgroup->tqg_lock); - intr_setaffinity(irq, &mask); - } else - mtx_unlock(&qgroup->tqg_lock); - return (0); -} - -void -taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask) -{ - int i; - - mtx_lock(&qgroup->tqg_lock); - for (i = 0; i < qgroup->tqg_cnt; i++) - if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue) - break; - if (i == qgroup->tqg_cnt) - panic("taskqgroup_detach: task not in group\n"); - qgroup->tqg_queue[i].tgc_cnt--; - LIST_REMOVE(gtask, gt_list); - mtx_unlock(&qgroup->tqg_lock); - gtask->gt_taskqueue = NULL; -} - -static void -taskqgroup_binder(void *ctx, int pending) -{ - struct taskq_bind_task *task = (struct taskq_bind_task *)ctx; - cpuset_t mask; - int error; - - CPU_ZERO(&mask); - CPU_SET(task->bt_cpuid, &mask); - error = cpuset_setthread(curthread->td_tid, &mask); - thread_lock(curthread); - sched_bind(curthread, task->bt_cpuid); - thread_unlock(curthread); - - if (error) - printf("taskqgroup_binder: setaffinity failed: %d\n", - error); - free(task, M_DEVBUF); -} - -static void -taskqgroup_bind(struct taskqgroup *qgroup) -{ - struct taskq_bind_task *task; - int i; - - /* - * Bind taskqueue threads to specific CPUs, if they have been assigned - * one. 
- */ - for (i = 0; i < qgroup->tqg_cnt; i++) { - task = malloc(sizeof (*task), M_DEVBUF, M_NOWAIT); - TASK_INIT(&task->bt_task, 0, taskqgroup_binder, task); - task->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; - taskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, - &task->bt_task); - } -} - -static int -_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) -{ - LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); - cpuset_t mask; - struct grouptask *gtask; - int i, k, old_cnt, qid, cpu; - - mtx_assert(&qgroup->tqg_lock, MA_OWNED); - - if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) { - printf("taskqgroup_adjust failed cnt: %d stride: %d " - "mp_ncpus: %d smp_started: %d\n", cnt, stride, mp_ncpus, - smp_started); - return (EINVAL); - } - if (qgroup->tqg_adjusting) { - printf("taskqgroup_adjust failed: adjusting\n"); - return (EBUSY); - } - qgroup->tqg_adjusting = 1; - old_cnt = qgroup->tqg_cnt; - mtx_unlock(&qgroup->tqg_lock); - /* - * Set up queue for tasks added before boot. - */ - if (old_cnt == 0) { - LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks, - grouptask, gt_list); - qgroup->tqg_queue[0].tgc_cnt = 0; - } - - /* - * If new taskq threads have been added. - */ - for (i = old_cnt; i < cnt; i++) - taskqgroup_cpu_create(qgroup, i); - mtx_lock(&qgroup->tqg_lock); - qgroup->tqg_cnt = cnt; - qgroup->tqg_stride = stride; - - /* - * Adjust drivers to use new taskqs. 
- */ - for (i = 0; i < old_cnt; i++) { - while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) { - LIST_REMOVE(gtask, gt_list); - qgroup->tqg_queue[i].tgc_cnt--; - LIST_INSERT_HEAD(&gtask_head, gtask, gt_list); - } - } - - while ((gtask = LIST_FIRST(&gtask_head))) { - LIST_REMOVE(gtask, gt_list); - if (gtask->gt_cpu == -1) - qid = taskqgroup_find(qgroup, gtask->gt_uniq); - else { - for (i = 0; i < qgroup->tqg_cnt; i++) - if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) { - qid = i; - break; - } - } - qgroup->tqg_queue[qid].tgc_cnt++; - LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, - gt_list); - gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; - } - /* - * Set new CPU and IRQ affinity - */ - cpu = CPU_FIRST(); - for (i = 0; i < cnt; i++) { - qgroup->tqg_queue[i].tgc_cpu = cpu; - for (k = 0; k < qgroup->tqg_stride; k++) - cpu = CPU_NEXT(cpu); - CPU_ZERO(&mask); - CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask); - LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) { - if (gtask->gt_irq == -1) - continue; - intr_setaffinity(gtask->gt_irq, &mask); - } - } - mtx_unlock(&qgroup->tqg_lock); - - /* - * If taskq thread count has been reduced. 
- */ - for (i = cnt; i < old_cnt; i++) - taskqgroup_cpu_remove(qgroup, i); - - mtx_lock(&qgroup->tqg_lock); - qgroup->tqg_adjusting = 0; - - taskqgroup_bind(qgroup); - - return (0); -} - -int -taskqgroup_adjust(struct taskqgroup *qgroup, int cpu, int stride) -{ - int error; - - mtx_lock(&qgroup->tqg_lock); - error = _taskqgroup_adjust(qgroup, cpu, stride); - mtx_unlock(&qgroup->tqg_lock); - - return (error); -} - -struct taskqgroup * -taskqgroup_create(char *name) -{ - struct taskqgroup *qgroup; - - qgroup = malloc(sizeof(*qgroup), M_TASKQUEUE, M_WAITOK | M_ZERO); - mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); - qgroup->tqg_name = name; - LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); - - return (qgroup); -} - -void -taskqgroup_destroy(struct taskqgroup *qgroup) -{ - -} diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 2378ad653f3f..cb9ee9a786b4 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1479,7 +1479,6 @@ buf_alloc(void) bp->b_npages = 0; bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_bufobj = NULL; - bp->b_pin_count = 0; bp->b_data = bp->b_kvabase = unmapped_buf; bp->b_fsprivate1 = NULL; bp->b_fsprivate2 = NULL; @@ -1908,9 +1907,6 @@ bufwrite(struct buf *bp) BUF_ASSERT_HELD(bp); - if (bp->b_pin_count > 0) - bunpin_wait(bp); - KASSERT(!(bp->b_vflags & BV_BKGRDINPROG), ("FFS background buffer should not get here %p", bp)); @@ -3123,10 +3119,7 @@ flushbufqueues(struct vnode *lvp, int target, int flushdeps) mtx_unlock(&bqlocks[queue]); if (error != 0) continue; - if (bp->b_pin_count > 0) { - BUF_UNLOCK(bp); - continue; - } + /* * BKGRDINPROG can only be set with the buf and bufobj * locks both held. We tolerate a race to clear it here. 
@@ -3546,19 +3539,6 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo, if ((bp->b_flags & B_VMIO) == 0 || (size > bp->b_kvasize)) { if (bp->b_flags & B_DELWRI) { - /* - * If buffer is pinned and caller does - * not want sleep waiting for it to be - * unpinned, bail out - * */ - if (bp->b_pin_count > 0) { - if (flags & GB_LOCK_NOWAIT) { - bqrelse(bp); - return (NULL); - } else { - bunpin_wait(bp); - } - } bp->b_flags |= B_NOCACHE; bwrite(bp); } else { @@ -4632,41 +4612,6 @@ bufobj_wwait(struct bufobj *bo, int slpflag, int timeo) return (error); } -void -bpin(struct buf *bp) -{ - struct mtx *mtxp; - - mtxp = mtx_pool_find(mtxpool_sleep, bp); - mtx_lock(mtxp); - bp->b_pin_count++; - mtx_unlock(mtxp); -} - -void -bunpin(struct buf *bp) -{ - struct mtx *mtxp; - - mtxp = mtx_pool_find(mtxpool_sleep, bp); - mtx_lock(mtxp); - if (--bp->b_pin_count == 0) - wakeup(bp); - mtx_unlock(mtxp); -} - -void -bunpin_wait(struct buf *bp) -{ - struct mtx *mtxp; - - mtxp = mtx_pool_find(mtxpool_sleep, bp); - mtx_lock(mtxp); - while (bp->b_pin_count > 0) - msleep(bp, mtxp, PRIBIO, "bwunpin", 0); - mtx_unlock(mtxp); -} - /* * Set bio_data or bio_ma for struct bio from the struct buf. */ diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index f6be7a7e53dc..4606517dc7c7 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -836,12 +836,6 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len, --len; continue; } - if (tbp->b_pin_count > 0) { - BUF_UNLOCK(tbp); - ++start_lbn; - --len; - continue; - } bremfree(tbp); tbp->b_flags &= ~B_DONE; @@ -953,14 +947,6 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len, break; } - /* - * Do not pull in pinned buffers. 
- */ - if (tbp->b_pin_count > 0) { - BUF_UNLOCK(tbp); - break; - } - /* * Ok, it's passed all the tests, * so remove it from the free list diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index a7977bf9cf11..8679ad782a74 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -256,7 +256,7 @@ static int vop_nostrategy (struct vop_strategy_args *ap) { printf("No strategy for buffer at %p\n", ap->a_bp); - vprint("vnode", ap->a_vp); + vn_printf(ap->a_vp, "vnode "); ap->a_bp->b_ioflags |= BIO_ERROR; ap->a_bp->b_error = EOPNOTSUPP; bufdone(ap->a_bp); @@ -722,7 +722,7 @@ vop_stdfsync(ap) } BO_UNLOCK(bo); if (error == EAGAIN) - vprint("fsync: giving up on dirty", vp); + vn_printf(vp, "fsync: giving up on dirty "); return (error); } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index f33dc8bd7f06..07244c99ea29 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -721,7 +721,7 @@ lookup(struct nameidata *ndp) if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags)) cnp->cn_lkflags = LK_EXCLUSIVE; #ifdef NAMEI_DIAGNOSTIC - vprint("lookup in", dp); + vn_printf(dp, "lookup in "); #endif lkflags_save = cnp->cn_lkflags; cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags, @@ -1007,7 +1007,7 @@ relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) * We now have a segment name to search for, and a directory to search. 
*/ #ifdef NAMEI_DIAGNOSTIC - vprint("search in:", dp); + vn_printf(dp, "search in "); #endif if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { KASSERT(*vpp == NULL, ("leaf should be empty")); diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 247714f07cb4..6e45a2c71b1e 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -510,7 +510,7 @@ vfs_mount_destroy(struct mount *mp) struct vnode *vp; TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) - vprint("", vp); + vn_printf(vp, "dangling vnode "); panic("unmount: dangling vnode"); } KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers")); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 14991f2d8a5a..be6163c55a4b 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -2645,7 +2645,7 @@ vputx(struct vnode *vp, int func) error = 0; if (vp->v_usecount != 0) { - vprint("vputx: usecount not zero", vp); + vn_printf(vp, "vputx: usecount not zero for vnode "); panic("vputx: usecount not zero"); } @@ -3036,7 +3036,7 @@ vflush(struct mount *mp, int rootrefs, int flags, struct thread *td) busy++; #ifdef DIAGNOSTIC if (busyprt) - vprint("vflush: busy vnode", vp); + vn_printf(vp, "vflush: busy vnode "); #endif } VOP_UNLOCK(vp, 0); @@ -3409,7 +3409,7 @@ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) TAILQ_FOREACH(mp, &mountlist, mnt_list) { TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp)) - vprint("", vp); + vn_printf(vp, "vnode "); } } } @@ -4402,6 +4402,10 @@ int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, "Print lock violations"); +int vfs_badlock_vnode = 1; /* Print vnode details on lock violations. */ +SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_vnode, CTLFLAG_RW, &vfs_badlock_vnode, + 0, "Print vnode details on lock violations"); + #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations. 
*/ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, @@ -4416,6 +4420,8 @@ vfs_badlock(const char *msg, const char *str, struct vnode *vp) if (vfs_badlock_backtrace) kdb_backtrace(); #endif + if (vfs_badlock_vnode) + vn_printf(vp, "vnode "); if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index b55336255e90..9f3aa140d7ed 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -44,7 +44,6 @@ # U: unlocked. # -: not applicable. vnode does not yet (or no longer) exists. # =: the same on input and output, may be either L or U. -# X: locked if not nil. # # The paramater named "vpp" is assumed to be always used with double # indirection (**vpp) and that name is hard-coded in vnode_if.awk ! diff --git a/sys/modules/cloudabi/Makefile b/sys/modules/cloudabi/Makefile index bba50362eab1..c608580c7f30 100644 --- a/sys/modules/cloudabi/Makefile +++ b/sys/modules/cloudabi/Makefile @@ -5,6 +5,6 @@ KMOD= cloudabi SRCS= cloudabi_clock.c cloudabi_errno.c cloudabi_fd.c cloudabi_file.c \ cloudabi_futex.c cloudabi_mem.c cloudabi_proc.c cloudabi_random.c \ - cloudabi_sock.c cloudabi_thread.c vnode_if.h + cloudabi_sock.c cloudabi_thread.c cloudabi_vdso.c vnode_if.h .include diff --git a/sys/modules/cloudabi64/Makefile b/sys/modules/cloudabi64/Makefile index 3aee96d97525..b6fe38057ba8 100644 --- a/sys/modules/cloudabi64/Makefile +++ b/sys/modules/cloudabi64/Makefile @@ -1,11 +1,39 @@ # $FreeBSD$ -.PATH: ${.CURDIR}/../../compat/cloudabi64 -.PATH: ${.CURDIR}/../../${MACHINE}/cloudabi64 +SYSDIR?=${.CURDIR}/../.. 
+ +.PATH: ${SYSDIR}/compat/cloudabi64 +.PATH: ${SYSDIR}/${MACHINE}/cloudabi64 KMOD= cloudabi64 SRCS= cloudabi64_fd.c cloudabi64_module.c cloudabi64_poll.c \ cloudabi64_sock.c cloudabi64_syscalls.c cloudabi64_sysent.c \ cloudabi64_sysvec.c cloudabi64_thread.c +OBJS= cloudabi64_vdso_blob.o +CLEANFILES=cloudabi64_vdso.o + +.if ${MACHINE_CPUARCH} == "aarch64" +VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_aarch64.c +OUTPUT_TARGET=elf64-littleaarch64 +BINARY_ARCHITECTURE=aarch64 +.elif ${MACHINE_CPUARCH} == "amd64" +VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_x86_64.c +OUTPUT_TARGET=elf64-x86-64-freebsd +BINARY_ARCHITECTURE=i386 +.endif + +cloudabi64_vdso.o: ${VDSO_SRCS} + ${CC} -shared -nostdinc -nostdlib \ + -Wl,-T${SYSDIR}/compat/cloudabi64/cloudabi64_vdso.lds.s \ + -D_KERNEL -I. -I${SYSDIR} -I${SYSDIR}/contrib/cloudabi \ + -O2 -fomit-frame-pointer \ + ${VDSO_SRCS} -o ${.TARGET} + +cloudabi64_vdso_blob.o: cloudabi64_vdso.o + ${OBJCOPY} --input-target binary \ + --output-target ${OUTPUT_TARGET} \ + --binary-architecture ${BINARY_ARCHITECTURE} \ + cloudabi64_vdso.o ${.TARGET} + .include diff --git a/sys/modules/dtb/allwinner/Makefile b/sys/modules/dtb/allwinner/Makefile index 9dd0799724ae..541e8c044cb7 100644 --- a/sys/modules/dtb/allwinner/Makefile +++ b/sys/modules/dtb/allwinner/Makefile @@ -7,7 +7,7 @@ DTS= \ cubieboard2.dts \ olimex-a20-som-evb.dts \ olinuxino-lime.dts \ - pcduino3b.dts \ + pcduino3.dts \ sinovoip-bpi-m3.dts .include diff --git a/sys/modules/hyperv/vmbus/Makefile b/sys/modules/hyperv/vmbus/Makefile index fe1e7fc9ed48..14eacf5a085e 100644 --- a/sys/modules/hyperv/vmbus/Makefile +++ b/sys/modules/hyperv/vmbus/Makefile @@ -10,7 +10,8 @@ SRCS= hyperv.c \ vmbus.c \ vmbus_br.c \ vmbus_chan.c \ - vmbus_et.c + vmbus_et.c \ + vmbus_xact.c SRCS+= acpi_if.h bus_if.h device_if.h opt_acpi.h vmbus_if.h # XXX: for assym.s diff --git a/sys/net/ifdi_if.m b/sys/net/ifdi_if.m index 60629e46b6cd..301a6ee88f8e 100644 --- a/sys/net/ifdi_if.m +++ 
b/sys/net/ifdi_if.m @@ -60,9 +60,10 @@ CODE { return (0); } - static void + static int null_queue_intr_enable(if_ctx_t _ctx __unused, uint16_t _qid __unused) { + return (ENOTSUP); } static void @@ -194,7 +195,7 @@ METHOD void intr_disable { if_ctx_t _ctx; }; -METHOD void queue_intr_enable { +METHOD int queue_intr_enable { if_ctx_t _ctx; uint16_t _qid; } DEFAULT null_queue_intr_enable; diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 89409beb9fa4..5938aca90877 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include @@ -101,7 +102,6 @@ __FBSDID("$FreeBSD$"); * Enable mbuf vectors for compressing long mbuf chains */ - /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead @@ -181,8 +181,10 @@ struct iflib_ctx { struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; - uint16_t ifc_sysctl_ntxds; - uint16_t ifc_sysctl_nrxds; + uint16_t ifc_sysctl_qs_eq_override; + + uint16_t ifc_sysctl_ntxds[8]; + uint16_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush @@ -294,10 +296,11 @@ typedef struct iflib_sw_tx_desc_array { #define IFLIB_RESTART_BUDGET 8 -#define IFC_LEGACY 0x1 -#define IFC_QFLUSH 0x2 -#define IFC_MULTISEG 0x4 -#define IFC_DMAR 0x8 +#define IFC_LEGACY 0x01 +#define IFC_QFLUSH 0x02 +#define IFC_MULTISEG 0x04 +#define IFC_DMAR 0x08 +#define IFC_SC_ALLOCATED 0x10 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ @@ -311,6 +314,7 @@ struct iflib_txq { uint8_t ift_db_pending; uint8_t ift_db_pending_queued; uint8_t ift_npending; + uint8_t ift_br_offset; /* implicit pad */ uint64_t ift_processed; uint64_t ift_cleaned; @@ -414,6 +418,7 @@ struct iflib_rxq { uint16_t ifr_cq_cidx; uint16_t ifr_cq_pidx; uint8_t ifr_cq_gen; + uint8_t ifr_fl_offset; if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; @@ 
-604,7 +609,7 @@ static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); -static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx); +static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str); @@ -875,7 +880,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) { nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); - avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i); + avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i, INT_MAX); for (n = 0; avail > 0; n++, avail--) { error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (error) @@ -930,7 +935,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) /* * XXX we should be batching this operation - TODO */ - ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1); + ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1, fl->ifl_buf_size); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map, BUS_DMASYNC_PREREAD); nm_i = nm_next(nm_i, lim); @@ -958,6 +963,7 @@ static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); @@ -966,8 +972,8 @@ iflib_netmap_attach(if_ctx_t ctx) MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); - na.num_tx_desc = ctx->ifc_sctx->isc_ntxd; - na.num_rx_desc = ctx->ifc_sctx->isc_ntxd; + na.num_tx_desc = scctx->isc_ntxd[0]; + na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; 
na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; @@ -986,7 +992,7 @@ iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) if (slot == 0) return; - for (int i = 0; i < ctx->ifc_sctx->isc_ntxd; i++) { + for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. @@ -1011,7 +1017,7 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) if (slot == 0) return; sd = rxq->ifr_fl[0].ifl_sds; - nrxd = ctx->ifc_sctx->isc_nrxd; + nrxd = ctx->ifc_softc_ctx.isc_nrxd[0]; for (int i = 0; i < nrxd; i++, sd++) { int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i); uint64_t paddr; @@ -1021,7 +1027,7 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) vaddr = addr = PNMB(na, slot + sj, &paddr); netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, sd->ifsd_map, addr); /* Update descriptor and the cached value */ - ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1); + ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1, rxq->ifr_fl[0].ifl_buf_size); } /* preserve queue */ if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) { @@ -1236,7 +1242,8 @@ iflib_txsd_alloc(iflib_txq_t txq) nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; - MPASS(sctx->isc_ntxd > 0); + MPASS(scctx->isc_ntxd[0] > 0); + MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); MPASS(ntsosegments > 0); /* @@ -1259,13 +1266,11 @@ iflib_txsd_alloc(iflib_txq_t txq) sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize); goto fail; } -#ifdef INVARIANTS +#ifdef IFLIB_DIAGNOSTICS device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n", sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize); + #endif - device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n", - scctx->isc_tx_tso_size_max, ntsosegments, - scctx->isc_tx_tso_segsize_max); if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* 
alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ @@ -1282,21 +1287,21 @@ iflib_txsd_alloc(iflib_txq_t txq) goto fail; } -#ifdef INVARIANTS +#ifdef IFLIB_DIAGNOSTICS device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n", scctx->isc_tx_tso_size_max, ntsosegments, scctx->isc_tx_tso_segsize_max); #endif if (!(txq->ift_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * - sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) { + scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * - sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) { + scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; @@ -1308,13 +1313,13 @@ iflib_txsd_alloc(iflib_txq_t txq) return (0); if (!(txq->ift_sds.ifsd_map = - (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) { + (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } - for (int i = 0; i < sctx->isc_ntxd; i++) { + for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); @@ -1348,9 +1353,8 @@ static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; - if_shared_ctx_t sctx = ctx->ifc_sctx; - for (int i = 0; i < sctx->isc_ntxd; i++) + for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); @@ -1390,7 +1394,7 @@ iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) bus_dmamap_unload(txq->ift_desc_tag, 
txq->ift_sds.ifsd_map[i]); } - m_freem(*mp); + m_free(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } @@ -1399,7 +1403,7 @@ static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; - if_shared_ctx_t sctx = ctx->ifc_sctx; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; int i; @@ -1408,7 +1412,7 @@ iflib_txq_setup(iflib_txq_t txq) /* Reset indices */ txq->ift_cidx_processed = txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; - txq->ift_size = sctx->isc_ntxd; + txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); @@ -1433,22 +1437,25 @@ iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; iflib_rxsd_t rxsd; int err; - MPASS(sctx->isc_nrxd > 0); + MPASS(scctx->isc_nrxd[0] > 0); + MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_sds = malloc(sizeof(struct iflib_sw_rx_desc) * - sctx->isc_nrxd, M_IFLIB, M_WAITOK | M_ZERO); + scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, + M_WAITOK | M_ZERO); if (fl->ifl_sds == NULL) { device_printf(dev, "Unable to allocate rx sw desc memory\n"); return (ENOMEM); } - fl->ifl_size = sctx->isc_nrxd; /* this isn't necessarily the same */ + fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ @@ -1468,7 +1475,7 @@ iflib_rxsd_alloc(iflib_rxq_t rxq) } rxsd = fl->ifl_sds; - for (int i = 0; i < sctx->isc_nrxd; i++, rxsd++) { + for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++, rxsd++) { err = bus_dmamap_create(fl->ifl_desc_tag, 0, &rxsd->ifsd_map); if (err) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", @@ 
-1626,7 +1633,7 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { ctx->isc_rxd_refill(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx, - fl->ifl_bus_addrs, fl->ifl_vm_addrs, i); + fl->ifl_bus_addrs, fl->ifl_vm_addrs, i, fl->ifl_buf_size); i = 0; pidx = fl->ifl_pidx; } @@ -1854,7 +1861,11 @@ iflib_init_locked(if_ctx_t ctx) for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { iflib_netmap_rxq_init(ctx, rxq); } +#ifdef INVARIANTS + i = if_getdrvflags(ifp); +#endif IFDI_INIT(ctx); + MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { @@ -1902,7 +1913,6 @@ iflib_stop(if_ctx_t ctx) iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; - if_shared_ctx_t sctx = ctx->ifc_sctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; @@ -1920,7 +1930,7 @@ iflib_stop(if_ctx_t ctx) /* clean any enqueued buffers */ iflib_txq_check_drain(txq, 0); /* Free any existing tx buffers. 
*/ - for (j = 0; j < sctx->isc_ntxd; j++) { + for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; @@ -1990,13 +2000,24 @@ assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri) caddr_t cl; i = 0; + mh = NULL; do { sd = rxd_frag_to_sd(rxq, &ri->iri_frags[i], &cltype, TRUE); MPASS(sd->ifsd_cl != NULL); MPASS(sd->ifsd_m != NULL); + + /* Don't include zero-length frags */ + if (ri->iri_frags[i].irf_len == 0) { + /* XXX we can save the cluster here, but not the mbuf */ + m_init(sd->ifsd_m, M_NOWAIT, MT_DATA, 0); + m_free(sd->ifsd_m); + sd->ifsd_m = NULL; + continue; + } + m = sd->ifsd_m; - if (i == 0) { + if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; @@ -2019,14 +2040,12 @@ assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri) */ m->m_data += padlen; ri->iri_len -= padlen; - m->m_len = ri->iri_len; + m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } - - /* * Process one software descriptor */ @@ -2037,13 +2056,14 @@ iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) iflib_rxsd_t sd; /* should I merge this back in now that the two paths are basically duplicated? 
*/ - if (ri->iri_len <= IFLIB_RX_COPY_THRESH) { + if (ri->iri_nfrags == 1 && + ri->iri_frags[0].irf_len <= IFLIB_RX_COPY_THRESH) { sd = rxd_frag_to_sd(rxq, &ri->iri_frags[0], NULL, FALSE); m = sd->ifsd_m; sd->ifsd_m = NULL; m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); memcpy(m->m_data, sd->ifsd_cl, ri->iri_len); - m->m_len = ri->iri_len; + m->m_len = ri->iri_frags[0].irf_len; } else { m = assemble_segments(rxq, ri); } @@ -2063,13 +2083,13 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; uint16_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; struct ifnet *ifp; - struct lro_entry *queued; int lro_enabled; /* * XXX early demux data packets so that if_input processing only handles @@ -2084,11 +2104,11 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; - if (sctx->isc_flags & IFLIB_HAS_CQ) + if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; - if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp)) == 0) { + if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); @@ -2112,10 +2132,11 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) /* in lieu of handling correctly - make sure it isn't being unhandled */ MPASS(err == 0); - if (sctx->isc_flags & IFLIB_HAS_CQ) { - /* we know we consumed _one_ CQ entry */ - if (++rxq->ifr_cq_cidx == sctx->isc_nrxd) { - rxq->ifr_cq_cidx = 0; + if (sctx->isc_flags & IFLIB_HAS_RXCQ) { + *cidxp = ri.iri_cidx; + /* Update our consumer index */ + while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { + rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; rxq->ifr_cq_gen = 0; } /* was this only a completion queue message? 
*/ @@ -2128,7 +2149,7 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); if (avail == 0 && budget_left) - avail = iflib_rxd_avail(ctx, rxq, *cidxp); + avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) { DBG_COUNTER_INC(rx_mbuf_null); @@ -2148,7 +2169,6 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) ifp = ctx->ifc_ifp; lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); - while (mh != NULL) { m = mh; mh = mh->m_nextpkt; @@ -2162,32 +2182,33 @@ iflib_rxeof(iflib_rxq_t rxq, int budget) DBG_COUNTER_INC(rx_if_input); ifp->if_input(ifp, m); } + if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ - while ((queued = LIST_FIRST(&rxq->ifr_lc.lro_active)) != NULL) { - LIST_REMOVE(queued, next); #if defined(INET6) || defined(INET) - tcp_lro_flush(&rxq->ifr_lc, queued); + tcp_lro_flush_all(&rxq->ifr_lc); #endif - } - return (iflib_rxd_avail(ctx, rxq, *cidxp)); + if (avail) + return true; + return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) -#define TXQ_MAX_DB_DEFERRED(ctx) (ctx->ifc_sctx->isc_ntxd >> 5) -#define TXQ_MAX_DB_CONSUMED(ctx) (ctx->ifc_sctx->isc_ntxd >> 4) +#define TXQ_MAX_DB_DEFERRED(size) (size >> 5) +#define TXQ_MAX_DB_CONSUMED(size) (size >> 4) static __inline void iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring) { uint32_t dbval; - if (ring || txq->ift_db_pending >= TXQ_MAX_DB_DEFERRED(ctx)) { + if (ring || txq->ift_db_pending >= + TXQ_MAX_DB_DEFERRED(txq->ift_size)) { /* the lock will only ever be contended in the !min_latency case */ if (!TXDB_TRYLOCK(txq)) @@ -2233,9 +2254,9 @@ static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { struct ether_vlan_header *eh; - struct mbuf *m; + struct mbuf *m, *n; - m = *mp; + n = m = 
*mp; /* * Determine where frame payload starts. * Jump over vlan headers if already present, @@ -2261,7 +2282,6 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { struct ip *ip = NULL; struct tcphdr *th = NULL; - struct mbuf *n; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); @@ -2403,37 +2423,31 @@ collapse_pkthdr(struct mbuf *m0) /* * If dodgy hardware rejects the scatter gather chain we've handed it - * we'll need to rebuild the mbuf chain before we can call m_defrag + * we'll need to remove the mbuf chain from ifsg_m[] before we can add the + * m_defrag'd mbufs */ static __noinline struct mbuf * -iflib_rebuild_mbuf(iflib_txq_t txq) +iflib_remove_mbuf(iflib_txq_t txq) { - - int ntxd, mhlen, len, i, pidx; + int ntxd, i, pidx; struct mbuf *m, *mh, **ifsd_m; - if_shared_ctx_t sctx; pidx = txq->ift_pidx; ifsd_m = txq->ift_sds.ifsd_m; - sctx = txq->ift_ctx->ifc_sctx; - ntxd = sctx->isc_ntxd; + ntxd = txq->ift_size; mh = m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif - len = m->m_len; - mhlen = m->m_pkthdr.len; i = 1; - while (len < mhlen && (m->m_next == NULL)) { - m->m_next = ifsd_m[(pidx + i) & (ntxd-1)]; + while (m) { ifsd_m[(pidx + i) & (ntxd -1)] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif m = m->m_next; - len += m->m_len; i++; } return (mh); @@ -2446,6 +2460,7 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, { if_ctx_t ctx; if_shared_ctx_t sctx; + if_softc_ctx_t scctx; int i, next, pidx, mask, err, maxsegsz, ntxd, count; struct mbuf *m, *tmp, **ifsd_m, **mp; @@ -2459,8 +2474,9 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; + scctx = &ctx->ifc_softc_ctx; ifsd_m = txq->ift_sds.ifsd_m; - ntxd = sctx->isc_ntxd; + ntxd = txq->ift_size; pidx = txq->ift_pidx; if (map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; @@ -2472,13 
+2488,12 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, ifsd_flags[pidx] |= TX_SW_DESC_MAPPED; i = 0; next = pidx; - mask = (sctx->isc_ntxd-1); + mask = (txq->ift_size-1); m = *m0; do { mp = &ifsd_m[next]; *mp = m; m = m->m_next; - (*mp)->m_next = NULL; if (__predict_false((*mp)->m_len == 0)) { m_free(*mp); *mp = NULL; @@ -2529,13 +2544,12 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, count++; tmp = m; m = m->m_next; - tmp->m_next = NULL; } while (m != NULL); *nsegs = i; } return (0); err: - *m0 = iflib_rebuild_mbuf(txq); + *m0 = iflib_remove_mbuf(txq); return (EFBIG); } @@ -2558,7 +2572,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; - ntxd = sctx->isc_ntxd; + ntxd = txq->ift_size; m_head = *m_headp; map = NULL; @@ -2645,14 +2659,14 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) if (map != NULL) bus_dmamap_unload(desc_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); - if (txq->ift_task.gt_task.ta_pending == 0) + if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; - MPASS(pidx >= 0 && pidx < sctx->isc_ntxd); + MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif @@ -2661,11 +2675,12 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); - MPASS(pi.ipi_new_pidx >= 0 && pi.ipi_new_pidx < sctx->isc_ntxd); + MPASS(pi.ipi_new_pidx >= 0 && + pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { - ndesc += sctx->isc_ntxd; + ndesc += txq->ift_size; txq->ift_gen = 1; } MPASS(pi.ipi_new_pidx != pidx); @@ -2678,7 +2693,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else if 
(__predict_false(err == EFBIG && remap < 2)) { - *m_headp = m_head = iflib_rebuild_mbuf(txq); + *m_headp = m_head = iflib_remove_mbuf(txq); remap = 1; txq->ift_txd_encap_efbig++; goto defrag; @@ -2700,7 +2715,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) -#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NRXQSETS(ctx)) + FIRST_QSET(ctx)) +#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max) @@ -2712,7 +2727,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) * * ORing with 2 assures that min occupancy is never less than 2 without any conditional logic */ -#define TXQ_MIN_OCCUPANCY(ctx) ((ctx->ifc_sctx->isc_ntxd >> 6)| 0x2) +#define TXQ_MIN_OCCUPANCY(size) ((size >> 6)| 0x2) static inline int iflib_txq_min_occupancy(iflib_txq_t txq) @@ -2720,7 +2735,9 @@ iflib_txq_min_occupancy(iflib_txq_t txq) if_ctx_t ctx; ctx = txq->ift_ctx; - return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen) < TXQ_MIN_OCCUPANCY(ctx) + MAX_TX_DESC(ctx)); + return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, + txq->ift_gen) < TXQ_MIN_OCCUPANCY(txq->ift_size) + + MAX_TX_DESC(ctx)); } static void @@ -2734,7 +2751,7 @@ iflib_tx_desc_free(iflib_txq_t txq, int n) cidx = txq->ift_cidx; gen = txq->ift_gen; - qsize = txq->ift_ctx->ifc_sctx->isc_ntxd; + qsize = txq->ift_size; mask = qsize-1; hasmap = txq->ift_sds.ifsd_map != NULL; ifsd_flags = txq->ift_sds.ifsd_flags; @@ -2760,7 +2777,7 @@ iflib_tx_desc_free(iflib_txq_t txq, int n) /* XXX we don't support any drivers that 
batch packets yet */ MPASS(m->m_nextpkt == NULL); - m_freem(m); + m_free(m); ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; @@ -2856,7 +2873,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { - m_freem(r->items[(cidx + i) & (r->size-1)]); + m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); @@ -2903,7 +2920,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) break; - if (desc_used > TXQ_MAX_DB_CONSUMED(ctx)) + if (desc_used > TXQ_MAX_DB_CONSUMED(txq->ift_size)) break; } @@ -2924,7 +2941,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) } static void -_task_fn_tx(void *context, int pending) +_task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; @@ -2935,11 +2952,12 @@ _task_fn_tx(void *context, int pending) } static void -_task_fn_rx(void *context, int pending) +_task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; bool more; + int rc; DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) @@ -2950,7 +2968,8 @@ _task_fn_rx(void *context, int pending) IFDI_INTR_ENABLE(ctx); else { DBG_COUNTER_INC(rx_intr_enables); - IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); + rc = IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); + KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) @@ -2960,7 +2979,7 @@ _task_fn_rx(void *context, int pending) } static void -_task_fn_admin(void *context, int pending) +_task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; @@ -2990,7 +3009,7 @@ _task_fn_admin(void 
*context, int pending) static void -_task_fn_iov(void *context, int pending) +_task_fn_iov(void *context) { if_ctx_t ctx = context; @@ -3049,8 +3068,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; - struct mbuf *marr[8], **mp, *next; - int err, i, count, qidx; + int err, qidx; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); @@ -3058,6 +3076,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) return (0); } + MPASS(m->m_nextpkt == NULL); qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) qidx = QIDX(ctx, m); @@ -3077,6 +3096,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) return (ENOBUFS); } #endif +#ifdef notyet qidx = count = 0; mp = marr; next = m; @@ -3098,22 +3118,21 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } +#endif DBG_COUNTER_INC(tx_seen); - err = ifmp_ring_enqueue(txq->ift_br[0], (void **)mp, count, TX_BATCH_SIZE); + err = ifmp_ring_enqueue(txq->ift_br[0], (void **)&m, 1, TX_BATCH_SIZE); - if (iflib_txq_can_drain(txq->ift_br[0])) - GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { + GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif - for (i = 0; i < count; i++) - m_freem(mp[i]); ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE); + m_freem(m); + } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) { + GROUPTASK_ENQUEUE(&txq->ift_task); } - if (count > nitems(marr)) - free(mp, M_IFLIB); return (err); } @@ -3138,13 +3157,13 @@ iflib_if_qflush(if_t ifp) if_qflush(ifp); } -#define IFCAP_REINIT (IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_TSO6|IFCAP_VLAN_HWTAGGING|IFCAP_VLAN_MTU | \ - IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) -#define IFCAP_FLAGS (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ +#define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO4 | IFCAP_TSO6 | 
IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) +#define IFCAP_REINIT IFCAP_FLAGS + static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { @@ -3428,6 +3447,9 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; + int i; + uint16_t main_txq; + uint16_t main_rxq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); @@ -3435,6 +3457,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); + ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; @@ -3447,28 +3470,112 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct return (err); } iflib_add_device_sysctl_pre(ctx); + + scctx = &ctx->ifc_softc_ctx; + /* + * XXX sanity check that ntxd & nrxd are a power of 2 + */ + if (ctx->ifc_sysctl_ntxqs != 0) + scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; + if (ctx->ifc_sysctl_nrxqs != 0) + scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; + + for (i = 0; i < sctx->isc_ntxqs; i++) { + if (ctx->ifc_sysctl_ntxds[i] != 0) + scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; + else + scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; + } + + for (i = 0; i < sctx->isc_nrxqs; i++) { + if (ctx->ifc_sysctl_nrxds[i] != 0) + scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; + else + scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; + } + + for (i = 0; i < sctx->isc_nrxqs; i++) { + if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { + device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", + i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); + scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; + } + if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { + device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", + i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); + scctx->isc_nrxd[i] = 
sctx->isc_nrxd_max[i]; + } + } + + for (i = 0; i < sctx->isc_ntxqs; i++) { + if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { + device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", + i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); + scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; + } + if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { + device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", + i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); + scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; + } + } + if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } + if (scctx->isc_ntxqsets_max) + scctx->isc_ntxqsets = min(scctx->isc_ntxqsets, scctx->isc_ntxqsets_max); + if (scctx->isc_nrxqsets_max) + scctx->isc_nrxqsets = min(scctx->isc_nrxqsets, scctx->isc_nrxqsets_max); + #ifdef ACPI_DMAR if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL) ctx->ifc_flags |= IFC_DMAR; #endif - scctx = &ctx->ifc_softc_ctx; msix_bar = scctx->isc_msix_bar; - if (scctx->isc_tx_nsegments > sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION) - scctx->isc_tx_nsegments = max(1, sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION); - if (scctx->isc_tx_tso_segments_max > sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION) - scctx->isc_tx_tso_segments_max = max(1, sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION); - ifp = ctx->ifc_ifp; - /* - * XXX sanity check that ntxd & nrxd are a power of 2 - */ + if(sctx->isc_flags & IFLIB_HAS_TXCQ) + main_txq = 1; + else + main_txq = 0; + + if(sctx->isc_flags & IFLIB_HAS_RXCQ) + main_rxq = 1; + else + main_rxq = 0; + + /* XXX change for per-queue sizes */ + device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", + scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); + for (i = 0; i < sctx->isc_nrxqs; i++) { + if (!powerof2(scctx->isc_nrxd[i])) { + /* round down instead? 
*/ + device_printf(dev, "# rx descriptors must be a power of 2\n"); + err = EINVAL; + goto fail; + } + } + for (i = 0; i < sctx->isc_ntxqs; i++) { + if (!powerof2(scctx->isc_ntxd[i])) { + device_printf(dev, + "# tx descriptors must be a power of 2"); + err = EINVAL; + goto fail; + } + } + + if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / + MAX_SINGLE_PACKET_FRACTION) + scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / + MAX_SINGLE_PACKET_FRACTION); + if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / + MAX_SINGLE_PACKET_FRACTION) + scctx->isc_tx_tso_segments_max = max(1, + scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* * Protect the stack against modern hardware @@ -3482,7 +3589,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max; if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; - scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;; + scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; /* ** Now setup MSI or MSI/X, should ** return us the number of supported @@ -3520,7 +3627,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct MPASS(msix == 1); rid = 1; } - if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx, &rid, "irq0")) != 0) { + if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_intr_free; } @@ -3536,6 +3643,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct } *ctxp = ctx; + if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); return (0); fail_detach: @@ -3599,7 +3707,7 @@ iflib_device_deregister(if_ctx_t ctx) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; - for (txq = ctx->ifc_txqs, i = 0, rxq = ctx->ifc_rxqs; i < 
NTXQSETS(ctx); i++, txq++) { + for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); callout_drain(&txq->ift_db_check); if (txq->ift_task.gt_uniq != NULL) @@ -3616,6 +3724,7 @@ iflib_device_deregister(if_ctx_t ctx) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); IFDI_DETACH(ctx); + device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); } @@ -3633,6 +3742,9 @@ iflib_device_deregister(if_ctx_t ctx) iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); + if (ctx->ifc_flags & IFC_SC_ALLOCATED) + free(ctx->ifc_softc, M_IFLIB); + free(ctx, M_IFLIB); return (0); } @@ -3782,7 +3894,13 @@ _iflib_assert(if_shared_ctx_t sctx) MPASS(sctx->isc_txrx->ift_rxd_pkt_get); MPASS(sctx->isc_txrx->ift_rxd_refill); MPASS(sctx->isc_txrx->ift_rxd_flush); - MPASS(sctx->isc_nrxd); + + MPASS(sctx->isc_nrxd_min[0]); + MPASS(sctx->isc_nrxd_max[0]); + MPASS(sctx->isc_nrxd_default[0]); + MPASS(sctx->isc_ntxd_min[0]); + MPASS(sctx->isc_ntxd_max[0]); + MPASS(sctx->isc_ntxd_default[0]); } static int @@ -3796,7 +3914,6 @@ iflib_register(if_ctx_t ctx) _iflib_assert(sctx); CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); - MPASS(ctx->ifc_flags == 0); ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { @@ -3818,7 +3935,6 @@ iflib_register(if_ctx_t ctx) if_setioctlfn(ifp, iflib_if_ioctl); if_settransmitfn(ifp, iflib_if_transmit); if_setqflushfn(ifp, iflib_if_qflush); - if_setgetcounterfn(ifp, iflib_if_get_counter); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setcapabilities(ifp, 0); @@ -3842,16 +3958,17 @@ static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; - int nrxqsets = ctx->ifc_softc_ctx.isc_nrxqsets; - int ntxqsets = ctx->ifc_softc_ctx.isc_ntxqsets; + int nrxqsets = scctx->isc_nrxqsets; + int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; 
iflib_rxq_t rxq; iflib_fl_t fl = NULL; - int i, j, cpu, err, txconf, rxconf, fl_ifdi_offset; + int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; - uint32_t *rxqsizes = sctx->isc_rxqsizes; - uint32_t *txqsizes = sctx->isc_txqsizes; + uint32_t *rxqsizes = scctx->isc_rxqsizes; + uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1; @@ -3860,10 +3977,11 @@ iflib_queues_alloc(if_ctx_t ctx) struct ifmp_ring **brscp; int nbuf_rings = 1; /* XXX determine dynamically */ - KASSERT(ntxqs > 0, ("number of queues must be at least 1")); - KASSERT(nrxqs > 0, ("number of queues must be at least 1")); + KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); + KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); brscp = NULL; + txq = NULL; rxq = NULL; /* Allocate the TX ring struct memory */ @@ -3891,8 +4009,6 @@ iflib_queues_alloc(if_ctx_t ctx) ctx->ifc_txqs = txq; ctx->ifc_rxqs = rxq; - txq = NULL; - rxq = NULL; /* * XXX handle allocation failure @@ -3916,6 +4032,11 @@ iflib_queues_alloc(if_ctx_t ctx) } txq->ift_ctx = ctx; txq->ift_id = i; + if (sctx->isc_flags & IFLIB_HAS_TXCQ) { + txq->ift_br_offset = 1; + } else { + txq->ift_br_offset = 0; + } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; txq->ift_db_check.c_cpu = cpu; @@ -3970,10 +4091,10 @@ iflib_queues_alloc(if_ctx_t ctx) } rxq->ifr_ctx = ctx; rxq->ifr_id = i; - if (sctx->isc_flags & IFLIB_HAS_CQ) { - fl_ifdi_offset = 1; + if (sctx->isc_flags & IFLIB_HAS_RXCQ) { + rxq->ifr_fl_offset = 1; } else { - fl_ifdi_offset = 0; + rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = @@ -3986,7 +4107,8 @@ iflib_queues_alloc(if_ctx_t ctx) for (j = 0; j < nfree_lists; j++) { rxq->ifr_fl[j].ifl_rxq = rxq; rxq->ifr_fl[j].ifl_id = j; - rxq->ifr_fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + fl_ifdi_offset]; + rxq->ifr_fl[j].ifl_ifdi = + &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; } /* Allocate 
receive buffers for the ring*/ if (iflib_rxsd_alloc(rxq)) { @@ -4106,12 +4228,13 @@ iflib_rx_structures_setup(if_ctx_t ctx) for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); - if ((err = tcp_lro_init(&rxq->ifr_lc)) != 0) { + if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, + TCP_LRO_ENTRIES, min(1024, + ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } rxq->ifr_lro_enabled = TRUE; - rxq->ifr_lc.ifp = ctx->ifc_ifp; #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } @@ -4142,7 +4265,7 @@ iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; - for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, rxq++) { + for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { iflib_rx_sds_free(rxq); } } @@ -4193,7 +4316,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, struct taskqgroup *tqg; iflib_filter_info_t info; cpuset_t cpus; - task_fn_t *fn; + gtask_fn_t *fn; int tqrid, err; void *q; @@ -4254,7 +4377,7 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void { struct grouptask *gtask; struct taskqgroup *tqg; - task_fn_t *fn; + gtask_fn_t *fn; void *q; switch (type) { @@ -4310,7 +4433,7 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int * iflib_filter_info_t info; struct grouptask *gtask; struct taskqgroup *tqg; - task_fn_t *fn; + gtask_fn_t *fn; int tqrid; void *q; int err; @@ -4385,7 +4508,7 @@ iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) } void -iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, task_fn_t *fn, +iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name) { @@ -4394,14 +4517,21 @@ iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, task_fn_t *fn, } void -iflib_link_state_change(if_ctx_t ctx, int link_state) 
+iflib_config_gtask_deinit(struct grouptask *gtask) +{ + + taskqgroup_detach(qgroup_if_config_tqg, gtask); +} + +void +iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; -#if 0 + if_setbaudrate(ifp, baudrate); -#endif + /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) @@ -4431,10 +4561,11 @@ iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) } static int -iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx) +iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget) { - return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx)); + return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, + budget)); } void @@ -4468,8 +4599,9 @@ iflib_msix_init(if_ctx_t ctx) int iflib_num_tx_queues, iflib_num_rx_queues; int err, admincnt, bar; - iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; - iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; + iflib_num_tx_queues = scctx->isc_ntxqsets; + iflib_num_rx_queues = scctx->isc_nrxqsets; + bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; /* Override by tuneable */ @@ -4549,18 +4681,31 @@ iflib_msix_init(if_ctx_t ctx) if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif - if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queues) - queues = rx_queues = iflib_num_rx_queues; + if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) + rx_queues = iflib_num_rx_queues; else rx_queues = queues; + /* + * We want this to be all logical CPUs by default + */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else - tx_queues = queues; + tx_queues = mp_ncpus; + + if (ctx->ifc_sysctl_qs_eq_override == 0) { +#ifdef INVARIANTS + if (tx_queues != rx_queues) + device_printf(dev, "queue equality override not set, 
capping rx_queues at %d and tx_queues at %d\n", + min(rx_queues, tx_queues), min(rx_queues, tx_queues)); +#endif + tx_queues = min(rx_queues, tx_queues); + rx_queues = min(rx_queues, tx_queues); + } device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); - vectors = queues + admincnt; + vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); @@ -4568,6 +4713,7 @@ iflib_msix_init(if_ctx_t ctx) scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; + return (vectors); } else { device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); @@ -4617,7 +4763,58 @@ mp_ring_state_handler(SYSCTL_HANDLER_ARGS) return(rc); } +enum iflib_ndesc_handler { + IFLIB_NTXD_HANDLER, + IFLIB_NRXD_HANDLER, +}; +static int +mp_ndesc_handler(SYSCTL_HANDLER_ARGS) +{ + if_ctx_t ctx = (void *)arg1; + enum iflib_ndesc_handler type = arg2; + char buf[256] = {0}; + uint16_t *ndesc; + char *p, *next; + int nqs, rc, i; + + MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); + + nqs = 8; + switch(type) { + case IFLIB_NTXD_HANDLER: + ndesc = ctx->ifc_sysctl_ntxds; + if (ctx->ifc_sctx) + nqs = ctx->ifc_sctx->isc_ntxqs; + break; + case IFLIB_NRXD_HANDLER: + ndesc = ctx->ifc_sysctl_nrxds; + if (ctx->ifc_sctx) + nqs = ctx->ifc_sctx->isc_nrxqs; + break; + } + if (nqs == 0) + nqs = 8; + + for (i=0; i<8; i++) { + if (i >= nqs) + break; + if (i) + strcat(buf, ","); + sprintf(strchr(buf, 0), "%d", ndesc[i]); + } + + rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (rc || req->newptr == NULL) + return rc; + + for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; + i++, p = strsep(&next, " ,")) { + ndesc[i] = strtoul(p, NULL, 10); + } + + return(rc); +} #define NAME_BUFLEN 32 static void @@ -4634,19 +4831,29 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) CTLFLAG_RD, NULL, "IFLIB 
fields"); oid_list = SYSCTL_CHILDREN(node); + SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", + CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0, + "driver version"); + SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", - CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, - "# of txqs to use, 0 => use default #"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxds", - CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxds, 0, - "# of tx descriptors to use, 0 => use default #"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxds", - CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxds, 0, - "# of rx descriptors to use, 0 => use default #"); + CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, + "# of rxqs to use, 0 => use default #"); + SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", + CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, + "permit #txq != #rxq"); + /* XXX change for per-queue sizes */ + SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", + CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, + mp_ndesc_handler, "A", + "list of # of tx descriptors to use, 0 = use default #"); + SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", + CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, + mp_ndesc_handler, "A", + "list of # of rx descriptors to use, 0 = use default #"); } static void @@ -4700,7 +4907,7 @@ iflib_add_device_sysctl_post(if_ctx_t ctx) &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, - &txq->ift_mbuf_defrag_failed, "# of times no descriptors were available"); + &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times dma map failed"); @@ 
-4763,7 +4970,7 @@ iflib_add_device_sysctl_post(if_ctx_t ctx) queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); - if (sctx->isc_flags & IFLIB_HAS_CQ) { + if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", CTLFLAG_RD, &rxq->ifr_cq_pidx, 1, "Producer Index"); diff --git a/sys/net/iflib.h b/sys/net/iflib.h index c301b917897c..cf4a786baa1b 100644 --- a/sys/net/iflib.h +++ b/sys/net/iflib.h @@ -35,6 +35,7 @@ #include #include #include +#include /* @@ -63,12 +64,14 @@ typedef struct if_int_delay_info *if_int_delay_info_t; typedef struct if_rxd_frag { uint8_t irf_flid; uint16_t irf_idx; + uint16_t irf_len; } *if_rxd_frag_t; typedef struct if_rxd_info { /* set by iflib */ uint16_t iri_qsidx; /* qset index */ uint16_t iri_vtag; /* vlan tag - if flag set */ + /* XXX redundant with the new irf_len field */ uint16_t iri_len; /* packet length */ uint16_t iri_cidx; /* consumer index of cq */ struct ifnet *iri_ifp; /* some drivers >1 interface per softc */ @@ -156,10 +159,11 @@ typedef struct if_txrx { void (*ift_txd_flush) (void *, uint16_t, uint32_t); int (*ift_txd_credits_update) (void *, uint16_t, uint32_t, bool); - int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx); + int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx, + int budget); int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri); void (*ift_rxd_refill) (void * , uint16_t qsidx, uint8_t flidx, uint32_t pidx, - uint64_t *paddrs, caddr_t *vaddrs, uint16_t count); + uint64_t *paddrs, caddr_t *vaddrs, uint16_t count, uint16_t buf_size); void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, uint32_t pidx); int (*ift_legacy_intr) (void *); } *if_txrx_t; @@ -170,11 +174,20 @@ typedef struct if_softc_ctx { int isc_ntxqsets; int isc_msix_bar; /* can be model specific - initialize in attach_pre */ int isc_tx_nsegments; /* can be model specific - initialize in 
attach_pre */ + int isc_ntxd[8]; + int isc_nrxd[8]; + + uint32_t isc_txqsizes[8]; + uint32_t isc_rxqsizes[8]; + int isc_max_txqsets; + int isc_max_rxqsets; int isc_tx_tso_segments_max; int isc_tx_tso_size_max; int isc_tx_tso_segsize_max; int isc_rss_table_size; int isc_rss_table_mask; + int isc_nrxqsets_max; + int isc_ntxqsets_max; iflib_intr_mode_t isc_intr; uint16_t isc_max_frame_size; /* set at init time by driver */ @@ -188,8 +201,6 @@ struct if_shared_ctx { int isc_magic; if_txrx_t isc_txrx; driver_t *isc_driver; - int isc_ntxd; - int isc_nrxd; int isc_nfl; int isc_flags; bus_size_t isc_q_align; @@ -199,14 +210,11 @@ struct if_shared_ctx { bus_size_t isc_rx_maxsegsize; int isc_rx_nsegments; int isc_rx_process_limit; - - - uint32_t isc_txqsizes[8]; int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */ - uint32_t isc_rxqsizes[8]; int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */ int isc_admin_intrcnt; /* # of admin/link interrupts */ + int isc_tx_reclaim_thresh; /* fields necessary for probe */ @@ -215,6 +223,12 @@ struct if_shared_ctx { /* optional function to transform the read values to match the table*/ void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, uint16_t *subdevice_id, uint16_t *rev_id); + int isc_nrxd_min[8]; + int isc_nrxd_default[8]; + int isc_nrxd_max[8]; + int isc_ntxd_min[8]; + int isc_ntxd_default[8]; + int isc_ntxd_max[8]; }; typedef struct iflib_dma_info { @@ -240,9 +254,9 @@ typedef enum { /* - * Interface has a separate command queue + * Interface has a separate command queue for RX */ -#define IFLIB_HAS_CQ 0x1 +#define IFLIB_HAS_RXCQ 0x1 /* * Driver has already allocated vectors */ @@ -252,6 +266,10 @@ typedef enum { * Interface is a virtual function */ #define IFLIB_IS_VF 0x4 +/* + * Interface has a separate command queue for TX + */ +#define IFLIB_HAS_TXCQ 0x8 /* @@ -308,7 +326,10 @@ void iflib_irq_free(if_ctx_t ctx, if_irq_t irq); void iflib_io_tqg_attach(struct grouptask *gt, 
void *uniq, int cpu, char *name); void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, - task_fn_t *fn, char *name); + gtask_fn_t *fn, char *name); + +void iflib_config_gtask_deinit(struct grouptask *gtask); + void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid); @@ -317,7 +338,7 @@ void iflib_admin_intr_deferred(if_ctx_t ctx); void iflib_iov_intr_deferred(if_ctx_t ctx); -void iflib_link_state_change(if_ctx_t ctx, int linkstate); +void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate); int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags); void iflib_dma_free(iflib_dma_info_t dma); diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index 6b07d1b3f177..0deac8a44fb5 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -60,6 +60,7 @@ #define IPFW_ARG_MAX 65534 #define IP_FW_TABLEARG 65535 /* Compat value for old clients */ #define IP_FW_TARG 0 /* Current tablearg value */ +#define IP_FW_NAT44_GLOBAL 65535 /* arg1 value for "nat global" */ /* * Number of entries in the call stack of the call/return commands. diff --git a/sys/netinet/sctp_indata.c b/sys/netinet/sctp_indata.c index e5ff349e7322..40112530f4fc 100644 --- a/sys/netinet/sctp_indata.c +++ b/sys/netinet/sctp_indata.c @@ -1747,21 +1747,27 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, * If its a fragmented message, lets see if we can find the control * on the reassembly queues. */ - if ((chtype == SCTP_IDATA) && ((chunk_flags & SCTP_DATA_FIRST_FRAG) == 0) && (fsn == 0)) { + if ((chtype == SCTP_IDATA) && + ((chunk_flags & SCTP_DATA_FIRST_FRAG) == 0) && + (fsn == 0)) { /* * The first *must* be fsn 0, and other (middle/end) pieces - * can *not* be fsn 0. + * can *not* be fsn 0. XXX: This can happen in case of a + * wrap around. Ignore is for now. 
*/ + snprintf(msg, sizeof(msg), "FSN zero for MID=%8.8x, but flags=%2.2x", + msg_id, chunk_flags); goto err_out; } + control = sctp_find_reasm_entry(strm, msg_id, ordered, old_data); + SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags:0x%x look for control on queues %p\n", + chunk_flags, control); if ((chunk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) { /* See if we can find the re-assembly entity */ - control = sctp_find_reasm_entry(strm, msg_id, ordered, old_data); - SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags:0x%x look for control on queues %p\n", - chunk_flags, control); - if (control) { + if (control != NULL) { /* We found something, does it belong? */ if (ordered && (msg_id != control->sinfo_ssn)) { + snprintf(msg, sizeof(msg), "Reassembly problem (MID=%8.8x)", msg_id); err_out: op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg); stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15; @@ -1774,6 +1780,8 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, * We can't have a switched order with an * unordered chunk */ + snprintf(msg, sizeof(msg), "All fragments of a user message must be ordered or unordered (TSN=%8.8x)", + tsn); goto err_out; } if (!ordered && (((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED) == 0)) { @@ -1781,6 +1789,8 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, * We can't have a switched unordered with a * ordered chunk */ + snprintf(msg, sizeof(msg), "All fragments of a user message must be ordered or unordered (TSN=%8.8x)", + tsn); goto err_out; } } @@ -1790,14 +1800,21 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, * re-assembly going on with the same Stream/Seq (for * ordered) or in the same Stream for unordered. 
*/ - SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags:0x%x look for msg in case we have dup\n", - chunk_flags); - if (sctp_find_reasm_entry(strm, msg_id, ordered, old_data)) { - SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags: 0x%x dup detected on msg_id: %u\n", - chunk_flags, - msg_id); - - goto err_out; + if (control != NULL) { + if (ordered || (old_data == 0)) { + SCTPDBG(SCTP_DEBUG_XXX, "chunk_flags: 0x%x dup detected on msg_id: %u\n", + chunk_flags, msg_id); + snprintf(msg, sizeof(msg), "Duplicate MID=%8.8x detected.", msg_id); + goto err_out; + } else { + if ((tsn == control->fsn_included + 1) && + (control->end_added == 0)) { + snprintf(msg, sizeof(msg), "Illegal message sequence, missing end for MID: %8.8x", control->fsn_included); + goto err_out; + } else { + control = NULL; + } + } } } /* now do the tests */ diff --git a/sys/netinet/tcp_fastopen.c b/sys/netinet/tcp_fastopen.c index ec02d18fada1..e2b3b8637f60 100644 --- a/sys/netinet/tcp_fastopen.c +++ b/sys/netinet/tcp_fastopen.c @@ -108,6 +108,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 6b4a34b20b46..d336479e98dc 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -2508,7 +2508,7 @@ do { \ set_match(args, f_pos, chain); /* Check if this is 'global' nat rule */ - if (cmd->arg1 == 0) { + if (cmd->arg1 == IP_FW_NAT44_GLOBAL) { retval = ipfw_nat_ptr(args, NULL, m); break; } diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index a62b19e4d3da..8031e311e89d 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -530,9 +530,11 @@ import_rule0(struct rule_check_info *ci) /* * Alter opcodes: - * 1) convert tablearg value from 65335 to 0 - * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room for targ). + * 1) convert tablearg value from 65535 to 0 + * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room + * for targ). 
* 3) convert table number in iface opcodes to u16 + * 4) convert old `nat global` into new 65535 */ l = krule->cmd_len; cmd = krule->cmd; @@ -554,19 +556,21 @@ import_rule0(struct rule_check_info *ci) case O_NETGRAPH: case O_NGTEE: case O_NAT: - if (cmd->arg1 == 65535) + if (cmd->arg1 == IP_FW_TABLEARG) cmd->arg1 = IP_FW_TARG; + else if (cmd->arg1 == 0) + cmd->arg1 = IP_FW_NAT44_GLOBAL; break; case O_SETFIB: case O_SETDSCP: - if (cmd->arg1 == 65535) + if (cmd->arg1 == IP_FW_TABLEARG) cmd->arg1 = IP_FW_TARG; else cmd->arg1 |= 0x8000; break; case O_LIMIT: lcmd = (ipfw_insn_limit *)cmd; - if (lcmd->conn_limit == 65535) + if (lcmd->conn_limit == IP_FW_TABLEARG) lcmd->conn_limit = IP_FW_TARG; break; /* Interface tables */ @@ -612,7 +616,7 @@ export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len) /* * Alter opcodes: - * 1) convert tablearg value from 0 to 65335 + * 1) convert tablearg value from 0 to 65535 * 2) Remove highest bit from O_SETFIB/O_SETDSCP values. * 3) convert table number in iface opcodes to int */ @@ -637,19 +641,21 @@ export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len) case O_NGTEE: case O_NAT: if (cmd->arg1 == IP_FW_TARG) - cmd->arg1 = 65535; + cmd->arg1 = IP_FW_TABLEARG; + else if (cmd->arg1 == IP_FW_NAT44_GLOBAL) + cmd->arg1 = 0; break; case O_SETFIB: case O_SETDSCP: if (cmd->arg1 == IP_FW_TARG) - cmd->arg1 = 65535; + cmd->arg1 = IP_FW_TABLEARG; else cmd->arg1 &= ~0x8000; break; case O_LIMIT: lcmd = (ipfw_insn_limit *)cmd; if (lcmd->conn_limit == IP_FW_TARG) - lcmd->conn_limit = 65535; + lcmd->conn_limit = IP_FW_TABLEARG; break; /* Interface tables */ case O_XMIT: diff --git a/sys/powerpc/include/bus_dma.h b/sys/powerpc/include/bus_dma.h index d89985420f06..e070a9423815 100644 --- a/sys/powerpc/include/bus_dma.h +++ b/sys/powerpc/include/bus_dma.h @@ -30,6 +30,8 @@ #include -int bus_dma_tag_set_iommu(bus_dma_tag_t, device_t iommu, void *cookie); +struct device; + +int bus_dma_tag_set_iommu(bus_dma_tag_t, struct device 
*iommu, void *cookie); #endif /* _POWERPC_BUS_DMA_H_ */ diff --git a/sys/riscv/conf/GENERIC b/sys/riscv/conf/GENERIC index dd73c3a352ac..6c53da2a051c 100644 --- a/sys/riscv/conf/GENERIC +++ b/sys/riscv/conf/GENERIC @@ -76,7 +76,7 @@ options SMP # Uncomment for memory disk # options MD_ROOT -# options MD_ROOT_SIZE=8192 # 8MB ram disk +# options MD_ROOT_SIZE=32768 # 32MB ram disk # makeoptions MFS_IMAGE=/path/to/img # options ROOTDEVNAME=\"ufs:/dev/md0\" diff --git a/sys/riscv/conf/QEMU b/sys/riscv/conf/QEMU index 26c65430844f..7411aefc6b1d 100644 --- a/sys/riscv/conf/QEMU +++ b/sys/riscv/conf/QEMU @@ -21,8 +21,8 @@ include GENERIC ident QEMU -device htif -options ROOTDEVNAME=\"ufs:/dev/htif_blk0\" +device rcons +options ROOTDEVNAME=\"ufs:/dev/md0\" # RISCVTODO: This needs to be done via loader (when it's available). options FDT_DTB_STATIC diff --git a/sys/riscv/conf/ROCKET b/sys/riscv/conf/ROCKET index 9a9e3efe46e3..1b2b1c08b8dd 100644 --- a/sys/riscv/conf/ROCKET +++ b/sys/riscv/conf/ROCKET @@ -21,8 +21,8 @@ include GENERIC ident ROCKET -device htif -options ROOTDEVNAME=\"ufs:/dev/htif_blk0\" +device rcons +options ROOTDEVNAME=\"ufs:/dev/md0\" # RISCVTODO: This needs to be done via loader (when it's available). options FDT_DTB_STATIC diff --git a/sys/riscv/conf/SPIKE b/sys/riscv/conf/SPIKE index 6c4a7434bb4c..cc36e6f97b00 100644 --- a/sys/riscv/conf/SPIKE +++ b/sys/riscv/conf/SPIKE @@ -21,8 +21,8 @@ include GENERIC ident SPIKE -device htif -options ROOTDEVNAME=\"ufs:/dev/htif_blk0\" +device rcons +options ROOTDEVNAME=\"ufs:/dev/md0\" # RISCVTODO: This needs to be done via loader (when it's available). options FDT_DTB_STATIC diff --git a/sys/riscv/htif/htif.c b/sys/riscv/htif/htif.c deleted file mode 100644 index 9a42db2c427e..000000000000 --- a/sys/riscv/htif/htif.c +++ /dev/null @@ -1,278 +0,0 @@ -/*- - * Copyright (c) 2015-2016 Ruslan Bukin - * All rights reserved. 
- * - * Portions of this software were developed by SRI International and the - * University of Cambridge Computer Laboratory under DARPA/AFRL contract - * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. - * - * Portions of this software were developed by the University of Cambridge - * Computer Laboratory as part of the CTSRD Project, with support from the - * UK Higher Education Innovation Fund (HEIF). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "htif.h" - -static struct resource_spec htif_spec[] = { - { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE}, - { -1, 0 } -}; - -struct intr_entry { - void (*func) (void *, uint64_t); - void *arg; -}; - -struct intr_entry intrs[HTIF_NDEV]; - -uint64_t -htif_command(uint64_t arg) -{ - - return (machine_command(ECALL_HTIF_CMD, arg)); -} - -int -htif_setup_intr(int id, void *func, void *arg) -{ - - if (id >= HTIF_NDEV) - return (-1); - - intrs[id].func = func; - intrs[id].arg = arg; - - return (0); -} - -static void -htif_handle_entry(struct htif_softc *sc) -{ - uint64_t entry; - uint8_t devcmd; - uint8_t devid; - - entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); - while (entry) { - devid = HTIF_DEV_ID(entry); - devcmd = HTIF_DEV_CMD(entry); - - if (devcmd == HTIF_CMD_IDENTIFY) { - /* Enumeration interrupt */ - if (devid == sc->identify_id) - sc->identify_done = 1; - } else { - /* Device interrupt */ - if (intrs[devid].func != NULL) - intrs[devid].func(intrs[devid].arg, entry); - } - - entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); - } -} - -static int -htif_intr(void *arg) -{ - struct htif_softc *sc; - - sc = arg; - - csr_clear(sip, SIP_SSIP); - - htif_handle_entry(sc); - - return (FILTER_HANDLED); -} - -static int -htif_add_device(struct htif_softc *sc, int i, char *id, char *name) -{ - struct htif_dev_ivars *di; - - di = malloc(sizeof(struct htif_dev_ivars), M_DEVBUF, M_WAITOK | M_ZERO); - di->sc = sc; - di->index = i; - di->id = malloc(HTIF_ID_LEN, M_DEVBUF, M_WAITOK | M_ZERO); - memcpy(di->id, id, HTIF_ID_LEN); - - di->dev = device_add_child(sc->dev, name, -1); - device_set_ivars(di->dev, di); - - return (0); -} - -static int -htif_enumerate(struct htif_softc *sc) -{ - char id[HTIF_ID_LEN] 
__aligned(HTIF_ALIGN); - uint64_t paddr; - uint64_t data; - uint64_t cmd; - int len; - int i; - - device_printf(sc->dev, "Enumerating devices\n"); - - for (i = 0; i < HTIF_NDEV; i++) { - paddr = pmap_kextract((vm_offset_t)&id); - data = (paddr << IDENTIFY_PADDR_SHIFT); - data |= IDENTIFY_IDENT; - - sc->identify_id = i; - sc->identify_done = 0; - - cmd = i; - cmd <<= HTIF_DEV_ID_SHIFT; - cmd |= (HTIF_CMD_IDENTIFY << HTIF_CMD_SHIFT); - cmd |= data; - - htif_command(cmd); - - len = strnlen(id, sizeof(id)); - if (len <= 0) - break; - - if (bootverbose) - printf(" %d %s\n", i, id); - - if (strncmp(id, "disk", 4) == 0) - htif_add_device(sc, i, id, "htif_blk"); - else if (strncmp(id, "bcd", 3) == 0) - htif_add_device(sc, i, id, "htif_console"); - else if (strncmp(id, "syscall_proxy", 13) == 0) - htif_add_device(sc, i, id, "htif_syscall_proxy"); - } - - return (bus_generic_attach(sc->dev)); -} - -int -htif_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) -{ - struct htif_dev_ivars *ivars; - - ivars = device_get_ivars(child); - - switch (which) { - case HTIF_IVAR_INDEX: - *result = ivars->index; - break; - case HTIF_IVAR_ID: - *result = (uintptr_t)ivars->id; - default: - return (EINVAL); - } - - return (0); -} - -static int -htif_probe(device_t dev) -{ - - if (!ofw_bus_status_okay(dev)) - return (ENXIO); - - if (!ofw_bus_is_compatible(dev, "riscv,htif")) - return (ENXIO); - - device_set_desc(dev, "HTIF bus device"); - return (BUS_PROBE_DEFAULT); -} - -static int -htif_attach(device_t dev) -{ - struct htif_softc *sc; - int error; - - sc = device_get_softc(dev); - sc->dev = dev; - - if (bus_alloc_resources(dev, htif_spec, sc->res)) { - device_printf(dev, "could not allocate resources\n"); - return (ENXIO); - } - - /* Setup IRQs handler */ - error = bus_setup_intr(dev, sc->res[0], INTR_TYPE_CLK, - htif_intr, NULL, sc, &sc->ihl[0]); - if (error) { - device_printf(dev, "Unable to alloc int resource.\n"); - return (ENXIO); - } - - csr_set(sie, SIE_SSIE); - - 
return (htif_enumerate(sc)); -} - -static device_method_t htif_methods[] = { - DEVMETHOD(device_probe, htif_probe), - DEVMETHOD(device_attach, htif_attach), - - /* Bus interface */ - DEVMETHOD(bus_read_ivar, htif_read_ivar), - - DEVMETHOD_END -}; - -static driver_t htif_driver = { - "htif", - htif_methods, - sizeof(struct htif_softc) -}; - -static devclass_t htif_devclass; - -DRIVER_MODULE(htif, simplebus, htif_driver, - htif_devclass, 0, 0); diff --git a/sys/riscv/htif/htif_block.c b/sys/riscv/htif/htif_block.c deleted file mode 100644 index 93662d825c1e..000000000000 --- a/sys/riscv/htif/htif_block.c +++ /dev/null @@ -1,299 +0,0 @@ -/*- - * Copyright (c) 2015-2016 Ruslan Bukin - * All rights reserved. - * - * Portions of this software were developed by SRI International and the - * University of Cambridge Computer Laboratory under DARPA/AFRL contract - * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. - * - * Portions of this software were developed by the University of Cambridge - * Computer Laboratory as part of the CTSRD Project, with support from the - * UK Higher Education Innovation Fund (HEIF). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include "htif.h" - -#define SECTOR_SIZE_SHIFT (9) -#define SECTOR_SIZE (1 << SECTOR_SIZE_SHIFT) - -#define HTIF_BLK_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) -#define HTIF_BLK_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) -#define HTIF_BLK_LOCK_INIT(_sc) \ - mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ - "htif_blk", MTX_DEF) -#define HTIF_BLK_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); -#define HTIF_BLK_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); -#define HTIF_BLK_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); - -static void htif_blk_task(void *arg); - -static disk_open_t htif_blk_open; -static disk_close_t htif_blk_close; -static disk_strategy_t htif_blk_strategy; - -struct htif_blk_softc { - device_t dev; - struct disk *disk; - struct mtx htif_io_mtx; - struct mtx sc_mtx; - struct proc *p; - struct bio_queue_head bio_queue; - int running; - int intr_chan; - int cmd_done; - int index; - uint16_t curtag; -}; - -struct htif_blk_request { - uint64_t addr; - uint64_t offset; /* offset in bytes */ - uint64_t size; /* length in bytes */ - uint64_t tag; -}; - -static void -htif_blk_intr(void *arg, uint64_t entry) -{ - struct htif_blk_softc *sc; - uint64_t 
devcmd; - uint64_t data; - - sc = arg; - - devcmd = HTIF_DEV_CMD(entry); - data = HTIF_DEV_DATA(entry); - - if (sc->curtag == data) { - wmb(); - sc->cmd_done = 1; - wakeup(&sc->intr_chan); - } else { - device_printf(sc->dev, "Unexpected tag %d (should be %d)\n", - data, sc->curtag); - } -} - -static int -htif_blk_probe(device_t dev) -{ - - return (0); -} - -static int -htif_blk_attach(device_t dev) -{ - struct htif_blk_softc *sc; - char prefix[] = " size="; - char *str; - long size; - - sc = device_get_softc(dev); - sc->dev = dev; - - mtx_init(&sc->htif_io_mtx, device_get_nameunit(dev), "htif_blk", MTX_DEF); - HTIF_BLK_LOCK_INIT(sc); - - str = strstr(htif_get_id(dev), prefix); - - size = strtol((str + 6), NULL, 10); - if (size == 0) { - return (ENXIO); - } - - sc->index = htif_get_index(dev); - if (sc->index < 0) - return (EINVAL); - htif_setup_intr(sc->index, htif_blk_intr, sc); - - sc->disk = disk_alloc(); - sc->disk->d_drv1 = sc; - - sc->disk->d_maxsize = 4096; /* Max transfer */ - sc->disk->d_name = "htif_blk"; - sc->disk->d_open = htif_blk_open; - sc->disk->d_close = htif_blk_close; - sc->disk->d_strategy = htif_blk_strategy; - sc->disk->d_unit = 0; - sc->disk->d_sectorsize = SECTOR_SIZE; - sc->disk->d_mediasize = size; - disk_create(sc->disk, DISK_VERSION); - - bioq_init(&sc->bio_queue); - - sc->running = 1; - - kproc_create(&htif_blk_task, sc, &sc->p, 0, 0, "%s: transfer", - device_get_nameunit(dev)); - - return (0); -} - -static int -htif_blk_open(struct disk *dp) -{ - - return (0); -} - -static int -htif_blk_close(struct disk *dp) -{ - - return (0); -} - -static void -htif_blk_task(void *arg) -{ - struct htif_blk_request req __aligned(HTIF_ALIGN); - struct htif_blk_softc *sc; - uint64_t req_paddr; - struct bio *bp; - uint64_t paddr; - uint64_t resp; - uint64_t cmd; - int i; - - sc = (struct htif_blk_softc *)arg; - - while (1) { - HTIF_BLK_LOCK(sc); - do { - bp = bioq_takefirst(&sc->bio_queue); - if (bp == NULL) - msleep(sc, &sc->sc_mtx, PRIBIO, "jobqueue", 
0); - } while (bp == NULL); - HTIF_BLK_UNLOCK(sc); - - if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { - HTIF_BLK_LOCK(sc); - - rmb(); - req.offset = (bp->bio_pblkno * sc->disk->d_sectorsize); - req.size = bp->bio_bcount; - paddr = vtophys(bp->bio_data); - KASSERT(paddr != 0, ("paddr is 0")); - req.addr = paddr; - sc->curtag++; - req.tag = sc->curtag; - - cmd = sc->index; - cmd <<= HTIF_DEV_ID_SHIFT; - if (bp->bio_cmd == BIO_READ) - cmd |= (HTIF_CMD_READ << HTIF_CMD_SHIFT); - else - cmd |= (HTIF_CMD_WRITE << HTIF_CMD_SHIFT); - req_paddr = vtophys(&req); - KASSERT(req_paddr != 0, ("req_paddr is 0")); - cmd |= req_paddr; - - sc->cmd_done = 0; - resp = htif_command(cmd); - htif_blk_intr(sc, resp); - - /* Wait for interrupt */ - i = 0; - while (sc->cmd_done == 0) { - msleep(&sc->intr_chan, &sc->sc_mtx, PRIBIO, "intr", hz/2); - - if (i++ > 2) { - /* TODO: try to re-issue operation on timeout ? */ - bp->bio_error = EIO; - bp->bio_flags |= BIO_ERROR; - disk_err(bp, "hard error", -1, 1); - break; - } - } - HTIF_BLK_UNLOCK(sc); - - biodone(bp); - } else { - printf("unknown op %d\n", bp->bio_cmd); - } - } -} - -static void -htif_blk_strategy(struct bio *bp) -{ - struct htif_blk_softc *sc; - - sc = bp->bio_disk->d_drv1; - - HTIF_BLK_LOCK(sc); - if (sc->running > 0) { - bioq_disksort(&sc->bio_queue, bp); - HTIF_BLK_UNLOCK(sc); - wakeup(sc); - } else { - HTIF_BLK_UNLOCK(sc); - biofinish(bp, NULL, ENXIO); - } -} - -static device_method_t htif_blk_methods[] = { - DEVMETHOD(device_probe, htif_blk_probe), - DEVMETHOD(device_attach, htif_blk_attach), -}; - -static driver_t htif_blk_driver = { - "htif_blk", - htif_blk_methods, - sizeof(struct htif_blk_softc) -}; - -static devclass_t htif_blk_devclass; - -DRIVER_MODULE(htif_blk, htif, htif_blk_driver, htif_blk_devclass, 0, 0); diff --git a/sys/riscv/include/cpufunc.h b/sys/riscv/include/cpufunc.h index 2db73ede884f..6cac649b4e95 100644 --- a/sys/riscv/include/cpufunc.h +++ b/sys/riscv/include/cpufunc.h @@ -81,21 +81,6 @@ 
intr_enable(void) ); } -static __inline register_t -machine_command(uint64_t cmd, uint64_t arg) -{ - uint64_t res; - - __asm __volatile( - "mv t5, %2\n" - "mv t6, %1\n" - "ecall\n" - "mv %0, t6" : "=&r"(res) : "r"(arg), "r"(cmd) - ); - - return (res); -} - #define cpu_nullop() riscv_nullop() #define cpufunc_nullop() riscv_nullop() #define cpu_setttb(a) riscv_setttb(a) diff --git a/sys/riscv/include/pcpu.h b/sys/riscv/include/pcpu.h index d72b11c012e1..ea5336029d68 100644 --- a/sys/riscv/include/pcpu.h +++ b/sys/riscv/include/pcpu.h @@ -46,8 +46,7 @@ #define PCPU_MD_FIELDS \ uint32_t pc_pending_ipis; /* IPIs pending to this CPU */ \ - uint64_t pc_reg; /* CPU MMIO base (PA) */ \ - char __pad[117] + char __pad[125] #ifdef _KERNEL diff --git a/sys/riscv/include/riscvreg.h b/sys/riscv/include/riscvreg.h index 8c6b4dce2ebd..2e3a4a2e9f5d 100644 --- a/sys/riscv/include/riscvreg.h +++ b/sys/riscv/include/riscvreg.h @@ -37,19 +37,6 @@ #ifndef _MACHINE_RISCVREG_H_ #define _MACHINE_RISCVREG_H_ -/* Machine mode requests */ -#define ECALL_MTIMECMP 0x01 -#define ECALL_HTIF_GET_ENTRY 0x02 -#define ECALL_MCPUID_GET 0x03 -#define ECALL_MIMPID_GET 0x04 -#define ECALL_SEND_IPI 0x05 -#define ECALL_CLEAR_IPI 0x06 -#define ECALL_MIE_SET 0x07 -#define ECALL_IO_IRQ_MASK 0x08 -#define ECALL_HTIF_CMD 0x09 -#define ECALL_HTIF_CMD_REQ 0x0a -#define ECALL_HTIF_CMD_RESP 0x0b - #define EXCP_SHIFT 0 #define EXCP_MASK (0xf << EXCP_SHIFT) #define EXCP_MISALIGNED_FETCH 0 @@ -65,9 +52,6 @@ #define EXCP_HYPERVISOR_ECALL 10 #define EXCP_MACHINE_ECALL 11 #define EXCP_INTR (1ul << 63) -#define EXCP_INTR_SOFTWARE 0 -#define EXCP_INTR_TIMER 1 -#define EXCP_INTR_HTIF 2 #define SSTATUS_UIE (1 << 0) #define SSTATUS_SIE (1 << 1) diff --git a/sys/riscv/htif/htif.h b/sys/riscv/include/sbi.h similarity index 53% rename from sys/riscv/htif/htif.h rename to sys/riscv/include/sbi.h index a1183d97c08e..76690ab84b08 100644 --- a/sys/riscv/htif/htif.h +++ b/sys/riscv/include/sbi.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 
2015 Ruslan Bukin + * Copyright (c) 2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the @@ -34,60 +34,32 @@ * $FreeBSD$ */ -#define HTIF_DEV_ID_SHIFT (56) -#define HTIF_DEV_ID_MASK (0xfful << HTIF_DEV_ID_SHIFT) -#define HTIF_CMD_SHIFT (48) -#define HTIF_CMD_MASK (0xfful << HTIF_CMD_SHIFT) -#define HTIF_DATA_SHIFT (0) -#define HTIF_DATA_MASK (0xffffffff << HTIF_DATA_SHIFT) +#ifndef _MACHINE_SBI_H_ +#define _MACHINE_SBI_H_ -#define HTIF_CMD_READ (0x00ul) -#define HTIF_CMD_WRITE (0x01ul) -#define HTIF_CMD_READ_CONTROL_REG (0x02ul) -#define HTIF_CMD_WRITE_CONTROL_REG (0x03ul) -#define HTIF_CMD_IDENTIFY (0xfful) -#define IDENTIFY_PADDR_SHIFT 8 -#define IDENTIFY_IDENT 0xff +typedef struct { + uint64_t base; + uint64_t size; + uint64_t node_id; +} memory_block_info; -#define HTIF_NDEV (256) -#define HTIF_ID_LEN (64) -#define HTIF_ALIGN (64) +uint64_t sbi_query_memory(uint64_t id, memory_block_info *p); +uint64_t sbi_hart_id(void); +uint64_t sbi_num_harts(void); +uint64_t sbi_timebase(void); +void sbi_set_timer(uint64_t stime_value); +void sbi_send_ipi(uint64_t hart_id); +uint64_t sbi_clear_ipi(void); +void sbi_shutdown(void); -#define HTIF_DEV_CMD(entry) ((entry & HTIF_CMD_MASK) >> HTIF_CMD_SHIFT) -#define HTIF_DEV_ID(entry) ((entry & HTIF_DEV_ID_MASK) >> HTIF_DEV_ID_SHIFT) -#define HTIF_DEV_DATA(entry) ((entry & HTIF_DATA_MASK) >> HTIF_DATA_SHIFT) +void sbi_console_putchar(unsigned char ch); +int sbi_console_getchar(void); -/* bus softc */ -struct htif_softc { - struct resource *res[1]; - void *ihl[1]; - device_t dev; - uint64_t identify_id; - uint64_t identify_done; -}; +void sbi_remote_sfence_vm(uint64_t hart_mask_ptr, uint64_t asid); +void sbi_remote_sfence_vm_range(uint64_t hart_mask_ptr, uint64_t asid, uint64_t start, uint64_t size); +void sbi_remote_fence_i(uint64_t hart_mask_ptr); -/* device private data */ -struct htif_dev_ivars { - char *id; - int index; - device_t dev; - struct htif_softc *sc; -}; 
+uint64_t sbi_mask_interrupt(uint64_t which); +uint64_t sbi_unmask_interrupt(uint64_t which); -uint64_t htif_command(uint64_t); -int htif_setup_intr(int id, void *func, void *arg); -int htif_read_ivar(device_t dev, device_t child, int which, uintptr_t *result); - -enum htif_device_ivars { - HTIF_IVAR_INDEX, - HTIF_IVAR_ID, -}; - -/* - * Simplified accessors for HTIF devices - */ -#define HTIF_ACCESSOR(var, ivar, type) \ - __BUS_ACCESSOR(htif, var, HTIF, ivar, type) - -HTIF_ACCESSOR(index, INDEX, int); -HTIF_ACCESSOR(id, ID, char *); +#endif /* !_MACHINE_SBI_H_ */ diff --git a/sys/riscv/include/vmparam.h b/sys/riscv/include/vmparam.h index 4e1cdcfd6e52..7ecd884f9da0 100644 --- a/sys/riscv/include/vmparam.h +++ b/sys/riscv/include/vmparam.h @@ -156,26 +156,26 @@ #define VM_MIN_KERNEL_ADDRESS (0xffffffc000000000UL) #define VM_MAX_KERNEL_ADDRESS (0xffffffc800000000UL) -/* Direct Map for 128 GiB of PA: 0x0 - 0x1fffffffff */ +/* 128 GiB maximum for the direct map region */ #define DMAP_MIN_ADDRESS (0xffffffd000000000UL) -#define DMAP_MAX_ADDRESS (0xffffffefffffffffUL) +#define DMAP_MAX_ADDRESS (0xfffffff000000000UL) -#define DMAP_MIN_PHYSADDR (0x0000000000000000UL) -#define DMAP_MAX_PHYSADDR (DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) +#define DMAP_MIN_PHYSADDR (dmap_phys_base) +#define DMAP_MAX_PHYSADDR (dmap_phys_max) /* True if pa is in the dmap range */ #define PHYS_IN_DMAP(pa) ((pa) >= DMAP_MIN_PHYSADDR && \ - (pa) <= DMAP_MAX_PHYSADDR) + (pa) < DMAP_MAX_PHYSADDR) /* True if va is in the dmap range */ #define VIRT_IN_DMAP(va) ((va) >= DMAP_MIN_ADDRESS && \ - (va) <= DMAP_MAX_ADDRESS) + (va) < (dmap_max_addr)) #define PHYS_TO_DMAP(pa) \ ({ \ KASSERT(PHYS_IN_DMAP(pa), \ ("%s: PA out of range, PA: 0x%lx", __func__, \ (vm_paddr_t)(pa))); \ - (pa) | DMAP_MIN_ADDRESS; \ + ((pa) - dmap_phys_base) + DMAP_MIN_ADDRESS; \ }) #define DMAP_TO_PHYS(va) \ @@ -183,7 +183,7 @@ KASSERT(VIRT_IN_DMAP(va), \ ("%s: VA out of range, VA: 0x%lx", __func__, \ (vm_offset_t)(va))); \ - (va) & 
~DMAP_MIN_ADDRESS; \ + ((va) - DMAP_MIN_ADDRESS) + dmap_phys_base; \ }) #define VM_MIN_USER_ADDRESS (0x0000000000000000UL) @@ -196,7 +196,7 @@ #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define USRSTACK SHAREDPAGE -#define KERNENTRY (0x80000000) +#define KERNENTRY (0) /* * How many physical pages per kmem arena virtual page. @@ -233,9 +233,14 @@ * #define UMA_MD_SMALL_ALLOC */ +#ifndef LOCORE +extern vm_paddr_t dmap_phys_base; +extern vm_paddr_t dmap_phys_max; +extern vm_offset_t dmap_max_addr; extern u_int tsb_kernel_ldd_phys; extern vm_offset_t vm_max_kernel_address; extern vm_offset_t init_pt_va; +#endif #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ diff --git a/sys/riscv/riscv/exception.S b/sys/riscv/riscv/exception.S index 5b8ef7a7ae49..d5efd732ca65 100644 --- a/sys/riscv/riscv/exception.S +++ b/sys/riscv/riscv/exception.S @@ -235,388 +235,3 @@ ENTRY(cpu_exception_handler_user) csrrw sp, sscratch, sp sret END(cpu_exception_handler_user) - -/* - * Trap handlers - */ - .text -bad_trap: - j bad_trap - -machine_trap: - /* Save state */ - csrrw sp, mscratch, sp - addi sp, sp, -64 - sd t0, (8 * 0)(sp) - sd t1, (8 * 1)(sp) - sd t2, (8 * 2)(sp) - sd t3, (8 * 3)(sp) - sd t4, (8 * 4)(sp) - sd t5, (8 * 5)(sp) - sd a0, (8 * 7)(sp) - - csrr t3, mstatus /* Required for debug */ - csrr t0, mcause - bltz t0, machine_interrupt - - li t1, EXCP_SUPERVISOR_ECALL - beq t0, t1, supervisor_call -4: - /* NOT REACHED */ - j 4b - -machine_interrupt: - /* Type of interrupt ? 
*/ - csrr t0, mcause - andi t0, t0, EXCP_MASK -#if 0 - /* lowRISC TODO */ - li t1, 4 - beq t1, t0, io_interrupt /* lowRISC only */ -#endif - li t1, 1 - beq t1, t0, supervisor_software_interrupt - li t1, 3 - beq t1, t0, machine_software_interrupt - li t1, 5 - beq t1, t0, supervisor_timer_interrupt - li t1, 7 - beq t1, t0, machine_timer_interrupt - - /* NOT REACHED */ -1: - j 1b - -#if 0 - /* lowRISC TODO */ -io_interrupt: - /* Disable IO interrupts so we can go to supervisor mode */ - csrwi CSR_IO_IRQ, 0 - - /* Handle the trap in supervisor mode */ - j exit_mrts -#endif - -supervisor_software_interrupt: -1: - /* Nothing here as we are using mideleg feature */ - j 1b - -machine_software_interrupt: - /* Clear IPI */ - li t0, 0x40001000 - csrr t2, mhartid - li t3, 0x1000 - mul t2, t2, t3 - add t0, t0, t2 - li t2, 0 - sd t2, 0(t0) - - /* Clear machine software pending bit */ - li t0, MIP_MSIP - csrc mip, t0 - - /* Post supervisor software interrupt */ - li t0, MIP_SSIP - csrs mip, t0 - - j exit - -supervisor_timer_interrupt: -1: - /* Nothing here as we are using mideleg feature */ - j 1b - -machine_timer_interrupt: - /* Disable machine timer interrupts */ - li t0, MIE_MTIE - csrc mie, t0 - - /* Clear machine timer interrupt pending */ - li t0, MIP_MTIP - csrc mip, t0 - - /* Post supervisor timer interrupt */ - li t0, MIP_STIP - csrs mip, t0 - - /* - * Check for HTIF interrupts. - * The only interrupt expected here is key press. - */ - la t0, htif_lock - li t2, 1 - amoswap.d t3, t2, 0(t0) - bnez t3, 5f /* Another operation in progress, give up */ - - /* We have lock */ - la t1, fromhost - ld t5, 0(t1) - beqz t5, 4f - - /* Console GET intr ? 
*/ - mv t1, t5 - li t0, 0x100 - srli t1, t1, 48 - beq t1, t0, 2f -1: - /* There is no interrupts except keypress */ - j 1b - -2: - /* Save entry */ - la t0, htif_ring - li t4, (HTIF_RING_SIZE) - add t0, t0, t4 /* t0 == htif_ring_cursor */ - - ld t1, 0(t0) /* load ptr to cursor */ - sd t5, 0(t1) /* put entry */ - li t4, 1 - sd t4, 8(t1) /* mark used */ - ld t4, 16(t1) /* take next */ - /* Update cursor */ - sd t4, 0(t0) - - /* Post supervisor software interrupt */ - li t0, MIP_SSIP - csrs mip, t0 - -3: - la t1, fromhost - li t5, 0 - sd t5, 0(t1) - -4: - /* Release lock */ - la t0, htif_lock - li t2, 0 - amoswap.d t3, t2, 0(t0) - -5: - j exit - -supervisor_call: - csrr t1, mepc - addi t1, t1, 4 /* Next instruction in t1 */ - li t4, ECALL_HTIF_CMD - beq t5, t4, htif_cmd - li t4, ECALL_HTIF_CMD_REQ - beq t5, t4, htif_cmd_req - li t4, ECALL_HTIF_CMD_RESP - beq t5, t4, htif_cmd_resp - li t4, ECALL_HTIF_GET_ENTRY - beq t5, t4, htif_get_entry - li t4, ECALL_MTIMECMP - beq t5, t4, set_mtimecmp - li t4, ECALL_MCPUID_GET - beq t5, t4, mcpuid_get - li t4, ECALL_MIMPID_GET - beq t5, t4, mimpid_get - li t4, ECALL_SEND_IPI - beq t5, t4, send_ipi - li t4, ECALL_CLEAR_IPI - beq t5, t4, clear_ipi - li t4, ECALL_MIE_SET - beq t5, t4, mie_set -#if 0 - /* lowRISC TODO */ - li t4, ECALL_IO_IRQ_MASK - beq t5, t4, io_irq_mask -#endif - j exit_next_instr - -#if 0 - /* lowRISC TODO */ -io_irq_mask: - csrw CSR_IO_IRQ, t6 - j exit_next_instr -#endif - -mie_set: - csrs mie, t6 - j exit_next_instr - -mcpuid_get: - csrr t6, misa - j exit_next_instr - -mimpid_get: - csrr t6, mimpid - j exit_next_instr - -send_ipi: - /* CPU ipi MMIO register in t6 */ - mv t0, t6 - li t2, 1 - sd t2, 0(t0) - j exit_next_instr - -clear_ipi: - /* Do only clear if there are no new entries in HTIF ring */ - la t0, htif_ring - li t4, (HTIF_RING_SIZE) - add t0, t0, t4 /* t0 == ptr to htif_ring_cursor */ - ld t2, 8(t0) /* load htif_ring_last */ - ld t2, 8(t2) /* load used */ - bnez t2, 1f - - /* Clear supervisor software 
interrupt pending bit */ - li t0, MIP_SSIP - csrc mip, t0 - -1: - j exit_next_instr - -htif_get_entry: - /* Get a htif_ring for current core */ - la t0, htif_ring - li t4, (HTIF_RING_SIZE + 8) - add t0, t0, t4 /* t0 == htif_ring_last */ - - /* Check for new entries */ - li t6, 0 /* preset return value */ - ld t2, 0(t0) /* load ptr to last */ - ld t4, 8(t2) /* get used */ - beqz t4, 1f /* No new entries. Exit */ - - /* Get one */ - ld t6, 0(t2) /* get entry */ - li t4, 0 - sd t4, 8(t2) /* mark free */ - sd t4, 0(t2) /* free entry, just in case */ - ld t4, 16(t2) /* take next */ - sd t4, 0(t0) /* update ptr to last */ -1: - /* Exit. Result is stored in t6 */ - j exit_next_instr - -htif_cmd_resp: - la t0, htif_lock - li t2, 1 -1: - amoswap.d t3, t2, 0(t0) - bnez t3, 1b - - /* We have lock. Read for data */ - la t4, fromhost - ld t6, 0(t4) - beqz t6, 2f - - /* Clear event */ - li t5, 0 - sd t5, 0(t4) - -2: - /* Release lock */ - la t0, htif_lock - li t2, 0 - amoswap.d t3, t2, 0(t0) - - j exit_next_instr - -htif_cmd_req: - la t0, htif_lock - li t2, 1 -1: - amoswap.d t3, t2, 0(t0) - bnez t3, 1b - - /* We have lock. Store new request */ - la t4, tohost - sd t6, 0(t4) - - /* Release lock */ - la t0, htif_lock - li t2, 0 - amoswap.d t3, t2, 0(t0) - - j exit_next_instr - -htif_cmd: - la t0, htif_lock - li t2, 1 -1: - amoswap.d t3, t2, 0(t0) - bnez t3, 1b - - mv t3, t6 - - /* We have lock. Store new request */ - la t4, tohost - sd t6, 0(t4) -2: - /* Poll for result */ - la t4, fromhost - ld t6, 0(t4) - beqz t6, 2b - - /* Check for unexpected event */ - srli t0, t6, 48 - srli t2, t3, 48 - beq t2, t0, 3f - - /* - * We have something unexpected (e.g. keyboard keypress) - * Save entry. 
- */ - la t0, htif_ring - li t4, (HTIF_RING_SIZE) - add t0, t0, t4 /* t0 == htif_ring_cursor */ - - ld t2, 0(t0) /* load ptr to cursor */ - sd t6, 0(t2) /* put entry */ - li t4, 1 - sd t4, 8(t2) /* mark used */ - ld t4, 16(t2) /* take next */ - /* Update cursor */ - sd t4, 0(t0) - - /* Post supervisor software interrupt */ - li t0, MIP_SSIP - csrs mip, t0 - - /* Clear and look for response again */ - la t2, fromhost - li t5, 0 - sd t5, 0(t2) - j 2b - -3: - la t2, fromhost - li t5, 0 - sd t5, 0(t2) - - /* Release lock */ - la t0, htif_lock - li t2, 0 - amoswap.d t3, t2, 0(t0) - - j exit_next_instr - -set_mtimecmp: - /* Enable interrupts */ - li t0, (MIE_MTIE | MIE_STIE) - csrs mie, t0 - j exit_next_instr - -/* - * Trap exit functions - */ -exit_next_instr: - /* Next instruction is in t1 */ - csrw mepc, t1 -exit: - /* Restore state */ - ld t0, (8 * 0)(sp) - ld t1, (8 * 1)(sp) - ld t2, (8 * 2)(sp) - ld t3, (8 * 3)(sp) - ld t4, (8 * 4)(sp) - ld t5, (8 * 5)(sp) - ld a0, (8 * 7)(sp) - addi sp, sp, 64 - csrrw sp, mscratch, sp - mret - -exit_mrts: - j exit_mrts diff --git a/sys/riscv/riscv/genassym.c b/sys/riscv/riscv/genassym.c index c6c8b8518fa5..c9e9bc1c991a 100644 --- a/sys/riscv/riscv/genassym.c +++ b/sys/riscv/riscv/genassym.c @@ -57,7 +57,6 @@ __FBSDID("$FreeBSD$"); #include ASSYM(KERNBASE, KERNBASE); -ASSYM(KERNENTRY, KERNENTRY); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); diff --git a/sys/riscv/riscv/identcpu.c b/sys/riscv/riscv/identcpu.c index 8226423b2fc1..94ad889e0ed9 100644 --- a/sys/riscv/riscv/identcpu.c +++ b/sys/riscv/riscv/identcpu.c @@ -101,8 +101,9 @@ identify_cpu(void) cpu_partsp = NULL; - mimpid = machine_command(ECALL_MIMPID_GET, 0); - misa = machine_command(ECALL_MCPUID_GET, 0); + /* TODO: can we get mimpid and misa somewhere ? 
*/ + mimpid = 0; + misa = 0; cpu = PCPU_GET(cpuid); diff --git a/sys/riscv/riscv/intr_machdep.c b/sys/riscv/riscv/intr_machdep.c index 3dd988f712b3..879cea710e7e 100644 --- a/sys/riscv/riscv/intr_machdep.c +++ b/sys/riscv/riscv/intr_machdep.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifdef SMP #include @@ -267,7 +268,7 @@ ipi_send(struct pcpu *pc, int ipi) CTR3(KTR_SMP, "%s: cpu=%d, ipi=%x", __func__, pc->pc_cpuid, ipi); atomic_set_32(&pc->pc_pending_ipis, ipi); - machine_command(ECALL_SEND_IPI, pc->pc_reg); + sbi_send_ipi(pc->pc_cpuid); CTR1(KTR_SMP, "%s: sent", __func__); } diff --git a/sys/riscv/riscv/locore.S b/sys/riscv/riscv/locore.S index 1783fb908171..2b36e2e8aeb8 100644 --- a/sys/riscv/riscv/locore.S +++ b/sys/riscv/riscv/locore.S @@ -43,118 +43,47 @@ #include #include -#define HTIF_RING_NENTRIES (512) -#define HTIF_RING_ENTRY_SZ (24) -#define HTIF_RING_SIZE (HTIF_RING_ENTRY_SZ * HTIF_RING_NENTRIES) -#define HW_STACK_SIZE (96) - -/* - * Event queue: - * - * struct htif_ring { - * uint64_t data; - * uint64_t used; - * uint64_t next; - * } htif_ring[HTIF_RING_NENTRIES]; - * uint64_t htif_ring_cursor; - * uint64_t htif_ring_last; - */ - -.macro build_ring - la t0, htif_ring - li t1, 0 - sd t1, 0(t0) /* zero data */ - sd t1, 8(t0) /* zero used */ - mv t2, t0 - mv t3, t0 - li t5, (HTIF_RING_SIZE) - li t6, 0 - add t4, t0, t5 -1: - addi t3, t3, HTIF_RING_ENTRY_SZ /* pointer to next */ - beq t3, t4, 2f /* finish */ - sd t3, 16(t2) /* store pointer */ - addi t2, t2, HTIF_RING_ENTRY_SZ /* next entry */ - addi t6, t6, 1 /* counter */ - j 1b -2: - addi t3, t3, -HTIF_RING_ENTRY_SZ - sd t0, 16(t3) /* last -> first */ - - li t2, (HTIF_RING_SIZE) - add s0, t0, t2 - sd t0, 0(s0) /* cursor */ - sd t0, 8(s0) /* last */ - /* finish building ring */ -.endm - .globl kernbase .set kernbase, KERNBASE /* Trap entries */ .text -mentry: - /* Vectors */ - j _start /* reset */ - j bad_trap /* NMI (non-maskable interrupt) */ - j machine_trap - /* 
Reset vector */ .text .globl _start _start: - /* Setup machine trap vector */ - la t0, machine_trap - csrw mtvec, t0 - - /* Delegate interrupts to supervisor mode */ - li t0, (MIP_SSIP | MIP_STIP | MIP_SEIP) - csrw mideleg, t0 - - /* Delegate exceptions to supervisor mode */ - li t0, (1 << EXCP_MISALIGNED_FETCH) | \ - (1 << EXCP_FAULT_FETCH) | \ - (1 << EXCP_ILLEGAL_INSTRUCTION) | \ - (1 << EXCP_FAULT_LOAD) | \ - (1 << EXCP_FAULT_STORE) | \ - (1 << EXCP_BREAKPOINT) | \ - (1 << EXCP_USER_ECALL) - csrw medeleg, t0 - + /* Setup supervisor trap vector */ la t0, cpu_exception_handler - li t1, KERNBASE - add t0, t0, t1 csrw stvec, t0 - /* Direct secondary cores to mpentry */ - csrr a0, mhartid - bnez a0, mpentry - - li t1, 0 - la t0, tohost - sd t1, 0(t0) - la t0, fromhost - sd t1, 0(t0) - - /* Build event queue for current core */ - build_ring - - /* Setup machine-mode stack for CPU 0 */ - la t0, hardstack_end - csrw mscratch, t0 - + /* Ensure sscratch is zero */ li t0, 0 csrw sscratch, t0 - li s10, PAGE_SIZE - li s9, (PAGE_SIZE * KSTACK_PAGES) + /* Load physical memory information */ + li a0, 0 + la a1, memory_info + call sbi_query_memory - /* Page tables */ + /* Store base to s6 */ + la s6, memory_info + ld s6, 0(s6) /* s6 = physmem base */ + + /* Direct secondary cores to mpentry */ + call sbi_hart_id + bnez a0, mpentry + + /* + * Page tables + */ /* Create an L1 page for early devmap */ la s1, pagetable_l1 la s2, pagetable_l2_devmap /* Link to next level PN */ + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 srli s2, s2, PAGE_SHIFT li a5, (VM_MAX_KERNEL_ADDRESS - L2_SIZE) @@ -170,19 +99,74 @@ _start: add t0, s1, a5 sd t6, (t0) - /* Add single Level 1 entry for kernel */ + /* Create an L1 page for SBI */ + la s1, pagetable_l1 + la s2, pagetable_l2_sbi /* Link to next level PN */ + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 + srli s2, s2, PAGE_SHIFT + li a5, 511 + li t4, PTE_V + slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ + or t6, t4, t5 + + /* Store 
SBI L1 PTE entry to position */ + li a6, PTE_SIZE + mulw a5, a5, a6 + add t0, s1, a5 + sd t6, (t0) + + /* Create an L2 page for SBI */ + la s1, pagetable_l2_sbi + la s2, pagetable_l3_sbi /* Link to next level PN */ + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 + srli s2, s2, PAGE_SHIFT + li a5, 511 + li t4, PTE_V + slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ + or t6, t4, t5 + + /* Store SBI L2 PTE entry to position */ + li a6, PTE_SIZE + mulw a5, a5, a6 + add t0, s1, a5 + sd t6, (t0) + + /* Create an L3 page for SBI */ + la s1, pagetable_l3_sbi + li s2, 0x80009000 + srli s2, s2, PAGE_SHIFT + li a5, 511 + li t4, PTE_V | PTE_RX | PTE_W + slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ + or t6, t4, t5 + + /* Store SBI L3 PTE entry to position */ + li a6, PTE_SIZE + mulw a5, a5, a6 + add t0, s1, a5 + sd t6, (t0) + /* END SBI page creation */ + + /* Add L1 entry for kernel */ la s1, pagetable_l1 la s2, pagetable_l2 /* Link to next level PN */ + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 srli s2, s2, PAGE_SHIFT - li a5, (KERNBASE + KERNENTRY) + li a5, KERNBASE srli a5, a5, L1_SHIFT /* >> L1_SHIFT */ andi a5, a5, 0x1ff /* & 0x1ff */ li t4, PTE_V slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ or t6, t4, t5 - /* Store single level1 PTE entry to position */ + /* Store L1 PTE entry to position */ li a6, PTE_SIZE mulw a5, a5, a6 add t0, s1, a5 @@ -190,8 +174,7 @@ _start: /* Level 2 superpages (512 x 2MiB) */ la s1, pagetable_l2 - li t4, KERNENTRY - srli t4, t4, 21 /* Div by 2 MiB */ + srli t4, s6, 21 /* Div physmem base by 2 MiB */ li t2, 512 /* Build 512 entries */ add t3, t4, t2 li t5, 0 @@ -206,32 +189,13 @@ _start: bltu t4, t3, 2b /* Set page tables base register */ - la s1, pagetable_l1 - srli s1, s1, PAGE_SHIFT - csrw sptbr, s1 + la s2, pagetable_l1 + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 + srli s2, s2, PAGE_SHIFT + csrw sptbr, s2 - /* Page tables END */ - - /* Enter supervisor mode */ - li s0, ((MSTATUS_VM_SV39 << MSTATUS_VM_SHIFT) | \ - 
(MSTATUS_PRV_S << MSTATUS_MPP_SHIFT)); - csrw mstatus, s0 - - /* - * Enable machine-mode software interrupts - * so we can deliver IPI to this core. - */ - li t0, MIE_MSIE - csrs mie, t0 - - /* Exit from machine mode */ - la t0, .Lmmu_on - li s11, KERNBASE - add t0, t0, s11 - csrw mepc, t0 - mret - -.Lmmu_on: /* Initialize stack pointer */ la s3, initstack_end mv sp, s3 @@ -247,8 +211,10 @@ _start: /* Fill riscv_bootparams */ addi sp, sp, -16 + la t0, pagetable_l1 sd t0, 0(sp) /* kern_l1pt */ + la t0, initstack_end sd t0, 8(sp) /* kern_stack */ @@ -260,19 +226,6 @@ _start: initstack: .space (PAGE_SIZE * KSTACK_PAGES) initstack_end: -hardstack: - .space (HW_STACK_SIZE * MAXCPU) -hardstack_end: - - .globl htif_ring -htif_ring: - .space (HTIF_RING_SIZE + 16) -htif_lock: - .space (8) -tohost: - .space (8) -fromhost: - .space (8) ENTRY(sigcode) mv a0, sp @@ -305,6 +258,14 @@ pagetable_l2: .space PAGE_SIZE pagetable_l2_devmap: .space PAGE_SIZE +pagetable_l2_sbi: + .space PAGE_SIZE +pagetable_l3_sbi: + .space PAGE_SIZE + + .globl memory_info +memory_info: + .space (24) .globl init_pt_va init_pt_va: @@ -321,7 +282,6 @@ END(mpentry) * mpentry(unsigned long) * * Called by a core when it is being brought online. - * The data in x0 is passed straight to init_secondary. */ ENTRY(mpentry) /* @@ -340,44 +300,12 @@ ENTRY(mpentry) beqz t1, 1b /* Set page tables base register */ - la t0, pagetable_l1 - srli t0, t0, PAGE_SHIFT - csrw sptbr, t0 - - /* Configure mstatus */ - li s0, ((MSTATUS_VM_SV39 << MSTATUS_VM_SHIFT) | \ - (MSTATUS_PRV_S << MSTATUS_MPP_SHIFT)); - csrw mstatus, s0 - - /* Setup stack for machine mode exceptions */ - la t0, hardstack_end - li t1, HW_STACK_SIZE - mulw t1, t1, a0 - sub t0, t0, t1 - csrw mscratch, t0 - - li t0, 0 - csrw sscratch, t0 - - /* - * Enable machine-mode software interrupts - * so we can deliver IPI to this core. - */ - li t0, MIE_MSIE - csrs mie, t0 - - /* - * Exit from machine mode and go to - * the virtual address space. 
- */ - la t0, mp_virtdone - li s11, KERNBASE - add t0, t0, s11 - csrw mepc, t0 - mret - -mp_virtdone: - /* We are now in virtual address space */ + la s2, pagetable_l1 + li t0, KERNBASE + sub s2, s2, t0 + add s2, s2, s6 + srli s2, s2, PAGE_SHIFT + csrw sptbr, s2 /* Setup stack pointer */ la t0, secondary_stacks @@ -388,5 +316,3 @@ mp_virtdone: call init_secondary END(mpentry) #endif - -#include "exception.S" diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c index fb969f1e7702..7bc4402597de 100644 --- a/sys/riscv/riscv/machdep.c +++ b/sys/riscv/riscv/machdep.c @@ -80,6 +80,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include @@ -116,6 +117,7 @@ int64_t idcache_line_size; /* The minimum cache line size */ extern int *end; extern int *initstack_end; +extern memory_block_info memory_info; struct pcpu *pcpup; @@ -728,12 +730,9 @@ fake_preload_metadata(struct riscv_bootparams *rvbp __unused) void initriscv(struct riscv_bootparams *rvbp) { - struct mem_region mem_regions[FDT_MEM_REGIONS]; vm_offset_t lastaddr; - int mem_regions_sz; vm_size_t kernlen; caddr_t kmdp; - int i; /* Set the module data location */ lastaddr = fake_preload_metadata(rvbp); @@ -755,12 +754,20 @@ initriscv(struct riscv_bootparams *rvbp) /* Load the physical memory ranges */ physmap_idx = 0; +#if 0 + struct mem_region mem_regions[FDT_MEM_REGIONS]; + int mem_regions_sz; + int i; /* Grab physical memory regions information from device tree. 
*/ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); for (i = 0; i < mem_regions_sz; i++) add_physmap_entry(mem_regions[i].mr_start, mem_regions[i].mr_size, physmap, &physmap_idx); +#endif + + add_physmap_entry(memory_info.base, memory_info.size, + physmap, &physmap_idx); /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; @@ -778,14 +785,15 @@ initriscv(struct riscv_bootparams *rvbp) /* Bootstrap enough of pmap to enter the kernel proper */ kernlen = (lastaddr - KERNBASE); - pmap_bootstrap(rvbp->kern_l1pt, KERNENTRY, kernlen); + pmap_bootstrap(rvbp->kern_l1pt, memory_info.base, kernlen); cninit(); init_proc0(rvbp->kern_stack); /* set page table base register for thread0 */ - thread0.td_pcb->pcb_l1addr = (rvbp->kern_l1pt - KERNBASE); + thread0.td_pcb->pcb_l1addr = \ + (rvbp->kern_l1pt - KERNBASE + memory_info.base); msgbufinit(msgbufp, msgbufsize); mutex_init(); diff --git a/sys/riscv/riscv/mp_machdep.c b/sys/riscv/riscv/mp_machdep.c index 9152e700e36d..33353ac2a3c8 100644 --- a/sys/riscv/riscv/mp_machdep.c +++ b/sys/riscv/riscv/mp_machdep.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #ifdef VFP #include #endif @@ -273,14 +274,7 @@ ipi_handler(void *arg) u_int cpu, ipi; int bit; - /* - * We have shared interrupt line for both IPI and HTIF, - * so we don't really need to clear pending bit here - * as it will be cleared later in htif_intr. - * But lets assume HTIF is optional part, so do clear - * pending bit if there is no new entires in htif_ring. 
- */ - machine_command(ECALL_CLEAR_IPI, 0); + sbi_clear_ipi(); cpu = PCPU_GET(cpuid); @@ -382,12 +376,10 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) /* We are already running on cpu 0 */ if (id == 0) { - pcpup->pc_reg = target_cpu; return (1); } pcpu_init(pcpup, id, sizeof(struct pcpu)); - pcpup->pc_reg = target_cpu; dpcpu[id - 1] = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 73bc5c89687d..f09fc857a897 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -220,6 +220,14 @@ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; +vm_paddr_t dmap_phys_base; /* The start of the dmap region */ +vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ +vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ + +/* This code assumes all L1 DMAP entries will be used */ +CTASSERT((DMAP_MIN_ADDRESS & ~L1_OFFSET) == DMAP_MIN_ADDRESS); +CTASSERT((DMAP_MAX_ADDRESS & ~L1_OFFSET) == DMAP_MAX_ADDRESS); + static struct rwlock_padalign pvh_global_lock; /* @@ -458,6 +466,10 @@ pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); + /* Check locore has used L2 superpages */ + KASSERT((l2[l2_slot] & PTE_RX) != 0, + ("Invalid bootstrap L2 table")); + /* L2 is superpages */ ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT; ret += (va & L2_OFFSET); @@ -466,7 +478,7 @@ pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) } static void -pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) +pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa) { vm_offset_t va; vm_paddr_t pa; @@ -475,19 +487,12 @@ pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) pt_entry_t entry; pn_t pn; - /* - * Initialize DMAP starting from zero physical address. - * TODO: remove this once machine-mode code splitted out. 
- */ - kernstart = 0; - printf("%s: l1pt 0x%016lx kernstart 0x%016lx\n", __func__, l1pt, kernstart); - - pa = kernstart & ~L1_OFFSET; + pa = dmap_phys_base = min_pa & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; - l1 = (pd_entry_t *)l1pt; + l1 = (pd_entry_t *)kern_l1; l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); - for (; va < DMAP_MAX_ADDRESS; + for (; va < DMAP_MAX_ADDRESS && pa < max_pa; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); @@ -498,6 +503,10 @@ pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) pmap_load_store(&l1[l1_slot], entry); } + /* Set the upper limit of the DMAP region */ + dmap_phys_max = pa; + dmap_max_addr = va; + cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); cpu_tlb_flushID(); } @@ -552,7 +561,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) pt_entry_t *l2; vm_offset_t va, freemempos; vm_offset_t dpcpu, msgbufpv; - vm_paddr_t pa, min_pa; + vm_paddr_t pa, min_pa, max_pa; int i; kern_delta = KERNBASE - kernstart; @@ -574,7 +583,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) LIST_INIT(&allpmaps); /* Assume the address we were loaded to is a valid physical address */ - min_pa = KERNBASE - kern_delta; + min_pa = max_pa = KERNBASE - kern_delta; /* * Find the minimum physical address. 
physmap is sorted, @@ -585,11 +594,13 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; + if (physmap[i + 1] > max_pa) + max_pa = physmap[i + 1]; break; } /* Create a direct map region early so we can use it for pa -> va */ - pmap_bootstrap_dmap(l1pt, min_pa); + pmap_bootstrap_dmap(l1pt, min_pa, max_pa); va = KERNBASE; pa = KERNBASE - kern_delta; diff --git a/sys/riscv/htif/htif_console.c b/sys/riscv/riscv/riscv_console.c similarity index 71% rename from sys/riscv/htif/htif_console.c rename to sys/riscv/riscv/riscv_console.c index 7eca398b799b..d59ed59c18ae 100644 --- a/sys/riscv/htif/htif_console.c +++ b/sys/riscv/riscv/riscv_console.c @@ -47,16 +47,41 @@ __FBSDID("$FreeBSD$"); #include #include #include - -#include -#include - -#include "htif.h" +#include #include - #include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static struct resource_spec rcons_spec[] = { + { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE}, + { -1, 0 } +}; + +/* bus softc */ +struct rcons_softc { + struct resource *res[1]; + void *ihl[1]; + device_t dev; +}; + +/* CN Console interface */ + static tsw_outwakeup_t riscvtty_outwakeup; static struct ttydevsw riscv_ttydevsw = { @@ -86,8 +111,6 @@ CONSOLE_DRIVER(riscv); #define MAX_BURST_LEN 1 #define QUEUE_SIZE 256 -#define CONSOLE_DEFAULT_ID 1ul -#define SPIN_IN_MACHINE_MODE 1 struct queue_entry { uint64_t data; @@ -102,13 +125,8 @@ struct queue_entry *entry_served; static void riscv_putc(int c) { - uint64_t cmd; - cmd = (HTIF_CMD_WRITE << HTIF_CMD_SHIFT); - cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT); - cmd |= c; - - machine_command(ECALL_HTIF_CMD, cmd); + sbi_console_putchar(c); } #ifdef EARLY_PRINTF @@ -215,39 +233,23 @@ riscv_cnungrab(struct consdev *cp) static int riscv_cngetc(struct consdev *cp) { -#if defined(KDB) - uint64_t devcmd; - uint64_t entry; - uint64_t 
devid; -#endif - uint64_t cmd; uint8_t data; int ch; - cmd = (HTIF_CMD_READ << HTIF_CMD_SHIFT); - cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT); - - machine_command(ECALL_HTIF_CMD_REQ, cmd); - #if defined(KDB) + /* + * RISCVTODO: BBL polls for console data on timer interrupt, + * but interrupts are turned off in KDB. + * So we currently do not have console in KDB. + */ if (kdb_active) { + ch = sbi_console_getchar(); + while (ch) { + entry_last->data = ch; + entry_last->used = 1; + entry_last = entry_last->next; - entry = machine_command(ECALL_HTIF_CMD_RESP, 0); - while (entry) { - devid = HTIF_DEV_ID(entry); - devcmd = HTIF_DEV_CMD(entry); - data = HTIF_DEV_DATA(entry); - - if (devid == CONSOLE_DEFAULT_ID && devcmd == 0) { - entry_last->data = data; - entry_last->used = 1; - entry_last = entry_last->next; - } else { - printf("Lost interrupt: devid %d\n", - devid); - } - - entry = machine_command(ECALL_HTIF_CMD_RESP, 0); + ch = sbi_console_getchar(); } } #endif @@ -275,75 +277,83 @@ riscv_cnputc(struct consdev *cp, int c) riscv_putc(c); } -/* - * Bus interface. 
- */ +/* Bus interface */ -struct htif_console_softc { - device_t dev; - int running; - int intr_chan; - int cmd_done; - int curtag; - int index; -}; - -static void -htif_console_intr(void *arg, uint64_t entry) +static int +rcons_intr(void *arg) { - struct htif_console_softc *sc; - uint8_t devcmd; - uint64_t data; + int c; - sc = arg; - - devcmd = HTIF_DEV_CMD(entry); - data = HTIF_DEV_DATA(entry); - - if (devcmd == 0) { - entry_last->data = data; + c = sbi_console_getchar(); + if (c > 0 && c < 0xff) { + entry_last->data = c; entry_last->used = 1; entry_last = entry_last->next; } + + csr_clear(sip, SIP_SSIP); + + return (FILTER_HANDLED); } static int -htif_console_probe(device_t dev) +rcons_probe(device_t dev) { - return (0); + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_is_compatible(dev, "riscv,console")) + return (ENXIO); + + device_set_desc(dev, "RISC-V console"); + return (BUS_PROBE_DEFAULT); } static int -htif_console_attach(device_t dev) +rcons_attach(device_t dev) { - struct htif_console_softc *sc; + struct rcons_softc *sc; + int error; sc = device_get_softc(dev); sc->dev = dev; - sc->index = htif_get_index(dev); - if (sc->index < 0) - return (EINVAL); + if (bus_alloc_resources(dev, rcons_spec, sc->res)) { + device_printf(dev, "could not allocate resources\n"); + return (ENXIO); + } - htif_setup_intr(sc->index, htif_console_intr, sc); + /* Setup IRQs handler */ + error = bus_setup_intr(dev, sc->res[0], INTR_TYPE_CLK, + rcons_intr, NULL, sc, &sc->ihl[0]); + if (error) { + device_printf(dev, "Unable to alloc int resource.\n"); + return (ENXIO); + } + + csr_set(sie, SIE_SSIE); + + bus_generic_attach(sc->dev); + + sbi_console_getchar(); return (0); } -static device_method_t htif_console_methods[] = { - DEVMETHOD(device_probe, htif_console_probe), - DEVMETHOD(device_attach, htif_console_attach), +static device_method_t rcons_methods[] = { + DEVMETHOD(device_probe, rcons_probe), + DEVMETHOD(device_attach, rcons_attach), + DEVMETHOD_END }; 
-static driver_t htif_console_driver = { - "htif_console", - htif_console_methods, - sizeof(struct htif_console_softc) +static driver_t rcons_driver = { + "rcons", + rcons_methods, + sizeof(struct rcons_softc) }; -static devclass_t htif_console_devclass; +static devclass_t rcons_devclass; -DRIVER_MODULE(htif_console, htif, htif_console_driver, - htif_console_devclass, 0, 0); +DRIVER_MODULE(rcons, simplebus, rcons_driver, rcons_devclass, 0, 0); diff --git a/sys/riscv/riscv/sbi.S b/sys/riscv/riscv/sbi.S new file mode 100644 index 000000000000..b5b2916e0f4f --- /dev/null +++ b/sys/riscv/riscv/sbi.S @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2016 Ruslan Bukin + * All rights reserved. + * + * Portions of this software were developed by SRI International and the + * University of Cambridge Computer Laboratory under DARPA/AFRL contract + * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. + * + * Portions of this software were developed by the University of Cambridge + * Computer Laboratory as part of the CTSRD Project, with support from the + * UK Higher Education Innovation Fund (HEIF). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +.globl sbi_hart_id; sbi_hart_id = -2048 +.globl sbi_num_harts; sbi_num_harts = -2032 +.globl sbi_query_memory; sbi_query_memory = -2016 +.globl sbi_console_putchar; sbi_console_putchar = -2000 +.globl sbi_console_getchar; sbi_console_getchar = -1984 +.globl sbi_send_ipi; sbi_send_ipi = -1952 +.globl sbi_clear_ipi; sbi_clear_ipi = -1936 +.globl sbi_timebase; sbi_timebase = -1920 +.globl sbi_shutdown; sbi_shutdown = -1904 +.globl sbi_set_timer; sbi_set_timer = -1888 +.globl sbi_mask_interrupt; sbi_mask_interrupt = -1872 +.globl sbi_unmask_interrupt; sbi_unmask_interrupt = -1856 +.globl sbi_remote_sfence_vm; sbi_remote_sfence_vm = -1840 +.globl sbi_remote_sfence_vm_range; sbi_remote_sfence_vm_range = -1824 +.globl sbi_remote_fence_i; sbi_remote_fence_i = -1808 diff --git a/sys/riscv/riscv/timer.c b/sys/riscv/riscv/timer.c index ae8ec67b6e90..0462e62b1aac 100644 --- a/sys/riscv/riscv/timer.c +++ b/sys/riscv/riscv/timer.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -135,7 +136,7 @@ riscv_tmr_start(struct eventtimer *et, sbintime_t first, sbintime_t period) cpu = PCPU_GET(cpuid); WRITE8(sc, TIMER_MTIMECMP(cpu), counts); csr_set(sie, SIE_STIE); - machine_command(ECALL_MTIMECMP, counts); + sbi_set_timer(counts); return (0); } diff --git a/sys/riscv/riscv/vm_machdep.c b/sys/riscv/riscv/vm_machdep.c index 1f6613077142..d2d4c6e3d3ec 100644 --- 
a/sys/riscv/riscv/vm_machdep.c +++ b/sys/riscv/riscv/vm_machdep.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include /* * Finish a fork operation, with process p2 nearly set up. @@ -106,9 +107,9 @@ void cpu_reset(void) { - printf("cpu_reset"); - while(1) - __asm volatile("wfi" ::: "memory"); + sbi_shutdown(); + + while(1); } void diff --git a/sys/sys/_task.h b/sys/sys/_task.h index ce8978199230..d3be7198dc2f 100644 --- a/sys/sys/_task.h +++ b/sys/sys/_task.h @@ -42,6 +42,7 @@ * (q) taskqueue lock */ typedef void task_fn_t(void *context, int pending); +typedef void gtask_fn_t(void *context); struct task { STAILQ_ENTRY(task) ta_link; /* (q) link for queue */ @@ -51,8 +52,16 @@ struct task { void *ta_context; /* (c) argument for handler */ }; +struct gtask { + STAILQ_ENTRY(gtask) ta_link; /* (q) link for queue */ + uint16_t ta_flags; /* (q) state flags */ + u_short ta_priority; /* (c) Priority */ + gtask_fn_t *ta_func; /* (c) task handler */ + void *ta_context; /* (c) argument for handler */ +}; + struct grouptask { - struct task gt_task; + struct gtask gt_task; void *gt_taskqueue; LIST_ENTRY(grouptask) gt_list; void *gt_uniq; diff --git a/sys/sys/buf.h b/sys/sys/buf.h index d287fe068da4..c485e384ed64 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -139,7 +139,6 @@ struct buf { void *b_fsprivate1; void *b_fsprivate2; void *b_fsprivate3; - int b_pin_count; }; #define b_object b_bufobj->bo_object diff --git a/sys/sys/gtaskqueue.h b/sys/sys/gtaskqueue.h new file mode 100644 index 000000000000..88d4b54cdbbf --- /dev/null +++ b/sys/sys/gtaskqueue.h @@ -0,0 +1,125 @@ +/*- + * Copyright (c) 2014 Jeffrey Roberson + * Copyright (c) 2016 Matthew Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS_GTASKQUEUE_H_ +#define _SYS_GTASKQUEUE_H_ +#include + +#ifndef _KERNEL +#error "no user-servicable parts inside" +#endif + +struct gtaskqueue; +typedef void (*gtaskqueue_enqueue_fn)(void *context); + +/* + * Taskqueue groups. Manages dynamic thread groups and irq binding for + * device and other tasks. 
+ */ + +void gtaskqueue_block(struct gtaskqueue *queue); +void gtaskqueue_unblock(struct gtaskqueue *queue); + +int gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask); +void gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *task); +void gtaskqueue_drain_all(struct gtaskqueue *queue); + +int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *task); +void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *grptask, + void *uniq, int irq, char *name); +int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask, + void *uniq, int cpu, int irq, char *name); +void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask); +struct taskqgroup *taskqgroup_create(char *name); +void taskqgroup_destroy(struct taskqgroup *qgroup); +int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride); + +#define TASK_ENQUEUED 0x1 +#define TASK_SKIP_WAKEUP 0x2 + + +#define GTASK_INIT(task, flags, priority, func, context) do { \ + (task)->ta_flags = flags; \ + (task)->ta_priority = (priority); \ + (task)->ta_func = (func); \ + (task)->ta_context = (context); \ +} while (0) + +#define GROUPTASK_INIT(gtask, priority, func, context) \ + GTASK_INIT(&(gtask)->gt_task, TASK_SKIP_WAKEUP, priority, func, context) + +#define GROUPTASK_ENQUEUE(gtask) \ + grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task) + +#define TASKQGROUP_DECLARE(name) \ +extern struct taskqgroup *qgroup_##name + + +#ifdef EARLY_AP_STARTUP +#define TASKQGROUP_DEFINE(name, cnt, stride) \ + \ +struct taskqgroup *qgroup_##name; \ + \ +static void \ +taskqgroup_define_##name(void *arg) \ +{ \ + qgroup_##name = taskqgroup_create(#name); \ + taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ +} \ + \ +SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \ + taskqgroup_define_##name, NULL) +#else +#define TASKQGROUP_DEFINE(name, cnt, stride) \ + \ +struct taskqgroup *qgroup_##name; \ + \ +static void \ 
+taskqgroup_define_##name(void *arg) \ +{ \ + qgroup_##name = taskqgroup_create(#name); \ +} \ + \ +SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \ + taskqgroup_define_##name, NULL); \ + \ +static void \ +taskqgroup_adjust_##name(void *arg) \ +{ \ + taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ +} \ + \ +SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ + taskqgroup_adjust_##name, NULL); \ + \ +struct __hack +#endif +TASKQGROUP_DECLARE(net); + +#endif /* !_SYS_GTASKQUEUE_H_ */ diff --git a/sys/sys/taskqueue.h b/sys/sys/taskqueue.h index c986ffb4171f..a6c6655832ec 100644 --- a/sys/sys/taskqueue.h +++ b/sys/sys/taskqueue.h @@ -204,78 +204,4 @@ struct taskqueue *taskqueue_create_fast(const char *name, int mflags, taskqueue_enqueue_fn enqueue, void *context); -/* - * Taskqueue groups. Manages dynamic thread groups and irq binding for - * device and other tasks. - */ -int grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task); -void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, - void *uniq, int irq, char *name); -int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, - void *uniq, int cpu, int irq, char *name); -void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask); -struct taskqgroup *taskqgroup_create(char *name); -void taskqgroup_destroy(struct taskqgroup *qgroup); -int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride); - -#define TASK_SKIP_WAKEUP 0x1 - -#define GTASK_INIT(task, priority, func, context) do { \ - (task)->ta_pending = 0; \ - (task)->ta_priority = (priority); \ - (task)->ta_func = (func); \ - (task)->ta_context = (context); \ -} while (0) - -#define GROUPTASK_INIT(gtask, priority, func, context) \ - GTASK_INIT(&(gtask)->gt_task, priority, func, context) - -#define GROUPTASK_ENQUEUE(gtask) \ - grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task) - -#define TASKQGROUP_DECLARE(name) \ -extern struct taskqgroup 
*qgroup_##name - -#ifdef EARLY_AP_STARTUP -#define TASKQGROUP_DEFINE(name, cnt, stride) \ - \ -struct taskqgroup *qgroup_##name; \ - \ -static void \ -taskqgroup_define_##name(void *arg) \ -{ \ - qgroup_##name = taskqgroup_create(#name); \ - taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ -} \ - \ -SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \ - taskqgroup_define_##name, NULL) -#else -#define TASKQGROUP_DEFINE(name, cnt, stride) \ - \ -struct taskqgroup *qgroup_##name; \ - \ -static void \ -taskqgroup_define_##name(void *arg) \ -{ \ - qgroup_##name = taskqgroup_create(#name); \ -} \ - \ -SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \ - taskqgroup_define_##name, NULL); \ - \ -static void \ -taskqgroup_adjust_##name(void *arg) \ -{ \ - taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ -} \ - \ -SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ - taskqgroup_adjust_##name, NULL); \ - \ -struct __hack -#endif - -TASKQGROUP_DECLARE(net); - #endif /* !_SYS_TASKQUEUE_H_ */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index eb0672eb3009..02707117d21b 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -618,8 +618,6 @@ u_quad_t init_va_filerev(void); int speedup_syncer(void); int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen); -#define textvp_fullpath(p, rb, rfb) \ - vn_fullpath(FIRST_THREAD_IN_PROC(p), (p)->p_textvp, rb, rfb) int vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf); int vn_fullpath_global(struct thread *td, struct vnode *vn, @@ -655,7 +653,6 @@ int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize); void vunref(struct vnode *); void vn_printf(struct vnode *vp, const char *fmt, ...) 
__printflike(2,3); -#define vprint(label, vp) vn_printf((vp), "%s\n", (label)) int vrecycle(struct vnode *vp); int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); diff --git a/sys/tools/embed_mfs.sh b/sys/tools/embed_mfs.sh index 3f20257b7a58..66c50c2da111 100644 --- a/sys/tools/embed_mfs.sh +++ b/sys/tools/embed_mfs.sh @@ -36,7 +36,7 @@ mfs_size=`stat -f '%z' $2 2> /dev/null` # If we can't determine MFS image size - bail. [ -z ${mfs_size} ] && echo "Can't determine MFS image size" && exit 1 -sec_info=`objdump -h $1 2> /dev/null | grep " oldmfs "` +sec_info=`${CROSS_BINUTILS_PREFIX}objdump -h $1 2> /dev/null | grep " oldmfs "` # If we can't find the mfs section within the given kernel - bail. [ -z "${sec_info}" ] && echo "Can't locate mfs section within kernel" && exit 1 diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 59ae322686d8..5ef439f6a1a8 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -558,7 +558,7 @@ ffs_snapshot(mp, snapfile) } VI_UNLOCK(xvp); if (snapdebug) - vprint("ffs_snapshot: busy vnode", xvp); + vn_printf(xvp, "ffs_snapshot: busy vnode "); if (VOP_GETATTR(xvp, &vat, td->td_ucred) == 0 && vat.va_nlink > 0) { VOP_UNLOCK(xvp, 0); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 5a70e5c1da15..69182850ffe2 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -342,7 +342,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags) goto loop; #ifdef INVARIANTS if (!vn_isdisk(vp, NULL)) - vprint("ffs_fsync: dirty", vp); + vn_printf(vp, "ffs_fsync: dirty "); #endif } BO_UNLOCK(bo); diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index 29d96a03ad64..dcff4fbe3d7a 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -1136,7 +1136,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_NORMAL | (DOINGASYNC(dvp) ? 
0 : IO_SYNC), cr); if (error != 0) - vprint("ufs_direnter: failed to truncate", dvp); + vn_printf(dvp, "ufs_direnter: failed to truncate "); #ifdef UFS_DIRHASH if (error == 0 && dp->i_dirhash != NULL) ufsdirhash_dirtrunc(dp, dp->i_endoff); diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index 4fbb8a18032f..c236706f3346 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -469,7 +469,7 @@ chkdquot(struct inode *ip) continue; if (ip->i_dquot[i] == NODQUOT) { UFS_UNLOCK(ump); - vprint("chkdquot: missing dquot", ITOV(ip)); + vn_printf(ITOV(ip), "chkdquot: missing dquot "); panic("chkdquot: missing dquot"); } } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 297b1171423a..17723c7308f0 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -471,7 +471,7 @@ vm_object_vndeallocate(vm_object_t object) KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); #ifdef INVARIANTS if (object->ref_count == 0) { - vprint("vm_object_vndeallocate", vp); + vn_printf(vp, "vm_object_vndeallocate "); panic("vm_object_vndeallocate: bad object reference count"); } #endif diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 2bdecccaf8ec..02ded8b37c28 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -1188,15 +1188,13 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) /* * Scan the active queue for pages that can be deactivated. Update * the per-page activity counter and use it to identify deactivation - * candidates. + * candidates. Held pages may be deactivated. 
*/ for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned < min_scan || (page_shortage > 0 && scanned < maxscan)); m = next, scanned++) { - KASSERT(m->queue == PQ_ACTIVE, ("vm_pageout_scan: page %p isn't active", m)); - next = TAILQ_NEXT(m, plinks.q); if ((m->flags & PG_MARKER) != 0) continue; @@ -1210,8 +1208,8 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) } /* - * The count for pagedaemon pages is done after checking the - * page for eligibility... + * The count for page daemon pages is updated after checking + * the page for eligibility. */ PCPU_INC(cnt.v_pdpages); @@ -1225,12 +1223,17 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) act_delta = 0; /* - * Unlocked object ref count check. Two races are possible. - * 1) The ref was transitioning to zero and we saw non-zero, - * the pmap bits will be checked unnecessarily. - * 2) The ref was transitioning to one and we saw zero. - * The page lock prevents a new reference to this page so - * we need not check the reference bits. + * Perform an unsynchronized object ref count check. While + * the page lock ensures that the page is not reallocated to + * another object, in particular, one with unmanaged mappings + * that cannot support pmap_ts_referenced(), two races are, + * nonetheless, possible: + * 1) The count was transitioning to zero, but we saw a non- + * zero value. pmap_ts_referenced() will return zero + * because the page is not mapped. + * 2) The count was transitioning to one, but we saw zero. + * This race delays the detection of a new reference. At + * worst, we will deactivate and reactivate the page. 
*/ if (m->object->ref_count != 0) act_delta += pmap_ts_referenced(m); diff --git a/tools/tools/ath/ath_ee_9300_print/main.c b/tools/tools/ath/ath_ee_9300_print/main.c index acf646e38644..df278e18f888 100644 --- a/tools/tools/ath/ath_ee_9300_print/main.c +++ b/tools/tools/ath/ath_ee_9300_print/main.c @@ -72,8 +72,10 @@ eeprom_9300_base_print(const uint16_t *buf) ee_base->device_cap, ee_base->device_type); - printf("| pwrTableOffset: %d dB, feature_enable: 0x%02x MiscConfig: 0x%02x |\n", + printf("| pwrTableOffset: %d dB, TuningCaps=0x%02x 0x%02x feature_enable: 0x%02x MiscConfig: 0x%02x |\n", ee_base->pwrTableOffset, + ee_base->params_for_tuning_caps[0], + ee_base->params_for_tuning_caps[1], ee_base->feature_enable, ee_base->misc_configuration); diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index 8cfe5403388c..776909a13a37 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -105,8 +105,6 @@ void ktrgenio(struct ktr_genio *, int); void ktrpsig(struct ktr_psig *); void ktrcsw(struct ktr_csw *); void ktrcsw_old(struct ktr_csw_old *); -void ktruser_malloc(void *); -void ktruser_rtld(int, void *); void ktruser(int, void *); void ktrcaprights(cap_rights_t *); void ktritimerval(struct itimerval *it); diff --git a/usr.bin/kdump/linux32_syscalls.conf b/usr.bin/kdump/linux32_syscalls.conf deleted file mode 100644 index 66a67fd8e2f0..000000000000 --- a/usr.bin/kdump/linux32_syscalls.conf +++ /dev/null @@ -1,11 +0,0 @@ -# $FreeBSD$ -sysnames="linux32_syscalls.c" -sysproto="/dev/null" -sysproto_h=_LINUX32_SYSPROTO_H_ -syshdr="/dev/null" -syssw="/dev/null" -sysmk="/dev/null" -syscallprefix="LINUX32_SYS_" -switchname="/dev/null" -namesname="linux32_syscallnames" -systrace="/dev/null" diff --git a/usr.bin/kdump/linux_syscalls.conf b/usr.bin/kdump/linux_syscalls.conf deleted file mode 100644 index 82adb560c131..000000000000 --- a/usr.bin/kdump/linux_syscalls.conf +++ /dev/null @@ -1,11 +0,0 @@ -# $FreeBSD$ -sysnames="linux_syscalls.c" -sysproto="/dev/null" 
-sysproto_h=_LINUX_SYSPROTO_H_ -syshdr="/dev/null" -syssw="/dev/null" -sysmk="/dev/null" -syscallprefix="LINUX_SYS_" -switchname="/dev/null" -namesname="linux_syscallnames" -systrace="/dev/null" diff --git a/usr.bin/truss/makesyscallsconf.sh b/usr.bin/truss/makesyscallsconf.sh deleted file mode 100755 index 81eb78ea9f09..000000000000 --- a/usr.bin/truss/makesyscallsconf.sh +++ /dev/null @@ -1,21 +0,0 @@ -#! /bin/sh -# $FreeBSD$ - -ABI="$1" -CONF="$2" - -header="${ABI}_syscalls.h" - -cat > "${CONF}" << EOF -sysnames="${header}.tmp" -sysproto="/dev/null" -sysproto_h="/dev/null" -syshdr="/dev/null" -sysmk="/dev/null" -syssw="/dev/null" -syshide="/dev/null" -syscallprefix="SYS_" -switchname="sysent" -namesname="syscallnames" -systrace="/dev/null" -EOF diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index 3bf7b9d69ede..7046b56049c9 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -281,6 +281,8 @@ static struct syscall decoded_syscalls[] = { .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 } } }, { .name = "rfork", .ret_type = 1, .nargs = 1, .args = { { Rforkflags, 0 } } }, + { .name = "rmdir", .ret_type = 1, .nargs = 1, + .args = { { Name, 0 } } }, { .name = "select", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Fd_set, 1 }, { Fd_set, 2 }, { Fd_set, 3 }, { Timeval, 4 } } }, diff --git a/usr.bin/xinstall/xinstall.c b/usr.bin/xinstall/xinstall.c index f8b4568cd542..44ab194f9fbb 100644 --- a/usr.bin/xinstall/xinstall.c +++ b/usr.bin/xinstall/xinstall.c @@ -149,6 +149,7 @@ main(int argc, char *argv[]) char *p; const char *to_name; + fset = 0; iflags = 0; group = owner = NULL; while ((ch = getopt(argc, argv, "B:bCcD:df:g:h:l:M:m:N:o:pSsT:Uv")) != @@ -533,7 +534,9 @@ do_link(const char *from_name, const char *to_name, if (target_sb->st_flags & NOCHANGEBITS) (void)chflags(to_name, target_sb->st_flags & ~NOCHANGEBITS); - unlink(to_name); + if (verbose) + printf("install: link %s -> %s\n", + from_name, to_name); ret = 
rename(tmpl, to_name); /* * If rename has posix semantics, then the temporary @@ -543,8 +546,12 @@ do_link(const char *from_name, const char *to_name, (void)unlink(tmpl); } return (ret); - } else + } else { + if (verbose) + printf("install: link %s -> %s\n", + from_name, to_name); return (link(from_name, to_name)); + } } /* @@ -573,14 +580,18 @@ do_symlink(const char *from_name, const char *to_name, if (target_sb->st_flags & NOCHANGEBITS) (void)chflags(to_name, target_sb->st_flags & ~NOCHANGEBITS); - unlink(to_name); - + if (verbose) + printf("install: symlink %s -> %s\n", + from_name, to_name); if (rename(tmpl, to_name) == -1) { /* Remove temporary link before exiting. */ (void)unlink(tmpl); err(EX_OSERR, "%s: rename", to_name); } } else { + if (verbose) + printf("install: symlink %s -> %s\n", + from_name, to_name); if (symlink(from_name, to_name) == -1) err(EX_OSERR, "symlink %s -> %s", from_name, to_name); } @@ -749,8 +760,9 @@ install(const char *from_name, const char *to_name, u_long fset, u_int flags) } /* Build the target path. */ if (flags & DIRECTORY) { - (void)snprintf(pathbuf, sizeof(pathbuf), "%s/%s", + (void)snprintf(pathbuf, sizeof(pathbuf), "%s%s%s", to_name, + to_name[strlen(to_name) - 1] == '/' ? "" : "/", (p = strrchr(from_name, '/')) ? 
++p : from_name); to_name = pathbuf; } @@ -891,11 +903,21 @@ install(const char *from_name, const char *to_name, u_long fset, u_int flags) } if (verbose) (void)printf("install: %s -> %s\n", to_name, backup); - if (rename(to_name, backup) < 0) { + if (unlink(backup) < 0 && errno != ENOENT) { serrno = errno; + if (to_sb.st_flags & NOCHANGEBITS) + (void)chflags(to_name, to_sb.st_flags); unlink(tempfile); errno = serrno; - err(EX_OSERR, "rename: %s to %s", to_name, + err(EX_OSERR, "unlink: %s", backup); + } + if (link(to_name, backup) < 0) { + serrno = errno; + unlink(tempfile); + if (to_sb.st_flags & NOCHANGEBITS) + (void)chflags(to_name, to_sb.st_flags); + errno = serrno; + err(EX_OSERR, "link: %s to %s", to_name, backup); } } @@ -1116,16 +1138,26 @@ create_newfile(const char *path, int target, struct stat *sbp) if (dobackup) { if ((size_t)snprintf(backup, MAXPATHLEN, "%s%s", - path, suffix) != strlen(path) + strlen(suffix)) + path, suffix) != strlen(path) + strlen(suffix)) { + saved_errno = errno; + if (sbp->st_flags & NOCHANGEBITS) + (void)chflags(path, sbp->st_flags); + errno = saved_errno; errx(EX_OSERR, "%s: backup filename too long", path); + } (void)snprintf(backup, MAXPATHLEN, "%s%s", path, suffix); if (verbose) (void)printf("install: %s -> %s\n", path, backup); - if (rename(path, backup) < 0) + if (rename(path, backup) < 0) { + saved_errno = errno; + if (sbp->st_flags & NOCHANGEBITS) + (void)chflags(path, sbp->st_flags); + errno = saved_errno; err(EX_OSERR, "rename: %s to %s", path, backup); + } } else if (unlink(path) < 0) saved_errno = errno; diff --git a/usr.sbin/acpi/acpidump/acpi.c b/usr.sbin/acpi/acpidump/acpi.c index e790ef118b74..6d3f31a1938c 100644 --- a/usr.sbin/acpi/acpidump/acpi.c +++ b/usr.sbin/acpi/acpidump/acpi.c @@ -1494,8 +1494,8 @@ aml_disassemble(ACPI_TABLE_HEADER *rsdt, ACPI_TABLE_HEADER *dsdp) perror("mkdtemp tmp working dir"); return; } - assert((size_t)snprintf(tmpstr, sizeof(tmpstr), "%s%s", wrkdir, iname) - <= sizeof(tmpstr) - 1); + 
len = (size_t)snprintf(tmpstr, sizeof(tmpstr), "%s%s", wrkdir, iname); + assert(len <= sizeof(tmpstr) - 1); fd = open(tmpstr, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR); if (fd < 0) { perror("iasl tmp file"); @@ -1527,8 +1527,8 @@ aml_disassemble(ACPI_TABLE_HEADER *rsdt, ACPI_TABLE_HEADER *dsdp) } /* Dump iasl's output to stdout */ - assert((size_t)snprintf(tmpstr, sizeof(tmpstr), "%s%s", wrkdir, oname) - <= sizeof(tmpstr) -1); + len = (size_t)snprintf(tmpstr, sizeof(tmpstr), "%s%s", wrkdir, oname); + assert(len <= sizeof(tmpstr) - 1); fp = fopen(tmpstr, "r"); if (unlink(tmpstr) < 0) { perror("unlink");