diff --git a/Makefile.inc1 b/Makefile.inc1 index 77b8762b8b2d..e6b26101a585 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -1263,7 +1263,7 @@ reinstallkernel reinstallkernel.debug: _installcheck_kernel ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//} .endif -.if ${BUILDKERNELS:[#]} > 1 +.if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS) .for _kernel in ${BUILDKERNELS:[2..-1]} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel}" @@ -1294,7 +1294,7 @@ distributekernel distributekernel.debug: ${DESTDIR}/${DISTDIR}/kernel.meta .endif .endif -.if ${BUILDKERNELS:[#]} > 1 +.if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS) .for _kernel in ${BUILDKERNELS:[2..-1]} .if defined(NO_ROOT) echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta @@ -1325,7 +1325,7 @@ packagekernel: tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz -.if ${BUILDKERNELS:[#]} > 1 +.if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS) .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' \ @@ -1346,7 +1346,7 @@ packagekernel: cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz -.if ${BUILDKERNELS:[#]} > 1 +.if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS) .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' . 
| \ diff --git a/contrib/llvm/projects/libunwind/src/AddressSpace.hpp b/contrib/llvm/projects/libunwind/src/AddressSpace.hpp index 73013c73ff71..55828df2856d 100644 --- a/contrib/llvm/projects/libunwind/src/AddressSpace.hpp +++ b/contrib/llvm/projects/libunwind/src/AddressSpace.hpp @@ -37,6 +37,7 @@ namespace libunwind { #if _LIBUNWIND_ARM_EHABI #if defined(__FreeBSD__) +#include typedef void *_Unwind_Ptr; #elif defined(__linux__) diff --git a/etc/mtree/BSD.tests.dist b/etc/mtree/BSD.tests.dist index ad764021857c..ff3232417cf7 100644 --- a/etc/mtree/BSD.tests.dist +++ b/etc/mtree/BSD.tests.dist @@ -622,6 +622,8 @@ .. pw .. + rpcbind + .. sa .. .. diff --git a/etc/rc b/etc/rc index 2c90f385b88b..576ddf937af4 100644 --- a/etc/rc +++ b/etc/rc @@ -130,11 +130,17 @@ for _rc_elem in ${files}; do done # Remove the firstboot sentinel, and reboot if it was requested. +# Be a bit paranoid about removing it to handle the common failure +# modes since the consequence of failure can be big. +# Note: this assumes firstboot_sentinel is on / when we have +# a read-only /, or that it is on media that's writable. 
if [ -e ${firstboot_sentinel} ]; then [ ${root_rw_mount} = "yes" ] || mount -uw / - /bin/rm ${firstboot_sentinel} + chflags -R 0 ${firstboot_sentinel} + rm -rf ${firstboot_sentinel} if [ -e ${firstboot_sentinel}-reboot ]; then - /bin/rm ${firstboot_sentinel}-reboot + chflags -R 0 ${firstboot_sentinel}-reboot + rm -rf ${firstboot_sentinel}-reboot [ ${root_rw_mount} = "yes" ] || mount -ur / kill -INT 1 fi diff --git a/gnu/usr.bin/binutils/ld/Makefile b/gnu/usr.bin/binutils/ld/Makefile index 5058c0a8e3e8..5bc3846a1434 100644 --- a/gnu/usr.bin/binutils/ld/Makefile +++ b/gnu/usr.bin/binutils/ld/Makefile @@ -6,7 +6,8 @@ ELF_SCR_EXT= x xbn xc xd xdc xdw xn xr xs xsc xsw xu xw .PATH: ${SRCDIR}/ld -PROG= ld +PROG= ld.bfd +MAN= ld.1 SCRIPTDIR= /usr/libdata/ldscripts SRCS+= ldcref.c \ ldctor.c \ @@ -48,7 +49,7 @@ CLEANFILES+= ldemul-list.h stringify.sed FILES= ${LDSCRIPTS:S|^|ldscripts/|} FILESDIR= ${SCRIPTDIR} -LINKS= ${BINDIR}/ld ${BINDIR}/ld.bfd +LINKS= ${BINDIR}/ld.bfd ${BINDIR}/ld HOST= ${TARGET_TUPLE} LIBSEARCHPATH= \"=/lib\":\"=/usr/lib\" diff --git a/lib/libstand/Makefile b/lib/libstand/Makefile index 9d1f6bcd44a0..ee8087be3ea6 100644 --- a/lib/libstand/Makefile +++ b/lib/libstand/Makefile @@ -38,8 +38,9 @@ SRCS+= ntoh.c # string functions from libc .PATH: ${LIBC_SRC}/string -SRCS+= bcmp.c bcopy.c bzero.c ffs.c memccpy.c memchr.c memcmp.c memcpy.c \ - memmove.c memset.c qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \ +SRCS+= bcmp.c bcopy.c bzero.c ffs.c fls.c \ + memccpy.c memchr.c memcmp.c memcpy.c memmove.c memset.c \ + qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \ strcspn.c strlcat.c strlcpy.c strlen.c strncat.c strncmp.c strncpy.c \ strpbrk.c strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c .if ${MACHINE_CPUARCH} == "arm" diff --git a/lib/libsysdecode/Makefile.depend b/lib/libsysdecode/Makefile.depend new file mode 100644 index 000000000000..653f52664dc0 --- /dev/null +++ b/lib/libsysdecode/Makefile.depend @@ -0,0 +1,22 @@ +# $FreeBSD$ +# Autogenerated - do 
NOT edit! + +DIRDEPS = \ + gnu/lib/csu \ + gnu/lib/libgcc \ + include \ + include/rpc \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + + +.include + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +ioctl.So: ioctl.c +ioctl.o: ioctl.c +ioctl.po: ioctl.c +.endif diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 6daef2d13405..424f2eafd7ce 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -435,7 +435,7 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) trust = !issetugid(); - md_abi_variant_hook(aux_info); +/* md_abi_variant_hook(aux_info); */ ld_bind_now = getenv(_LD("BIND_NOW")); /* diff --git a/release/Makefile b/release/Makefile index 07b8048de483..ba1ca80c050c 100644 --- a/release/Makefile +++ b/release/Makefile @@ -281,7 +281,11 @@ ftp: packagesystem cp *.txz MANIFEST ftp release: real-release vm-release cloudware-release - touch ${.OBJDIR}/${.TARGET} + ${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} release-done + true + +release-done: + touch release real-release: ${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} obj diff --git a/sbin/geom/class/eli/Makefile b/sbin/geom/class/eli/Makefile index 50de65117c62..5eff32c50689 100644 --- a/sbin/geom/class/eli/Makefile +++ b/sbin/geom/class/eli/Makefile @@ -4,6 +4,7 @@ GEOM_CLASS= eli SRCS= g_eli_crypto.c +SRCS+= g_eli_hmac.c SRCS+= g_eli_key.c SRCS+= pkcs5v2.c SRCS+= sha256c.c diff --git a/share/man/man7/ascii.7 b/share/man/man7/ascii.7 index a9c75ad9031b..aa02af9a2832 100644 --- a/share/man/man7/ascii.7 +++ b/share/man/man7/ascii.7 @@ -28,7 +28,7 @@ .\" @(#)ascii.7 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd June 5, 1993 +.Dd January 6, 2016 .Dt ASCII 7 .Os .Sh NAME @@ -42,7 +42,7 @@ The set: .Bd -literal -offset left 000 NUL 001 SOH 002 STX 003 ETX 004 EOT 005 ENQ 006 ACK 007 BEL -010 BS 011 HT 012 NL 013 VT 014 NP 015 CR 016 SO 017 SI +010 BS 011 HT 012 LF 013 VT 014 FF 015 CR 016 SO 017 SI 020 DLE 021 DC1 022 DC2 
023 DC3 024 DC4 025 NAK 026 SYN 027 ETB 030 CAN 031 EM 032 SUB 033 ESC 034 FS 035 GS 036 RS 037 US 040 SP 041 ! 042 " 043 # 044 $ 045 % 046 & 047 ' @@ -64,7 +64,7 @@ The set: .Bd -literal -offset left 00 NUL 01 SOH 02 STX 03 ETX 04 EOT 05 ENQ 06 ACK 07 BEL -08 BS 09 HT 0A NL 0B VT 0C NP 0D CR 0E SO 0F SI +08 BS 09 HT 0A LF 0B VT 0C FF 0D CR 0E SO 0F SI 10 DLE 11 DC1 12 DC2 13 DC3 14 DC4 15 NAK 16 SYN 17 ETB 18 CAN 19 EM 1A SUB 1B ESC 1C FS 1D GS 1E RS 1F US 20 SP 21 ! 22 " 23 # 24 $ 25 % 26 & 27 ' @@ -86,7 +86,7 @@ The set: .Bd -literal -offset left 0 NUL 1 SOH 2 STX 3 ETX 4 EOT 5 ENQ 6 ACK 7 BEL - 8 BS 9 HT 10 NL 11 VT 12 NP 13 CR 14 SO 15 SI + 8 BS 9 HT 10 LF 11 VT 12 FF 13 CR 14 SO 15 SI 16 DLE 17 DC1 18 DC2 19 DC3 20 DC4 21 NAK 22 SYN 23 ETB 24 CAN 25 EM 26 SUB 27 ESC 28 FS 29 GS 30 RS 31 US 32 SP 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' diff --git a/share/misc/ascii b/share/misc/ascii index b7bcef5c467a..2a71a4305dd8 100644 --- a/share/misc/ascii +++ b/share/misc/ascii @@ -1,5 +1,5 @@ |000 nul|001 soh|002 stx|003 etx|004 eot|005 enq|006 ack|007 bel| -|010 bs |011 ht |012 nl |013 vt |014 np |015 cr |016 so |017 si | +|010 bs |011 ht |012 lf |013 vt |014 ff |015 cr |016 so |017 si | |020 dle|021 dc1|022 dc2|023 dc3|024 dc4|025 nak|026 syn|027 etb| |030 can|031 em |032 sub|033 esc|034 fs |035 gs |036 rs |037 us | |040 sp |041 ! |042 " |043 # |044 $ |045 % |046 & |047 ' | @@ -16,7 +16,7 @@ |170 x |171 y |172 z |173 { |174 | |175 } |176 ~ |177 del| | 00 nul| 01 soh| 02 stx| 03 etx| 04 eot| 05 enq| 06 ack| 07 bel| -| 08 bs | 09 ht | 0a nl | 0b vt | 0c np | 0d cr | 0e so | 0f si | +| 08 bs | 09 ht | 0a lf | 0b vt | 0c ff | 0d cr | 0e so | 0f si | | 10 dle| 11 dc1| 12 dc2| 13 dc3| 14 dc4| 15 nak| 16 syn| 17 etb| | 18 can| 19 em | 1a sub| 1b esc| 1c fs | 1d gs | 1e rs | 1f us | | 20 sp | 21 ! 
| 22 " | 23 # | 24 $ | 25 % | 26 & | 27 ' | @@ -33,7 +33,7 @@ | 78 x | 79 y | 7a z | 7b { | 7c | | 7d } | 7e ~ | 7f del| | 0 nul| 1 soh| 2 stx| 3 etx| 4 eot| 5 enq| 6 ack| 7 bel| -| 8 bs | 9 ht | 10 nl | 11 vt | 12 np | 13 cr | 14 so | 15 si | +| 8 bs | 9 ht | 10 lf | 11 vt | 12 ff | 13 cr | 14 so | 15 si | | 16 dle| 17 dc1| 18 dc2| 19 dc3| 20 dc4| 21 nak| 22 syn| 23 etb| | 24 can| 25 em | 26 sub| 27 esc| 28 fs | 29 gs | 30 rs | 31 us | | 32 sp | 33 ! | 34 " | 35 # | 36 $ | 37 % | 38 & | 39 ' | diff --git a/sys/arm/arm/db_interface.c b/sys/arm/arm/db_interface.c index 25d1706c8ff1..43831462d896 100644 --- a/sys/arm/arm/db_interface.c +++ b/sys/arm/arm/db_interface.c @@ -152,6 +152,10 @@ int db_frame(struct db_variable *vp, db_expr_t *valp, int rw) void db_show_mdpcpu(struct pcpu *pc) { + +#if __ARM_ARCH >= 6 + db_printf("curpmap = %p\n", pc->pc_curpmap); +#endif } int db_validate_address(vm_offset_t addr) diff --git a/sys/boot/efi/boot1/boot1.c b/sys/boot/efi/boot1/boot1.c index be5999328636..e2e4c907dd01 100644 --- a/sys/boot/efi/boot1/boot1.c +++ b/sys/boot/efi/boot1/boot1.c @@ -132,8 +132,7 @@ EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab) conout->Reset(conout, TRUE); max_dim = best_mode = 0; for (i = 0; ; i++) { - status = conout->QueryMode(conout, i, - &cols, &rows); + status = conout->QueryMode(conout, i, &cols, &rows); if (EFI_ERROR(status)) break; if (cols * rows > max_dim) { @@ -331,20 +330,20 @@ load(const char *fname) buffer, bufsize, &loaderhandle); if (EFI_ERROR(status)) printf("LoadImage failed with error %lu\n", - status & ~EFI_ERROR_MASK); + EFI_ERROR_CODE(status)); status = systab->BootServices->HandleProtocol(loaderhandle, &LoadedImageGUID, (VOID**)&loaded_image); if (EFI_ERROR(status)) printf("HandleProtocol failed with error %lu\n", - status & ~EFI_ERROR_MASK); + EFI_ERROR_CODE(status)); loaded_image->DeviceHandle = bootdevhandle; status = systab->BootServices->StartImage(loaderhandle, NULL, NULL); if (EFI_ERROR(status)) 
printf("StartImage failed with error %lu\n", - status & ~EFI_ERROR_MASK); + EFI_ERROR_CODE(status)); } static void diff --git a/sys/boot/efi/include/amd64/efibind.h b/sys/boot/efi/include/amd64/efibind.h index 3d70b58a6271..d7a8dc04d601 100644 --- a/sys/boot/efi/include/amd64/efibind.h +++ b/sys/boot/efi/include/amd64/efibind.h @@ -39,7 +39,7 @@ Revision History // No ANSI C 1999/2000 stdint.h integer width declarations - #if _MSC_EXTENSIONS + #ifdef _MSC_EXTENSIONS // Use Microsoft C compiler integer width declarations @@ -164,7 +164,7 @@ typedef uint64_t UINTN; #endif #ifndef EFIAPI // Forces EFI calling conventions reguardless of compiler options - #if _MSC_EXTENSIONS + #ifdef _MSC_EXTENSIONS #define EFIAPI __cdecl // Force C calling convention for Microsoft C compiler #else #define EFIAPI // Substitute expresion to force C calling convention @@ -265,7 +265,7 @@ typedef uint64_t UINTN; #endif #endif /* __FreeBSD__ */ -#if _MSC_EXTENSIONS +#ifdef _MSC_EXTENSIONS #pragma warning ( disable : 4731 ) // Suppress warnings about modification of EBP #endif diff --git a/sys/boot/efi/include/arm64/efibind.h b/sys/boot/efi/include/arm64/efibind.h index 21f0d25d70e7..6569f96fcf84 100644 --- a/sys/boot/efi/include/arm64/efibind.h +++ b/sys/boot/efi/include/arm64/efibind.h @@ -39,7 +39,7 @@ Revision History // No ANSI C 1999/2000 stdint.h integer width declarations - #if _MSC_EXTENSIONS + #ifdef _MSC_EXTENSIONS // Use Microsoft C compiler integer width declarations @@ -159,7 +159,7 @@ typedef uint64_t UINTN; // #ifndef EFIAPI // Forces EFI calling conventions reguardless of compiler options - #if _MSC_EXTENSIONS + #ifdef _MSC_EXTENSIONS #define EFIAPI __cdecl // Force C calling convention for Microsoft C compiler #else #define EFIAPI // Substitute expresion to force C calling convention diff --git a/sys/boot/efi/include/efierr.h b/sys/boot/efi/include/efierr.h index dc57f0ed4333..921b297ed4fb 100644 --- a/sys/boot/efi/include/efierr.h +++ b/sys/boot/efi/include/efierr.h @@ 
-30,7 +30,8 @@ Revision History #define EFIWARN(a) (a) -#define EFI_ERROR(a) (((INTN) a) < 0) +#define EFI_ERROR(a) (((INTN) a) < 0) +#define EFI_ERROR_CODE(a) (a & ~EFI_ERROR_MASK) #define EFI_SUCCESS 0 diff --git a/sys/boot/efi/include/i386/efibind.h b/sys/boot/efi/include/i386/efibind.h index de3658fb95e0..6e5a7163a97c 100644 --- a/sys/boot/efi/include/i386/efibind.h +++ b/sys/boot/efi/include/i386/efibind.h @@ -39,7 +39,7 @@ Revision History // No ANSI C 1999/2000 stdint.h integer width declarations - #if _MSC_EXTENSIONS + #ifdef _MSC_EXTENSIONS // Use Microsoft C compiler integer width declarations @@ -160,7 +160,7 @@ typedef uint32_t UINTN; // #ifndef EFIAPI // Forces EFI calling conventions reguardless of compiler options - #if _MSC_EXTENSIONS + #ifdef _MSC_EXTENSIONS #define EFIAPI __cdecl // Force C calling convention for Microsoft C compiler #else #define EFIAPI // Substitute expresion to force C calling convention @@ -261,7 +261,7 @@ typedef uint32_t UINTN; #endif #endif /* __FreeBSD__ */ -#if _MSC_EXTENSIONS +#ifdef _MSC_EXTENSIONS #pragma warning ( disable : 4731 ) // Suppress warnings about modification of EBP #endif diff --git a/sys/boot/efi/libefi/Makefile b/sys/boot/efi/libefi/Makefile index d248927c2b90..25251c5319c2 100644 --- a/sys/boot/efi/libefi/Makefile +++ b/sys/boot/efi/libefi/Makefile @@ -21,5 +21,6 @@ CFLAGS+= -I${.CURDIR}/../../common # Handle FreeBSD specific %b and %D printf format specifiers CFLAGS+= ${FORMAT_EXTENSIONS} +CFLAGS+= -DTERM_EMU .include diff --git a/sys/boot/efi/libefi/efi_console.c b/sys/boot/efi/libefi/efi_console.c index 3538994ac104..52a372582898 100644 --- a/sys/boot/efi/libefi/efi_console.c +++ b/sys/boot/efi/libefi/efi_console.c @@ -35,6 +35,69 @@ __FBSDID("$FreeBSD$"); static SIMPLE_TEXT_OUTPUT_INTERFACE *conout; static SIMPLE_INPUT_INTERFACE *conin; +#ifdef TERM_EMU +#define DEFAULT_FGCOLOR EFI_LIGHTGRAY +#define DEFAULT_BGCOLOR EFI_BLACK + +#define MAXARGS 8 +static int args[MAXARGS], argc; +static int fg_c, 
bg_c, curx, cury; +static int esc; + +void get_pos(int *x, int *y); +void curs_move(int *_x, int *_y, int x, int y); +static void CL(int); +#endif + +static void efi_cons_probe(struct console *); +static int efi_cons_init(int); +void efi_cons_putchar(int); +int efi_cons_getchar(void); +void efi_cons_efiputchar(int); +int efi_cons_poll(void); + +struct console efi_console = { + "efi", + "EFI console", + 0, + efi_cons_probe, + efi_cons_init, + efi_cons_putchar, + efi_cons_getchar, + efi_cons_poll +}; + +#ifdef TERM_EMU + +/* Get cursor position. */ +void +get_pos(int *x, int *y) +{ + *x = conout->Mode->CursorColumn; + *y = conout->Mode->CursorRow; +} + +/* Move cursor to x rows and y cols (0-based). */ +void +curs_move(int *_x, int *_y, int x, int y) +{ + conout->SetCursorPosition(conout, x, y); + if (_x != NULL) + *_x = conout->Mode->CursorColumn; + if (_y != NULL) + *_y = conout->Mode->CursorRow; +} + +/* Clear internal state of the terminal emulation code. */ +void +end_term(void) +{ + esc = 0; + argc = -1; +} + +#endif + static void efi_cons_probe(struct console *cp) { @@ -46,22 +109,314 @@ efi_cons_probe(struct console *cp) static int efi_cons_init(int arg) { - conout->SetAttribute(conout, EFI_TEXT_ATTR(EFI_LIGHTGRAY, EFI_BLACK)); + conout->SetAttribute(conout, EFI_TEXT_ATTR(DEFAULT_FGCOLOR, + DEFAULT_BGCOLOR)); +#ifdef TERM_EMU + end_term(); + get_pos(&curx, &cury); + curs_move(&curx, &cury, curx, cury); + fg_c = DEFAULT_FGCOLOR; + bg_c = DEFAULT_BGCOLOR; +#endif + conout->EnableCursor(conout, TRUE); return 0; } +static void +efi_cons_rawputchar(int c) +{ + int i; + UINTN x, y; + conout->QueryMode(conout, conout->Mode->Mode, &x, &y); + + if (c == '\t') + /* XXX lame tab expansion */ + for (i = 0; i < 8; i++) + efi_cons_rawputchar(' '); + else { +#ifndef TERM_EMU + if (c == '\n') + efi_cons_efiputchar('\r'); + else + efi_cons_efiputchar(c); +#else + switch (c) { + case '\r': + curx = 0; + curs_move(&curx, &cury, curx, cury); + return; + case '\n': + cury++; + if 
(cury >= y) { + efi_cons_efiputchar('\n'); + cury--; + } else + curs_move(&curx, &cury, curx, cury); + return; + case '\b': + if (curx > 0) { + curx--; + curs_move(&curx, &cury, curx, cury); + } + return; + default: + efi_cons_efiputchar(c); + curx++; + if (curx > x-1) { + curx = 0; + cury++; + } + if (cury > y-1) { + curx = 0; + cury--; + } + } + curs_move(&curx, &cury, curx, cury); +#endif + } +} + +/* Gracefully exit ESC-sequence processing in case of misunderstanding. */ +static void +bail_out(int c) +{ + char buf[16], *ch; + int i; + + if (esc) { + efi_cons_rawputchar('\033'); + if (esc != '\033') + efi_cons_rawputchar(esc); + for (i = 0; i <= argc; ++i) { + sprintf(buf, "%d", args[i]); + ch = buf; + while (*ch) + efi_cons_rawputchar(*ch++); + } + } + efi_cons_rawputchar(c); + end_term(); +} + +/* Clear display from current position to end of screen. */ +static void +CD(void) { + int i; + UINTN x, y; + + get_pos(&curx, &cury); + if (curx == 0 && cury == 0) { + conout->ClearScreen(conout); + end_term(); + return; + } + + conout->QueryMode(conout, conout->Mode->Mode, &x, &y); + CL(0); /* clear current line from cursor to end */ + for (i = cury + 1; i < y-1; i++) { + curs_move(NULL, NULL, 0, i); + CL(0); + } + curs_move(NULL, NULL, curx, cury); + end_term(); +} + +/* + * Absolute cursor move to args[0] rows and args[1] columns + * (the coordinates are 1-based). + */ +static void +CM(void) +{ + if (args[0] > 0) + args[0]--; + if (args[1] > 0) + args[1]--; + curs_move(&curx, &cury, args[1], args[0]); + end_term(); +} + +/* Home cursor (left top corner), also called from mode command. 
*/ +void +HO(void) +{ + argc = 1; + args[0] = args[1] = 1; + CM(); +} + +/* Clear line from current position to end of line */ +static void +CL(int direction) +{ + int i, len; + UINTN x, y; + CHAR16 *line; + + conout->QueryMode(conout, conout->Mode->Mode, &x, &y); + switch (direction) { + case 0: /* from cursor to end */ + len = x - curx + 1; + break; + case 1: /* from beginning to cursor */ + len = curx; + break; + case 2: /* entire line */ + len = x; + break; + } + + if (cury == y - 1) + len--; + + line = malloc(len * sizeof (CHAR16)); + if (line == NULL) { + printf("out of memory\n"); + return; + } + for (i = 0; i < len; i++) + line[i] = ' '; + line[len-1] = 0; + + if (direction != 0) + curs_move(NULL, NULL, 0, cury); + + conout->OutputString(conout, line); + /* restore cursor position */ + curs_move(NULL, NULL, curx, cury); + free(line); + end_term(); +} + +static void +get_arg(int c) +{ + if (argc < 0) + argc = 0; + args[argc] *= 10; + args[argc] += c - '0'; +} + +/* Emulate basic capabilities of cons25 terminal */ +static void +efi_term_emu(int c) +{ + static int ansi_col[] = { + 0, 4, 2, 6, 1, 5, 3, 7 + }; + int t, i; + + switch (esc) { + case 0: + switch (c) { + case '\033': + esc = c; + break; + default: + efi_cons_rawputchar(c); + break; + } + break; + case '\033': + switch (c) { + case '[': + esc = c; + args[0] = 0; + argc = -1; + break; + default: + bail_out(c); + break; + } + break; + case '[': + switch (c) { + case ';': + if (argc < 0) + argc = 0; + else if (argc + 1 >= MAXARGS) + bail_out(c); + else + args[++argc] = 0; + break; + case 'H': /* ho = \E[H */ + if (argc < 0) + HO(); + else if (argc == 1) + CM(); + else + bail_out(c); + break; + case 'J': /* cd = \E[J */ + if (argc < 0) + CD(); + else + bail_out(c); + break; + case 'm': + if (argc < 0) { + fg_c = DEFAULT_FGCOLOR; + bg_c = DEFAULT_BGCOLOR; + } + for (i = 0; i <= argc; ++i) { + switch (args[i]) { + case 0: /* back to normal */ + fg_c = DEFAULT_FGCOLOR; + bg_c = DEFAULT_BGCOLOR; + break; + 
case 1: /* bold */ + fg_c |= 0x8; + break; + case 4: /* underline */ + case 5: /* blink */ + bg_c |= 0x8; + break; + case 7: /* reverse */ + t = fg_c; + fg_c = bg_c; + bg_c = t; + break; + case 30: case 31: case 32: case 33: + case 34: case 35: case 36: case 37: + fg_c = ansi_col[args[i] - 30]; + break; + case 39: /* normal */ + fg_c = DEFAULT_FGCOLOR; + break; + case 40: case 41: case 42: case 43: + case 44: case 45: case 46: case 47: + bg_c = ansi_col[args[i] - 40]; + break; + case 49: /* normal */ + bg_c = DEFAULT_BGCOLOR; + break; + } + } + conout->SetAttribute(conout, EFI_TEXT_ATTR(fg_c, bg_c)); + end_term(); + break; + default: + if (isdigit(c)) + get_arg(c); + else + bail_out(c); + break; + } + break; + default: + bail_out(c); + break; + } +} + void efi_cons_putchar(int c) { - CHAR16 buf[2]; - - if (c == '\n') - efi_cons_putchar('\r'); - - buf[0] = c; - buf[1] = 0; - - conout->OutputString(conout, buf); +#ifdef TERM_EMU + efi_term_emu(c); +#else + efi_cons_rawputchar(c); +#endif } int @@ -77,6 +432,12 @@ efi_cons_getchar() BS->WaitForEvent(1, &conin->WaitForKey, &junk); status = conin->ReadKeyStroke(conin, &key); } + switch (key.ScanCode) { + case 0x17: /* ESC */ + return (0x1b); /* esc */ + } + + /* this can return */ return (key.UnicodeChar); } @@ -87,13 +448,36 @@ efi_cons_poll() return (BS->CheckEvent(conin->WaitForKey) == EFI_SUCCESS); } -struct console efi_console = { - "efi", - "EFI console", - 0, - efi_cons_probe, - efi_cons_init, - efi_cons_putchar, - efi_cons_getchar, - efi_cons_poll -}; +/* Plain direct access to EFI OutputString(). 
*/ +void +efi_cons_efiputchar(int c) +{ + CHAR16 buf[2]; + + /* + * translate box chars to unicode + */ + switch (c) { + /* single frame */ + case 0xb3: buf[0] = BOXDRAW_VERTICAL; break; + case 0xbf: buf[0] = BOXDRAW_DOWN_LEFT; break; + case 0xc0: buf[0] = BOXDRAW_UP_RIGHT; break; + case 0xc4: buf[0] = BOXDRAW_HORIZONTAL; break; + case 0xda: buf[0] = BOXDRAW_DOWN_RIGHT; break; + case 0xd9: buf[0] = BOXDRAW_UP_LEFT; break; + + /* double frame */ + case 0xba: buf[0] = BOXDRAW_DOUBLE_VERTICAL; break; + case 0xbb: buf[0] = BOXDRAW_DOUBLE_DOWN_LEFT; break; + case 0xbc: buf[0] = BOXDRAW_DOUBLE_UP_LEFT; break; + case 0xc8: buf[0] = BOXDRAW_DOUBLE_UP_RIGHT; break; + case 0xc9: buf[0] = BOXDRAW_DOUBLE_DOWN_RIGHT; break; + case 0xcd: buf[0] = BOXDRAW_DOUBLE_HORIZONTAL; break; + + default: + buf[0] = c; + } + buf[1] = 0; /* terminate string */ + + conout->OutputString(conout, buf); +} diff --git a/sys/boot/efi/loader/arch/amd64/framebuffer.c b/sys/boot/efi/loader/arch/amd64/framebuffer.c index eb78f7b7ace9..04b880424f22 100644 --- a/sys/boot/efi/loader/arch/amd64/framebuffer.c +++ b/sys/boot/efi/loader/arch/amd64/framebuffer.c @@ -178,7 +178,7 @@ efifb_uga_find_pixel(EFI_UGA_DRAW_PROTOCOL *uga, u_int line, printf("No change detected in frame buffer"); fail: - printf(" -- error %lu\n", status & ~EFI_ERROR_MASK); + printf(" -- error %lu\n", EFI_ERROR_CODE(status)); free(data1); return (-1); } @@ -473,7 +473,7 @@ command_gop(int argc, char *argv[]) status = BS->LocateProtocol(&gop_guid, NULL, (VOID **)&gop); if (EFI_ERROR(status)) { sprintf(command_errbuf, "%s: Graphics Output Protocol not " - "present (error=%lu)", argv[0], status & ~EFI_ERROR_MASK); + "present (error=%lu)", argv[0], EFI_ERROR_CODE(status)); return (CMD_ERROR); } @@ -494,7 +494,7 @@ command_gop(int argc, char *argv[]) if (EFI_ERROR(status)) { sprintf(command_errbuf, "%s: Unable to set mode to " "%u (error=%lu)", argv[0], mode, - status & ~EFI_ERROR_MASK); + EFI_ERROR_CODE(status)); return (CMD_ERROR); } } else 
if (!strcmp(argv[1], "get")) { @@ -541,7 +541,7 @@ command_uga(int argc, char *argv[]) status = BS->LocateProtocol(&uga_guid, NULL, (VOID **)&uga); if (EFI_ERROR(status)) { sprintf(command_errbuf, "%s: UGA Protocol not present " - "(error=%lu)", argv[0], status & ~EFI_ERROR_MASK); + "(error=%lu)", argv[0], EFI_ERROR_CODE(status)); return (CMD_ERROR); } diff --git a/sys/boot/efi/loader/bootinfo.c b/sys/boot/efi/loader/bootinfo.c index 622f4c61dadf..ac665b200d91 100644 --- a/sys/boot/efi/loader/bootinfo.c +++ b/sys/boot/efi/loader/bootinfo.c @@ -290,7 +290,7 @@ bi_load_efi_data(struct preloaded_file *kfp) pages, &addr); if (EFI_ERROR(status)) { printf("%s: AllocatePages error %lu\n", __func__, - (unsigned long)(status & ~EFI_ERROR_MASK)); + EFI_ERROR_CODE(status)); return (ENOMEM); } @@ -306,7 +306,7 @@ bi_load_efi_data(struct preloaded_file *kfp) status = BS->GetMemoryMap(&sz, mm, &efi_mapkey, &mmsz, &mmver); if (EFI_ERROR(status)) { printf("%s: GetMemoryMap error %lu\n", __func__, - (unsigned long)(status & ~EFI_ERROR_MASK)); + EFI_ERROR_CODE(status)); return (EINVAL); } status = BS->ExitBootServices(IH, efi_mapkey); @@ -320,8 +320,7 @@ bi_load_efi_data(struct preloaded_file *kfp) } BS->FreePages(addr, pages); } - printf("ExitBootServices error %lu\n", - (unsigned long)(status & ~EFI_ERROR_MASK)); + printf("ExitBootServices error %lu\n", EFI_ERROR_CODE(status)); return (EINVAL); } diff --git a/sys/boot/efi/loader/copy.c b/sys/boot/efi/loader/copy.c index 716e9ea328a2..8714786c3471 100644 --- a/sys/boot/efi/loader/copy.c +++ b/sys/boot/efi/loader/copy.c @@ -56,7 +56,7 @@ efi_copy_init(void) STAGE_PAGES, &staging); if (EFI_ERROR(status)) { printf("failed to allocate staging area: %lu\n", - (unsigned long)(status & EFI_ERROR_MASK)); + EFI_ERROR_CODE(status)); return (status); } staging_end = staging + STAGE_PAGES * EFI_PAGE_SIZE; diff --git a/sys/boot/efi/loader/devicename.c b/sys/boot/efi/loader/devicename.c index 89f994112dda..1ba33e8f7ad0 100644 --- 
a/sys/boot/efi/loader/devicename.c +++ b/sys/boot/efi/loader/devicename.c @@ -147,7 +147,7 @@ efi_fmtdev(void *vdev) break; } - return(buf); + return (buf); } /* @@ -161,7 +161,7 @@ efi_setcurrdev(struct env_var *ev, int flags, const void *value) rv = efi_parsedev(&ncurr, value, NULL); if (rv != 0) - return(rv); + return (rv); free(ncurr); env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL); diff --git a/sys/boot/efi/loader/main.c b/sys/boot/efi/loader/main.c index 7a407094e0fe..1fa031f29afe 100644 --- a/sys/boot/efi/loader/main.c +++ b/sys/boot/efi/loader/main.c @@ -227,50 +227,47 @@ command_memmap(int argc, char *argv[]) status = BS->GetMemoryMap(&sz, 0, &key, &dsz, &dver); if (status != EFI_BUFFER_TOO_SMALL) { printf("Can't determine memory map size\n"); - return CMD_ERROR; + return (CMD_ERROR); } map = malloc(sz); status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver); if (EFI_ERROR(status)) { printf("Can't read memory map\n"); - return CMD_ERROR; + return (CMD_ERROR); } ndesc = sz / dsz; printf("%23s %12s %12s %8s %4s\n", - "Type", "Physical", "Virtual", "#Pages", "Attr"); + "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = NextMemoryDescriptor(p, dsz)) { - printf("%23s %012lx %012lx %08lx ", - types[p->Type], - p->PhysicalStart, - p->VirtualStart, - p->NumberOfPages); - if (p->Attribute & EFI_MEMORY_UC) - printf("UC "); - if (p->Attribute & EFI_MEMORY_WC) - printf("WC "); - if (p->Attribute & EFI_MEMORY_WT) - printf("WT "); - if (p->Attribute & EFI_MEMORY_WB) - printf("WB "); - if (p->Attribute & EFI_MEMORY_UCE) - printf("UCE "); - if (p->Attribute & EFI_MEMORY_WP) - printf("WP "); - if (p->Attribute & EFI_MEMORY_RP) - printf("RP "); - if (p->Attribute & EFI_MEMORY_XP) - printf("XP "); - printf("\n"); + printf("%23s %012lx %012lx %08lx ", types[p->Type], + p->PhysicalStart, p->VirtualStart, p->NumberOfPages); + if (p->Attribute & EFI_MEMORY_UC) + printf("UC "); + if (p->Attribute & EFI_MEMORY_WC) + printf("WC 
"); + if (p->Attribute & EFI_MEMORY_WT) + printf("WT "); + if (p->Attribute & EFI_MEMORY_WB) + printf("WB "); + if (p->Attribute & EFI_MEMORY_UCE) + printf("UCE "); + if (p->Attribute & EFI_MEMORY_WP) + printf("WP "); + if (p->Attribute & EFI_MEMORY_RP) + printf("RP "); + if (p->Attribute & EFI_MEMORY_XP) + printf("XP "); + printf("\n"); } - return CMD_OK; + return (CMD_OK); } -COMMAND_SET(configuration, "configuration", - "print configuration tables", command_configuration); +COMMAND_SET(configuration, "configuration", "print configuration tables", + command_configuration); static const char * guid_to_string(EFI_GUID *guid) @@ -318,7 +315,7 @@ command_configuration(int argc, char *argv[]) printf(" at %p\n", ST->ConfigurationTable[i].VendorTable); } - return CMD_OK; + return (CMD_OK); } @@ -334,6 +331,7 @@ command_mode(int argc, char *argv[]) char rowenv[8]; EFI_STATUS status; SIMPLE_TEXT_OUTPUT_INTERFACE *conout; + extern void HO(void); conout = ST->ConOut; @@ -355,7 +353,7 @@ command_mode(int argc, char *argv[]) } sprintf(rowenv, "%u", (unsigned)rows); setenv("LINES", rowenv, 1); - + HO(); /* set cursor */ return (CMD_OK); } @@ -394,20 +392,17 @@ command_nvram(int argc, char *argv[]) status = RS->GetNextVariableName(&varsz, NULL, NULL); for (; status != EFI_NOT_FOUND; ) { - status = RS->GetNextVariableName(&varsz, var, - &varguid); + status = RS->GetNextVariableName(&varsz, var, &varguid); //if (EFI_ERROR(status)) //break; conout->OutputString(conout, var); printf("="); datasz = 0; - status = RS->GetVariable(var, &varguid, NULL, &datasz, - NULL); + status = RS->GetVariable(var, &varguid, NULL, &datasz, NULL); /* XXX: check status */ data = malloc(datasz); - status = RS->GetVariable(var, &varguid, NULL, &datasz, - data); + status = RS->GetVariable(var, &varguid, NULL, &datasz, data); if (EFI_ERROR(status)) printf(""); else { diff --git a/sys/boot/ficl/amd64/sysdep.c b/sys/boot/ficl/amd64/sysdep.c index ad38660843cd..5957b71e461a 100644 --- 
a/sys/boot/ficl/amd64/sysdep.c +++ b/sys/boot/ficl/amd64/sysdep.c @@ -55,7 +55,7 @@ void ficlTextOut(FICL_VM *pVM, char *msg, int fNewline) IGNORE(pVM); while(*msg != 0) - putchar(*(msg++)); + putchar((unsigned char)*(msg++)); if (fNewline) putchar('\n'); diff --git a/sys/boot/forth/beastie.4th b/sys/boot/forth/beastie.4th index 52c403f6d1fa..752cce22a4ff 100644 --- a/sys/boot/forth/beastie.4th +++ b/sys/boot/forth/beastie.4th @@ -85,11 +85,6 @@ variable logoY also support-functions : beastie-start ( -- ) \ starts the menu - s" console" getenv dup -1 <> if - s" efi" 2swap contains? if - s" set beastie_disable=YES" evaluate - then - else drop then s" beastie_disable" getenv dup -1 <> if s" YES" compare-insensitive 0= if any_conf_read? if diff --git a/sys/boot/forth/beastie.4th.8 b/sys/boot/forth/beastie.4th.8 index 534a60ce6bd9..9f77d5db2977 100644 --- a/sys/boot/forth/beastie.4th.8 +++ b/sys/boot/forth/beastie.4th.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd April 27, 2014 +.Dd January 6, 2016 .Dt BEASTIE.4TH 8 .Os .Sh NAME @@ -119,8 +119,7 @@ Sets the desired row position of the logo. Default is 4. If set to .Dq YES , the beastie boot menu will be skipped. -The beastie boot menu is always skipped if booting UEFI or running non-x86 -hardware. +The beastie boot menu is always skipped if running non-x86 hardware. .It Va loader_delay If set to a number higher than zero, introduces a delay before starting the beastie boot menu. During the delay the user can press either Ctrl-C to skip diff --git a/sys/boot/forth/loader.conf.5 b/sys/boot/forth/loader.conf.5 index 37f10bfea5d2..0320e988cdfd 100644 --- a/sys/boot/forth/loader.conf.5 +++ b/sys/boot/forth/loader.conf.5 @@ -23,7 +23,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd April 27, 2014 +.Dd January 6, 2016 .Dt LOADER.CONF 5 .Os .Sh NAME @@ -236,8 +236,7 @@ be displayed. If set to .Dq YES , the beastie boot menu will be skipped. -The beastie boot menu is always skipped if booting UEFI or running non-x86 -hardware. 
+The beastie boot menu is always skipped if running non-x86 hardware. .It Va loader_logo Pq Dq Li orbbw Selects a desired logo in the beastie boot menu. Possible values are: diff --git a/sys/boot/zfs/zfs.c b/sys/boot/zfs/zfs.c index 0e15ac48ff6a..fdb79bb21c1e 100644 --- a/sys/boot/zfs/zfs.c +++ b/sys/boot/zfs/zfs.c @@ -154,7 +154,7 @@ zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */) n = size; if (fp->f_seekp + n > sb.st_size) n = sb.st_size - fp->f_seekp; - + rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n); if (rc) return (rc); @@ -507,7 +507,7 @@ zfs_probe_dev(const char *devname, uint64_t *pool_guid) } } close(pa.fd); - return (0); + return (ret); } /* diff --git a/sys/cddl/boot/zfs/lz4.c b/sys/cddl/boot/zfs/lz4.c index 055bd627fd10..c29f8614155d 100644 --- a/sys/cddl/boot/zfs/lz4.c +++ b/sys/cddl/boot/zfs/lz4.c @@ -52,7 +52,7 @@ lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int dum * Returns 0 on success (decompression function returned non-negative) * and non-zero on failure (decompression function returned negative). 
*/ - return (LZ4_uncompress_unknownOutputSize(s_start + 4, d_start, bufsiz, + return (LZ4_uncompress_unknownOutputSize((const char *)s_start + 4, d_start, bufsiz, d_len) < 0); } diff --git a/sys/conf/files b/sys/conf/files index feac6c0e1de9..3652b3b9f7d9 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2994,6 +2994,7 @@ geom/concat/g_concat.c optional geom_concat geom/eli/g_eli.c optional geom_eli geom/eli/g_eli_crypto.c optional geom_eli geom/eli/g_eli_ctl.c optional geom_eli +geom/eli/g_eli_hmac.c optional geom_eli geom/eli/g_eli_integrity.c optional geom_eli geom/eli/g_eli_key.c optional geom_eli geom/eli/g_eli_key_cache.c optional geom_eli diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c index 7b435607df0c..b98caaedf21f 100644 --- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -264,20 +265,14 @@ void __free_ep(struct iwch_ep_common *epc) free(epc, M_DEVBUF); } -static struct rtentry * +static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) + __be16 peer_port, u8 tos, struct nhop4_extended *pnh4) { - struct route iproute; - struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; - - bzero(&iproute, sizeof iproute); - dst->sin_family = AF_INET; - dst->sin_len = sizeof *dst; - dst->sin_addr.s_addr = peer_ip; - - rtalloc(&iproute); - return iproute.ro_rt; + struct in_addr addr; + + addr.s_addr = peer_ip; + return (fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4)); } static void @@ -1293,7 +1288,7 @@ iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int err = 0; struct iwch_dev *h = to_iwch_dev(cm_id->device); struct iwch_ep *ep; - struct rtentry *rt; + struct nhop4_extended nh4; struct toedev *tdev; if (is_loopback_dst(cm_id)) { @@ -1329,28 +1324,28 @@ iwch_connect(struct iw_cm_id *cm_id, 
struct iw_cm_conn_param *conn_param) goto fail2; /* find a route */ - rt = find_route(cm_id->local_addr.sin_addr.s_addr, + err = find_route(cm_id->local_addr.sin_addr.s_addr, cm_id->remote_addr.sin_addr.s_addr, cm_id->local_addr.sin_port, - cm_id->remote_addr.sin_port, IPTOS_LOWDELAY); - if (!rt) { + cm_id->remote_addr.sin_port, IPTOS_LOWDELAY, &nh4); + if (err) { printf("%s - cannot find route.\n", __FUNCTION__); err = EHOSTUNREACH; goto fail2; } - if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) { + if (!(nh4.nh_ifp->if_flags & IFCAP_TOE)) { printf("%s - interface not TOE capable.\n", __FUNCTION__); - RTFREE(rt); + fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); goto fail2; } - tdev = TOEDEV(rt->rt_ifp); + tdev = TOEDEV(nh4.nh_ifp); if (tdev == NULL) { printf("%s - No toedev for interface.\n", __FUNCTION__); - RTFREE(rt); + fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); goto fail2; } - RTFREE(rt); + fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); state_set(&ep->com, CONNECTING); ep->com.local_addr = cm_id->local_addr; diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c index 985306c3c6e8..d896020abf39 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c @@ -1536,14 +1536,13 @@ assign_rxopt(struct tcpcb *tp, uint16_t tcpopt) struct toepcb *toep = tp->t_toe; struct adapter *sc = toep->tp_tod->tod_softc; - tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40; + tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40; if (G_TCPOPT_TSTAMP(tcpopt)) { tp->t_flags |= TF_RCVD_TSTMP; tp->t_flags |= TF_REQ_TSTMP; /* forcibly set */ tp->ts_recent = 0; /* XXX */ tp->ts_recent_age = tcp_ts_getticks(); - tp->t_maxseg -= TCPOLEN_TSTAMP_APPA; } if (G_TCPOPT_SACK(tcpopt)) diff --git a/sys/dev/cxgb/ulp/tom/cxgb_listen.c b/sys/dev/cxgb/ulp/tom/cxgb_listen.c index 933a83c2aaa7..b7d69df6cd21 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_listen.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_listen.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); 
#include #include #include +#include #include #include #include @@ -480,8 +481,8 @@ do_pass_accept_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) unsigned int tid = GET_TID(req); struct listen_ctx *lctx = lookup_stid(&td->tid_maps, stid); struct l2t_entry *e = NULL; + struct nhop4_basic nh4; struct sockaddr_in nam; - struct rtentry *rt; struct inpcb *inp; struct socket *so; struct port_info *pi; @@ -525,18 +526,12 @@ do_pass_accept_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) nam.sin_len = sizeof(nam); nam.sin_family = AF_INET; nam.sin_addr = inc.inc_faddr; - rt = rtalloc1((struct sockaddr *)&nam, 0, 0); - if (rt == NULL) + if (fib4_lookup_nh_basic(RT_DEFAULT_FIB, nam.sin_addr, 0, 0, &nh4) != 0) REJECT_PASS_ACCEPT(); else { - struct sockaddr *nexthop; - - RT_UNLOCK(rt); - nexthop = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : - (struct sockaddr *)&nam; - if (rt->rt_ifp == ifp) - e = t3_l2t_get(pi, rt->rt_ifp, nexthop); - RTFREE(rt); + nam.sin_addr = nh4.nh_addr; + if (nh4.nh_ifp == ifp) + e = t3_l2t_get(pi, ifp, (struct sockaddr *)&nam); if (e == NULL) REJECT_PASS_ACCEPT(); /* no l2te, or ifp mismatch */ } diff --git a/sys/dev/cxgbe/iw_cxgbe/cm.c b/sys/dev/cxgbe/iw_cxgbe/cm.c index cb4be71e5615..8af7df5f4ebd 100644 --- a/sys/dev/cxgbe/iw_cxgbe/cm.c +++ b/sys/dev/cxgbe/iw_cxgbe/cm.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -86,8 +87,8 @@ static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); static void *alloc_ep(int size, gfp_t flags); void __free_ep(struct c4iw_ep_common *epc); -static struct rtentry * find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos); +static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos, struct nhop4_extended *pnh4); static int close_socket(struct 
c4iw_ep_common *epc, int close); static int shutdown_socket(struct c4iw_ep_common *epc); static void abort_socket(struct c4iw_ep *ep); @@ -201,23 +202,21 @@ set_tcpinfo(struct c4iw_ep *ep) } -static struct rtentry * +static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) + __be16 peer_port, u8 tos, struct nhop4_extended *pnh4) { - struct route iproute; - struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; + struct in_addr addr; + int err; CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port)); - bzero(&iproute, sizeof iproute); - dst->sin_family = AF_INET; - dst->sin_len = sizeof *dst; - dst->sin_addr.s_addr = peer_ip; - rtalloc(&iproute); - CTR2(KTR_IW_CXGBE, "%s:frtE %p", __func__, (uint64_t)iproute.ro_rt); - return iproute.ro_rt; + addr.s_addr = peer_ip; + err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4); + + CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err); + return err; } static int @@ -2012,7 +2011,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int err = 0; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_ep *ep = NULL; - struct rtentry *rt; + struct nhop4_extended nh4; struct toedev *tdev; CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id); @@ -2068,13 +2067,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) init_sock(&ep->com); /* find a route */ - rt = find_route( + err = find_route( cm_id->local_addr.sin_addr.s_addr, cm_id->remote_addr.sin_addr.s_addr, cm_id->local_addr.sin_port, - cm_id->remote_addr.sin_port, 0); + cm_id->remote_addr.sin_port, 0, &nh4); - if (!rt) { + if (err) { CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep); printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); @@ -2082,7 +2081,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail2; } - if (!(rt->rt_ifp->if_capenable & IFCAP_TOE)) { + 
if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE)) { CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep); printf("%s - interface not TOE capable.\n", __func__); @@ -2090,7 +2089,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOPROTOOPT; goto fail3; } - tdev = TOEDEV(rt->rt_ifp); + tdev = TOEDEV(nh4.nh_ifp); if (tdev == NULL) { @@ -2098,7 +2097,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) printf("%s - No toedev for interface.\n", __func__); goto fail3; } - RTFREE(rt); + fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); state_set(&ep->com, CONNECTING); ep->tos = 0; @@ -2117,7 +2116,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) fail3: CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep); - RTFREE(rt); + fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); fail2: cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index d58592e85b49..f18f115c3202 100644 --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -221,7 +221,7 @@ assign_rxopt(struct tcpcb *tp, unsigned int opt) n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else n = sizeof(struct ip) + sizeof(struct tcphdr); - tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - n; + tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n; CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid, G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]); @@ -230,7 +230,6 @@ assign_rxopt(struct tcpcb *tp, unsigned int opt) tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ tp->ts_recent = 0; /* hmmm */ tp->ts_recent_age = tcp_ts_getticks(); - tp->t_maxseg -= TCPOLEN_TSTAMP_APPA; } if (G_TCPOPT_SACK(opt)) diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c index 187a9f879f22..5af9260371f8 100644 --- a/sys/dev/cxgbe/tom/t4_listen.c +++ b/sys/dev/cxgbe/tom/t4_listen.c @@ -49,9 +49,11 @@ __FBSDID("$FreeBSD$"); #include 
#include #include +#include #include #include #include +#include #include #include #include @@ -1095,46 +1097,44 @@ static struct l2t_entry * get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp, struct in_conninfo *inc) { - struct rtentry *rt; struct l2t_entry *e; struct sockaddr_in6 sin6; struct sockaddr *dst = (void *)&sin6; if (inc->inc_flags & INC_ISIPV6) { + struct nhop6_basic nh6; + + bzero(dst, sizeof(struct sockaddr_in6)); dst->sa_len = sizeof(struct sockaddr_in6); dst->sa_family = AF_INET6; - ((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr; if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) { /* no need for route lookup */ e = t4_l2t_get(pi, ifp, dst); return (e); } + + if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &inc->inc6_faddr, + 0, 0, 0, &nh6) != 0) + return (NULL); + if (nh6.nh_ifp != ifp) + return (NULL); + ((struct sockaddr_in6 *)dst)->sin6_addr = nh6.nh_addr; } else { + struct nhop4_basic nh4; + dst->sa_len = sizeof(struct sockaddr_in); dst->sa_family = AF_INET; - ((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr; - } - - rt = rtalloc1(dst, 0, 0); - if (rt == NULL) - return (NULL); - else { - struct sockaddr *nexthop; - - RT_UNLOCK(rt); - if (rt->rt_ifp != ifp) - e = NULL; - else { - if (rt->rt_flags & RTF_GATEWAY) - nexthop = rt->rt_gateway; - else - nexthop = dst; - e = t4_l2t_get(pi, ifp, nexthop); - } - RTFREE(rt); + + if (fib4_lookup_nh_basic(RT_DEFAULT_FIB, inc->inc_faddr, 0, 0, + &nh4) != 0) + return (NULL); + if (nh4.nh_ifp != ifp) + return (NULL); + ((struct sockaddr_in *)dst)->sin_addr = nh4.nh_addr; } + e = t4_l2t_get(pi, ifp, dst); return (e); } diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index d9b3ca559d6e..f586d399822f 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -260,7 +260,9 @@ static bool em_rxeof(struct rx_ring *, int, int *); #ifndef __NO_STRICT_ALIGNMENT static int em_fixup_rx(struct rx_ring *); #endif -static void em_receive_checksum(struct e1000_rx_desc *, struct mbuf *); 
+static void em_setup_rxdesc(union e1000_rx_desc_extended *, + const struct em_rxbuffer *rxbuf); +static void em_receive_checksum(uint32_t status, struct mbuf *); static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int, struct ip *, u32 *, u32 *); static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *, @@ -631,7 +633,7 @@ em_attach(device_t dev) } else adapter->num_tx_desc = em_txd; - if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 || + if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 || (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", EM_DEFAULT_RXD, em_rxd); @@ -1872,7 +1874,7 @@ em_xmit(struct tx_ring *txr, struct mbuf **m_headp) struct adapter *adapter = txr->adapter; bus_dma_segment_t segs[EM_MAX_SCATTER]; bus_dmamap_t map; - struct em_buffer *tx_buffer, *tx_buffer_mapped; + struct em_txbuffer *tx_buffer, *tx_buffer_mapped; struct e1000_tx_desc *ctxd = NULL; struct mbuf *m_head; struct ether_header *eh; @@ -3296,7 +3298,7 @@ em_allocate_queues(struct adapter *adapter) * Next the RX queues... 
*/ rsize = roundup2(adapter->num_rx_desc * - sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); + sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; rxr->adapter = adapter; @@ -3314,7 +3316,7 @@ em_allocate_queues(struct adapter *adapter) error = ENOMEM; goto err_rx_desc; } - rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr; + rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring*/ @@ -3357,7 +3359,7 @@ em_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; - struct em_buffer *txbuf; + struct em_txbuffer *txbuf; int error, i; /* @@ -3380,7 +3382,7 @@ em_allocate_transmit_buffers(struct tx_ring *txr) } if (!(txr->tx_buffers = - (struct em_buffer *) malloc(sizeof(struct em_buffer) * + (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; @@ -3413,7 +3415,7 @@ static void em_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; - struct em_buffer *txbuf; + struct em_txbuffer *txbuf; int i; #ifdef DEV_NETMAP struct netmap_slot *slot; @@ -3632,7 +3634,7 @@ static void em_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; - struct em_buffer *txbuf; + struct em_txbuffer *txbuf; INIT_DEBUGOUT("free_transmit_ring: begin"); @@ -3699,7 +3701,7 @@ em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD = NULL; - struct em_buffer *tx_buffer; + struct em_txbuffer *tx_buffer; int cur, hdr_len; u32 cmd = 0; u16 offload = 0; @@ -3733,29 +3735,38 @@ em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, offload |= CSUM_TCP; 
tucss = hdr_len; tucso = hdr_len + offsetof(struct tcphdr, th_sum); - /* - * Setting up new checksum offload context for every frames - * takes a lot of processing time for hardware. This also - * reduces performance a lot for small sized frames so avoid - * it if driver can use previously configured checksum - * offload context. - */ - if (txr->last_hw_offload == offload) { - if (offload & CSUM_IP) { - if (txr->last_hw_ipcss == ipcss && - txr->last_hw_ipcso == ipcso && - txr->last_hw_tucss == tucss && - txr->last_hw_tucso == tucso) - return; - } else { - if (txr->last_hw_tucss == tucss && - txr->last_hw_tucso == tucso) - return; - } - } - txr->last_hw_offload = offload; - txr->last_hw_tucss = tucss; - txr->last_hw_tucso = tucso; + /* + * The 82574L can only remember the *last* context used + * regardless of queue that it was use for. We cannot reuse + * contexts on this hardware platform and must generate a new + * context every time. 82574L hardware spec, section 7.2.6, + * second note. + */ + if (adapter->num_queues < 2) { + /* + * Setting up new checksum offload context for every + * frames takes a lot of processing time for hardware. + * This also reduces performance a lot for small sized + * frames so avoid it if driver can use previously + * configured checksum offload context. + */ + if (txr->last_hw_offload == offload) { + if (offload & CSUM_IP) { + if (txr->last_hw_ipcss == ipcss && + txr->last_hw_ipcso == ipcso && + txr->last_hw_tucss == tucss && + txr->last_hw_tucso == tucso) + return; + } else { + if (txr->last_hw_tucss == tucss && + txr->last_hw_tucso == tucso) + return; + } + } + txr->last_hw_offload = offload; + txr->last_hw_tucss = tucss; + txr->last_hw_tucso = tucso; + } /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. 
@@ -3771,29 +3782,38 @@ em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, *txd_upper |= E1000_TXD_POPTS_TXSM << 8; tucss = hdr_len; tucso = hdr_len + offsetof(struct udphdr, uh_sum); - /* - * Setting up new checksum offload context for every frames - * takes a lot of processing time for hardware. This also - * reduces performance a lot for small sized frames so avoid - * it if driver can use previously configured checksum - * offload context. - */ - if (txr->last_hw_offload == offload) { - if (offload & CSUM_IP) { - if (txr->last_hw_ipcss == ipcss && - txr->last_hw_ipcso == ipcso && - txr->last_hw_tucss == tucss && - txr->last_hw_tucso == tucso) - return; - } else { - if (txr->last_hw_tucss == tucss && - txr->last_hw_tucso == tucso) - return; + /* + * The 82574L can only remember the *last* context used + * regardless of queue that it was use for. We cannot reuse + * contexts on this hardware platform and must generate a new + * context every time. 82574L hardware spec, section 7.2.6, + * second note. + */ + if (adapter->num_queues < 2) { + /* + * Setting up new checksum offload context for every + * frames takes a lot of processing time for hardware. + * This also reduces performance a lot for small sized + * frames so avoid it if driver can use previously + * configured checksum offload context. + */ + if (txr->last_hw_offload == offload) { + if (offload & CSUM_IP) { + if (txr->last_hw_ipcss == ipcss && + txr->last_hw_ipcso == ipcso && + txr->last_hw_tucss == tucss && + txr->last_hw_tucso == tucso) + return; + } else { + if (txr->last_hw_tucss == tucss && + txr->last_hw_tucso == tucso) + return; + } } - } - txr->last_hw_offload = offload; - txr->last_hw_tucss = tucss; - txr->last_hw_tucso = tucso; + txr->last_hw_offload = offload; + txr->last_hw_tucss = tucss; + txr->last_hw_tucso = tucso; + } /* * Start offset for header checksum calculation. * End offset for header checksum calculation. 
@@ -3836,7 +3856,7 @@ em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD; - struct em_buffer *tx_buffer; + struct em_txbuffer *tx_buffer; int cur, hdr_len; /* @@ -3914,7 +3934,7 @@ em_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; int first, last, done, processed; - struct em_buffer *tx_buffer; + struct em_txbuffer *tx_buffer; struct e1000_tx_desc *tx_desc, *eop_desc; if_t ifp = adapter->ifp; @@ -4020,7 +4040,6 @@ em_txeof(struct tx_ring *txr) txr->busy = EM_TX_IDLE; } - /********************************************************************* * * Refresh RX descriptor mbufs from system mbuf buffer pool. @@ -4031,8 +4050,8 @@ em_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; struct mbuf *m; - bus_dma_segment_t segs[1]; - struct em_buffer *rxbuf; + bus_dma_segment_t segs; + struct em_rxbuffer *rxbuf; int i, j, error, nsegs; bool cleaned = FALSE; @@ -4067,7 +4086,7 @@ em_refresh_mbufs(struct rx_ring *rxr, int limit) /* Use bus_dma machinery to setup the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, - m, segs, &nsegs, BUS_DMA_NOWAIT); + m, &segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: hdr dmamap load" " failure - %d\n", error); @@ -4076,9 +4095,10 @@ em_refresh_mbufs(struct rx_ring *rxr, int limit) goto update; } rxbuf->m_head = m; + rxbuf->paddr = segs.ds_addr; bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); - rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr); + em_setup_rxdesc(&rxr->rx_base[i], rxbuf); cleaned = TRUE; i = j; /* Next is precalulated for us */ @@ -4113,10 +4133,10 @@ em_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; - struct em_buffer *rxbuf; + struct em_rxbuffer *rxbuf; int error; - rxr->rx_buffers = malloc(sizeof(struct em_buffer) * + rxr->rx_buffers = 
malloc(sizeof(struct em_rxbuffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); if (rxr->rx_buffers == NULL) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); @@ -4169,7 +4189,7 @@ static int em_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; - struct em_buffer *rxbuf; + struct em_rxbuffer *rxbuf; bus_dma_segment_t seg[1]; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP @@ -4181,7 +4201,7 @@ em_setup_receive_ring(struct rx_ring *rxr) /* Clear the ring contents */ EM_RX_LOCK(rxr); rsize = roundup2(adapter->num_rx_desc * - sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); + sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_RX, rxr->me, 0); @@ -4212,8 +4232,7 @@ em_setup_receive_ring(struct rx_ring *rxr) addr = PNMB(na, slot + si, &paddr); netmap_load_map(na, rxr->rxtag, rxbuf->map, addr); - /* Update descriptor */ - rxr->rx_base[j].buffer_addr = htole64(paddr); + em_setup_rxdesc(&rxr->rx_base[j], rxbuf); continue; } #endif /* DEV_NETMAP */ @@ -4239,8 +4258,8 @@ em_setup_receive_ring(struct rx_ring *rxr) bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); - /* Update descriptor */ - rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr); + rxbuf->paddr = seg[0].ds_addr; + em_setup_rxdesc(&rxr->rx_base[j], rxbuf); } rxr->next_to_check = 0; rxr->next_to_refresh = 0; @@ -4277,7 +4296,7 @@ em_setup_receive_structures(struct adapter *adapter) for (int i = 0; i < q; ++i) { rxr = &adapter->rx_rings[i]; for (int n = 0; n < adapter->num_rx_desc; n++) { - struct em_buffer *rxbuf; + struct em_rxbuffer *rxbuf; rxbuf = &rxr->rx_buffers[n]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, @@ -4324,7 +4343,7 @@ static void em_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; - struct em_buffer *rxbuf = NULL; + struct em_rxbuffer *rxbuf = NULL; INIT_DEBUGOUT("free_receive_buffers: begin"); 
@@ -4366,11 +4385,10 @@ em_free_receive_buffers(struct rx_ring *rxr) static void em_initialize_receive_unit(struct adapter *adapter) { - struct rx_ring *rxr = adapter->rx_rings; + struct rx_ring *rxr = adapter->rx_rings; if_t ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; - u64 bus_addr; - u32 rctl, rxcsum; + u32 rctl, rxcsum, rfctl; INIT_DEBUGOUT("em_initialize_receive_units: begin"); @@ -4383,6 +4401,25 @@ em_initialize_receive_unit(struct adapter *adapter) if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); + /* Setup the Receive Control Register */ + rctl &= ~(3 << E1000_RCTL_MO_SHIFT); + rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | + E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | + (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + + /* Do not store bad packets */ + rctl &= ~E1000_RCTL_SBP; + + /* Enable Long Packet receive */ + if (if_getmtu(ifp) > ETHERMTU) + rctl |= E1000_RCTL_LPE; + else + rctl &= ~E1000_RCTL_LPE; + + /* Strip the CRC */ + if (!em_disable_crc_stripping) + rctl |= E1000_RCTL_SECRC; + E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); @@ -4394,20 +4431,21 @@ em_initialize_receive_unit(struct adapter *adapter) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); + /* Use extended rx descriptor formats */ + rfctl = E1000_READ_REG(hw, E1000_RFCTL); + rfctl |= E1000_RFCTL_EXTEN; /* ** When using MSIX interrupts we need to throttle ** using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { - u32 rfctl; for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ - rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_ACK_DIS; - E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); } + E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM) { @@ -4424,38 +4462,44 @@ em_initialize_receive_unit(struct adapter 
*adapter) E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); #ifdef EM_MULTIQUEUE +#define RSSKEYLEN 10 if (adapter->num_queues > 1) { - uint32_t rss_key[10]; - uint32_t reta; + uint8_t rss_key[4 * RSSKEYLEN]; + uint32_t reta = 0; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); - for (i = 0; i < 10; ++i) - E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]); + for (i = 0; i < RSSKEYLEN; ++i) { + uint32_t rssrk = 0; + + rssrk = EM_RSSRK_VAL(rss_key, i); + E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); + } /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ - reta = 0; - for (i = 0; i < 4; ++i) { + for (i = 0; i < sizeof(reta); ++i) { uint32_t q; + q = (i % adapter->num_queues) << 7; reta |= q << (8 * i); } - for (i = 0; i < 32; ++i) + + for (i = 0; i < 32; ++i) { E1000_WRITE_REG(hw, E1000_RETA(i), reta); + } E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | - E1000_MRQC_RSS_FIELD_IPV6 | - E1000_MRQC_RSS_FIELD_IPV6_TCP); + E1000_MRQC_RSS_FIELD_IPV6); } #endif /* @@ -4470,11 +4514,11 @@ em_initialize_receive_unit(struct adapter *adapter) for (int i = 0; i < adapter->num_queues; i++, rxr++) { /* Setup the Base and Length of the Rx Descriptor Ring */ + u64 bus_addr = rxr->rxdma.dma_paddr; u32 rdt = adapter->num_rx_desc - 1; /* default */ - bus_addr = rxr->rxdma.dma_paddr; E1000_WRITE_REG(hw, E1000_RDLEN(i), - adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); + adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ @@ -4505,14 +4549,13 @@ em_initialize_receive_unit(struct adapter *adapter) (if_getmtu(ifp) > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); 
E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); - } else if ((adapter->hw.mac.type == e1000_82574) && - (if_getmtu(ifp) > ETHERMTU)) { + } else if (adapter->hw.mac.type == e1000_82574) { for (int i = 0; i < adapter->num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); - rxdctl |= 0x20; /* PTHRESH */ - rxdctl |= 4 << 8; /* HTHRESH */ - rxdctl |= 4 << 16;/* WTHRESH */ + rxdctl |= 0x20; /* PTHRESH */ + rxdctl |= 4 << 8; /* HTHRESH */ + rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } @@ -4525,19 +4568,8 @@ em_initialize_receive_unit(struct adapter *adapter) e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } - /* Setup the Receive Control Register */ - rctl &= ~(3 << E1000_RCTL_MO_SHIFT); - rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | - E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | - (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); - - /* Strip the CRC */ - if (!em_disable_crc_stripping) - rctl |= E1000_RCTL_SECRC; - /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; - rctl &= ~E1000_RCTL_SBP; if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; @@ -4546,11 +4578,8 @@ em_initialize_receive_unit(struct adapter *adapter) else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; - if (if_getmtu(ifp) > ETHERMTU) - rctl |= E1000_RCTL_LPE; - else - rctl &= ~E1000_RCTL_LPE; - + /* ensure we clear use DTYPE of 00 here */ + rctl &= ~0x00000C00; /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); @@ -4575,11 +4604,11 @@ em_rxeof(struct rx_ring *rxr, int count, int *done) struct adapter *adapter = rxr->adapter; if_t ifp = adapter->ifp; struct mbuf *mp, *sendmp; - u8 status = 0; + u32 status = 0; u16 len; int i, processed, rxdone = 0; bool eop; - struct e1000_rx_desc *cur; + union e1000_rx_desc_extended *cur; EM_RX_LOCK(rxr); @@ -4596,21 +4625,20 @@ em_rxeof(struct rx_ring *rxr, int count, int *done) #endif /* DEV_NETMAP */ 
for (i = rxr->next_to_check, processed = 0; count != 0;) { - if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) break; cur = &rxr->rx_base[i]; - status = cur->status; + status = le32toh(cur->wb.upper.status_error); mp = sendmp = NULL; if ((status & E1000_RXD_STAT_DD) == 0) break; - len = le16toh(cur->length); + len = le16toh(cur->wb.upper.length); eop = (status & E1000_RXD_STAT_EOP) != 0; - if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) || + if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) || (rxr->discard == TRUE)) { adapter->dropped_pkts++; ++rxr->rx_discarded; @@ -4647,7 +4675,7 @@ em_rxeof(struct rx_ring *rxr, int count, int *done) sendmp = rxr->fmp; if_setrcvif(sendmp, ifp); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); - em_receive_checksum(cur, sendmp); + em_receive_checksum(status, sendmp); #ifndef __NO_STRICT_ALIGNMENT if (adapter->hw.mac.max_frame_size > (MCLBYTES - ETHER_ALIGN) && @@ -4656,7 +4684,7 @@ em_rxeof(struct rx_ring *rxr, int count, int *done) #endif if (status & E1000_RXD_STAT_VP) { if_setvtag(sendmp, - le16toh(cur->special)); + le16toh(cur->wb.upper.vlan)); sendmp->m_flags |= M_VLANTAG; } #ifndef __NO_STRICT_ALIGNMENT @@ -4670,7 +4698,7 @@ em_rxeof(struct rx_ring *rxr, int count, int *done) BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Zero out the receive descriptors status. 
*/ - cur->status = 0; + cur->wb.upper.status_error &= htole32(~0xFF); ++rxdone; /* cumulative for POLL */ ++processed; @@ -4709,7 +4737,7 @@ em_rxeof(struct rx_ring *rxr, int count, int *done) static __inline void em_rx_discard(struct rx_ring *rxr, int i) { - struct em_buffer *rbuf; + struct em_rxbuffer *rbuf; rbuf = &rxr->rx_buffers[i]; bus_dmamap_unload(rxr->rxtag, rbuf->map); @@ -4781,6 +4809,14 @@ em_fixup_rx(struct rx_ring *rxr) } #endif +static void +em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf) +{ + rxd->read.buffer_addr = htole64(rxbuf->paddr); + /* DD bits must be cleared */ + rxd->wb.upper.status_error= 0; +} + /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. @@ -4789,23 +4825,27 @@ em_fixup_rx(struct rx_ring *rxr) * *********************************************************************/ static void -em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp) +em_receive_checksum(uint32_t status, struct mbuf *mp) { mp->m_pkthdr.csum_flags = 0; /* Ignore Checksum bit is set */ - if (rx_desc->status & E1000_RXD_STAT_IXSM) + if (status & E1000_RXD_STAT_IXSM) return; - if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) - return; - - /* IP Checksum Good? 
*/ - if (rx_desc->status & E1000_RXD_STAT_IPCS) + /* If the IP checksum exists and there is no IP Checksum error */ + if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == + E1000_RXD_STAT_IPCS) { mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); + } /* TCP or UDP checksum */ - if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { + if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == + E1000_RXD_STAT_TCPCS) { + mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + mp->m_pkthdr.csum_data = htons(0xffff); + } + if (status & E1000_RXD_STAT_UDPCS) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h index 8725de35e81d..be9fdc96e6d3 100644 --- a/sys/dev/e1000/if_em.h +++ b/sys/dev/e1000/if_em.h @@ -330,7 +330,7 @@ struct tx_ring { struct taskqueue *tq; u32 next_avail_desc; u32 next_to_clean; - struct em_buffer *tx_buffers; + struct em_txbuffer *tx_buffers; volatile u16 tx_avail; u32 tx_tso; /* last tx was tso */ u16 last_hw_offload; @@ -362,11 +362,11 @@ struct rx_ring { u32 payload; struct task rx_task; struct taskqueue *tq; - struct e1000_rx_desc *rx_base; + union e1000_rx_desc_extended *rx_base; struct em_dma_alloc rxdma; u32 next_to_refresh; u32 next_to_check; - struct em_buffer *rx_buffers; + struct em_rxbuffer *rx_buffers; struct mbuf *fmp; struct mbuf *lmp; @@ -499,12 +499,19 @@ typedef struct _em_vendor_info_t { unsigned int index; } em_vendor_info_t; -struct em_buffer { +struct em_txbuffer { int next_eop; /* Index of the desc to watch */ struct mbuf *m_head; bus_dmamap_t map; /* bus_dma map for packet */ }; +struct em_rxbuffer { + int next_eop; /* Index of the desc to watch */ + struct mbuf *m_head; + bus_dmamap_t map; /* bus_dma map for packet */ + bus_addr_t paddr; +}; + /* ** Find the number of unrefreshed RX descriptors @@ -541,4 +548,9 @@ e1000_rx_unrefreshed(struct rx_ring 
*rxr) #define EM_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) #define EM_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_mtx, MA_OWNED) +#define EM_RSSRK_SIZE 4 +#define EM_RSSRK_VAL(key, i) (key[(i) * EM_RSSRK_SIZE] | \ + key[(i) * EM_RSSRK_SIZE + 1] << 8 | \ + key[(i) * EM_RSSRK_SIZE + 2] << 16 | \ + key[(i) * EM_RSSRK_SIZE + 3] << 24) #endif /* _EM_H_DEFINED_ */ diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index c7ac59399ad2..90d70d98a464 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -2947,12 +2947,7 @@ ixgbe_config_link(struct adapter *adapter) sfp = ixgbe_is_sfp(hw); if (sfp) { - if (hw->phy.multispeed_fiber) { - hw->mac.ops.setup_sfp(hw); - ixgbe_enable_tx_laser(hw); - taskqueue_enqueue(adapter->tq, &adapter->msf_task); - } else - taskqueue_enqueue(adapter->tq, &adapter->mod_task); + taskqueue_enqueue(adapter->tq, &adapter->mod_task); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, @@ -3758,23 +3753,66 @@ ixgbe_handle_mod(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; + enum ixgbe_phy_type orig_type = hw->phy.type; device_t dev = adapter->dev; u32 err; + IXGBE_CORE_LOCK(adapter); + + /* Check to see if the PHY type changed */ + if (hw->phy.ops.identify) { + hw->phy.type = ixgbe_phy_unknown; + hw->phy.ops.identify(hw); + } + + if (hw->phy.type != orig_type) { + device_printf(dev, "Detected phy_type %d\n", hw->phy.type); + + if (hw->phy.type == ixgbe_phy_none) { + hw->phy.sfp_type = ixgbe_sfp_type_unknown; + goto out; + } + + /* Try to do the initialization that was skipped before */ + if (hw->phy.ops.init) + hw->phy.ops.init(hw); + if (hw->phy.ops.reset) + hw->phy.ops.reset(hw); + } + err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); - return; + goto out; } err = hw->mac.ops.setup_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { 
device_printf(dev, "Setup failure - unsupported SFP+ module type.\n"); - return; + goto out; } - taskqueue_enqueue(adapter->tq, &adapter->msf_task); + if (hw->phy.multispeed_fiber) + taskqueue_enqueue(adapter->tq, &adapter->msf_task); +out: + /* Update media type */ + switch (hw->mac.ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + adapter->optics = IFM_10G_SR; + break; + case ixgbe_media_type_copper: + adapter->optics = IFM_10G_TWINAX; + break; + case ixgbe_media_type_cx4: + adapter->optics = IFM_10G_CX4; + break; + default: + adapter->optics = 0; + break; + } + + IXGBE_CORE_UNLOCK(adapter); return; } @@ -3790,6 +3828,7 @@ ixgbe_handle_msf(void *context, int pending) u32 autoneg; bool negotiate; + IXGBE_CORE_LOCK(adapter); /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); @@ -3802,6 +3841,7 @@ ixgbe_handle_msf(void *context, int pending) /* Adjust media types shown in ifconfig */ ifmedia_removeall(&adapter->media); ixgbe_add_media_types(adapter); + IXGBE_CORE_UNLOCK(adapter); return; } diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h index eae4f8c18ca0..28f2dd4bbc64 100644 --- a/sys/dev/netmap/if_em_netmap.h +++ b/sys/dev/netmap/if_em_netmap.h @@ -148,7 +148,7 @@ em_netmap_txsync(struct netmap_kring *kring, int flags) /* device-specific */ struct e1000_tx_desc *curr = &txr->tx_base[nic_i]; - struct em_buffer *txbuf = &txr->tx_buffers[nic_i]; + struct em_txbuffer *txbuf = &txr->tx_buffers[nic_i]; int flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? 
E1000_TXD_CMD_RS : 0; @@ -239,12 +239,12 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags) nm_i = netmap_idx_n2k(kring, nic_i); for (n = 0; ; n++) { // XXX no need to count - struct e1000_rx_desc *curr = &rxr->rx_base[nic_i]; - uint32_t staterr = le32toh(curr->status); + union e1000_rx_desc_extended *curr = &rxr->rx_base[nic_i]; + uint32_t staterr = le32toh(curr->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; - ring->slot[nm_i].len = le16toh(curr->length); + ring->slot[nm_i].len = le16toh(curr->wb.upper.length); ring->slot[nm_i].flags = slot_flags; bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map, BUS_DMASYNC_POSTREAD); @@ -271,19 +271,19 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags) uint64_t paddr; void *addr = PNMB(na, slot, &paddr); - struct e1000_rx_desc *curr = &rxr->rx_base[nic_i]; - struct em_buffer *rxbuf = &rxr->rx_buffers[nic_i]; + union e1000_rx_desc_extended *curr = &rxr->rx_base[nic_i]; + struct em_rxbuffer *rxbuf = &rxr->rx_buffers[nic_i]; if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ goto ring_reset; if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ - curr->buffer_addr = htole64(paddr); + curr->read.buffer_addr = htole64(paddr); netmap_reload_map(na, rxr->rxtag, rxbuf->map, addr); slot->flags &= ~NS_BUF_CHANGED; } - curr->status = 0; + curr->wb.upper.status_error = 0; bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); nm_i = nm_next(nm_i, lim); diff --git a/sys/dev/nvd/nvd.c b/sys/dev/nvd/nvd.c index f459e06b2ab4..24ee07583034 100644 --- a/sys/dev/nvd/nvd.c +++ b/sys/dev/nvd/nvd.c @@ -47,6 +47,8 @@ struct nvd_disk; static disk_ioctl_t nvd_ioctl; static disk_strategy_t nvd_strategy; +static void nvd_done(void *arg, const struct nvme_completion *cpl); + static void *nvd_new_disk(struct nvme_namespace *ns, void *ctrlr); static void destroy_geom_disk(struct nvd_disk *ndisk); @@ -71,6 +73,7 @@ struct nvd_disk { struct nvme_namespace *ns; uint32_t cur_depth; + uint32_t 
ordered_in_flight; TAILQ_ENTRY(nvd_disk) global_tailq; TAILQ_ENTRY(nvd_disk) ctrlr_tailq; @@ -148,6 +151,28 @@ nvd_unload() nvme_unregister_consumer(consumer_handle); } +static int +nvd_bio_submit(struct nvd_disk *ndisk, struct bio *bp) +{ + int err; + + bp->bio_driver1 = NULL; + atomic_add_int(&ndisk->cur_depth, 1); + err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done); + if (err) { + atomic_add_int(&ndisk->cur_depth, -1); + if (__predict_false(bp->bio_flags & BIO_ORDERED)) + atomic_add_int(&ndisk->ordered_in_flight, -1); + bp->bio_error = err; + bp->bio_flags |= BIO_ERROR; + bp->bio_resid = bp->bio_bcount; + biodone(bp); + return (-1); + } + + return (0); +} + static void nvd_strategy(struct bio *bp) { @@ -155,6 +180,18 @@ nvd_strategy(struct bio *bp) ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1; + if (__predict_false(bp->bio_flags & BIO_ORDERED)) + atomic_add_int(&ndisk->ordered_in_flight, 1); + + if (__predict_true(ndisk->ordered_in_flight == 0)) { + nvd_bio_submit(ndisk, bp); + return; + } + + /* + * There are ordered bios in flight, so we need to submit + * bios through the task queue to enforce ordering. + */ mtx_lock(&ndisk->bioqlock); bioq_insert_tail(&ndisk->bioq, bp); mtx_unlock(&ndisk->bioqlock); @@ -186,6 +223,8 @@ nvd_done(void *arg, const struct nvme_completion *cpl) ndisk = bp->bio_disk->d_drv1; atomic_add_int(&ndisk->cur_depth, -1); + if (__predict_false(bp->bio_flags & BIO_ORDERED)) + atomic_add_int(&ndisk->ordered_in_flight, -1); biodone(bp); } @@ -195,7 +234,6 @@ nvd_bioq_process(void *arg, int pending) { struct nvd_disk *ndisk = arg; struct bio *bp; - int err; for (;;) { mtx_lock(&ndisk->bioqlock); @@ -204,30 +242,8 @@ nvd_bioq_process(void *arg, int pending) if (bp == NULL) break; -#ifdef BIO_ORDERED - /* - * BIO_ORDERED flag dictates that all outstanding bios - * must be completed before processing the bio with - * BIO_ORDERED flag set. 
- */ - if (bp->bio_flags & BIO_ORDERED) { - while (ndisk->cur_depth > 0) { - pause("nvd flush", 1); - } - } -#endif - - bp->bio_driver1 = NULL; - atomic_add_int(&ndisk->cur_depth, 1); - - err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done); - - if (err) { - atomic_add_int(&ndisk->cur_depth, -1); - bp->bio_error = err; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - biodone(bp); + if (nvd_bio_submit(ndisk, bp) != 0) { + continue; } #ifdef BIO_ORDERED @@ -287,7 +303,7 @@ nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg) disk->d_unit = TAILQ_LAST(&disk_head, disk_list)->disk->d_unit + 1; - disk->d_flags = 0; + disk->d_flags = DISKFLAG_DIRECT_COMPLETION; if (nvme_ns_get_flags(ns) & NVME_NS_DEALLOCATE_SUPPORTED) disk->d_flags |= DISKFLAG_CANDELETE; @@ -317,6 +333,7 @@ nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg) ndisk->ns = ns; ndisk->disk = disk; ndisk->cur_depth = 0; + ndisk->ordered_in_flight = 0; mtx_init(&ndisk->bioqlock, "NVD bioq lock", NULL, MTX_DEF); bioq_init(&ndisk->bioq); diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c index cc14d34afbc4..9db2b14d13d0 100644 --- a/sys/dev/nvme/nvme.c +++ b/sys/dev/nvme/nvme.c @@ -270,8 +270,6 @@ nvme_attach(device_t dev) return (status); } - nvme_sysctl_initialize_ctrlr(ctrlr); - pci_enable_busmaster(dev); ctrlr->config_hook.ich_func = nvme_ctrlr_start_config_hook; diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index ee4b901a9c75..151f02542626 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -42,8 +42,15 @@ __FBSDID("$FreeBSD$"); #include "nvme_private.h" +/* + * Used for calculating number of CPUs to assign to each core and number of I/O + * queues to allocate per controller. 
+ */ +#define NVME_CEILING(num, div) ((((num) - 1) / (div)) + 1) + static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, struct nvme_async_event_request *aer); +static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr); static int nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr) @@ -140,6 +147,13 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr) */ num_trackers = min(num_trackers, (num_entries-1)); + /* + * This was calculated previously when setting up interrupts, but + * a controller could theoretically support fewer I/O queues than + * MSI-X vectors. So calculate again here just to be safe. + */ + ctrlr->num_cpus_per_ioq = NVME_CEILING(mp_ncpus, ctrlr->num_io_queues); + ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair), M_NVME, M_ZERO | M_WAITOK); @@ -160,8 +174,13 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr) num_trackers, ctrlr); - if (ctrlr->per_cpu_io_queues) - bus_bind_intr(ctrlr->dev, qpair->res, i); + /* + * Do not bother binding interrupts if we only have one I/O + * interrupt thread for this controller. + */ + if (ctrlr->num_io_queues > 1) + bus_bind_intr(ctrlr->dev, qpair->res, + i * ctrlr->num_cpus_per_ioq); } return (0); @@ -306,8 +325,15 @@ nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr) int i; nvme_admin_qpair_disable(&ctrlr->adminq); - for (i = 0; i < ctrlr->num_io_queues; i++) - nvme_io_qpair_disable(&ctrlr->ioq[i]); + /* + * I/O queues are not allocated before the initial HW + * reset, so do not try to disable them. Use is_initialized + * to determine if this is the initial HW reset. 
+ */ + if (ctrlr->is_initialized) { + for (i = 0; i < ctrlr->num_io_queues; i++) + nvme_io_qpair_disable(&ctrlr->ioq[i]); + } DELAY(100*1000); @@ -363,7 +389,7 @@ static int nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr) { struct nvme_completion_poll_status status; - int cq_allocated, i, sq_allocated; + int cq_allocated, sq_allocated; status.done = FALSE; nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues, @@ -384,26 +410,12 @@ nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr) cq_allocated = (status.cpl.cdw0 >> 16) + 1; /* - * Check that the controller was able to allocate the number of - * queues we requested. If not, revert to one IO queue pair. + * Controller may allocate more queues than we requested, + * so use the minimum of the number requested and what was + * actually allocated. */ - if (sq_allocated < ctrlr->num_io_queues || - cq_allocated < ctrlr->num_io_queues) { - - /* - * Destroy extra IO queue pairs that were created at - * controller construction time but are no longer - * needed. This will only happen when a controller - * supports fewer queues than MSI-X vectors. This - * is not the normal case, but does occur with the - * Chatham prototype board. - */ - for (i = 1; i < ctrlr->num_io_queues; i++) - nvme_io_qpair_destroy(&ctrlr->ioq[i]); - - ctrlr->num_io_queues = 1; - ctrlr->per_cpu_io_queues = 0; - } + ctrlr->num_io_queues = min(ctrlr->num_io_queues, sq_allocated); + ctrlr->num_io_queues = min(ctrlr->num_io_queues, cq_allocated); return (0); } @@ -687,9 +699,20 @@ static void nvme_ctrlr_start(void *ctrlr_arg) { struct nvme_controller *ctrlr = ctrlr_arg; + uint32_t old_num_io_queues; int i; - nvme_qpair_reset(&ctrlr->adminq); + /* + * Only reset adminq here when we are restarting the + * controller after a reset. During initialization, + * we have already submitted admin commands to get + * the number of I/O queues supported, so cannot reset + * the adminq again here. 
+ */ + if (ctrlr->is_resetting) { + nvme_qpair_reset(&ctrlr->adminq); + } + for (i = 0; i < ctrlr->num_io_queues; i++) nvme_qpair_reset(&ctrlr->ioq[i]); @@ -700,11 +723,25 @@ nvme_ctrlr_start(void *ctrlr_arg) return; } + /* + * The number of qpairs are determined during controller initialization, + * including using NVMe SET_FEATURES/NUMBER_OF_QUEUES to determine the + * HW limit. We call SET_FEATURES again here so that it gets called + * after any reset for controllers that depend on the driver to + * explicit specify how many queues it will use. This value should + * never change between resets, so panic if somehow that does happen. + */ + old_num_io_queues = ctrlr->num_io_queues; if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) { nvme_ctrlr_fail(ctrlr); return; } + if (old_num_io_queues != ctrlr->num_io_queues) { + panic("num_io_queues changed from %u to %u", old_num_io_queues, + ctrlr->num_io_queues); + } + if (nvme_ctrlr_create_qpairs(ctrlr) != 0) { nvme_ctrlr_fail(ctrlr); return; @@ -727,7 +764,16 @@ nvme_ctrlr_start_config_hook(void *arg) { struct nvme_controller *ctrlr = arg; - nvme_ctrlr_start(ctrlr); + nvme_qpair_reset(&ctrlr->adminq); + nvme_admin_qpair_enable(&ctrlr->adminq); + + if (nvme_ctrlr_set_num_qpairs(ctrlr) == 0 && + nvme_ctrlr_construct_io_qpairs(ctrlr) == 0) + nvme_ctrlr_start(ctrlr); + else + nvme_ctrlr_fail(ctrlr); + + nvme_sysctl_initialize_ctrlr(ctrlr); config_intrhook_disestablish(&ctrlr->config_hook); ctrlr->is_initialized = 1; @@ -778,8 +824,9 @@ static int nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr) { + ctrlr->msix_enabled = 0; ctrlr->num_io_queues = 1; - ctrlr->per_cpu_io_queues = 0; + ctrlr->num_cpus_per_ioq = mp_ncpus; ctrlr->rid = 0; ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ, &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE); @@ -926,13 +973,77 @@ static struct cdevsw nvme_ctrlr_cdevsw = { .d_ioctl = nvme_ctrlr_ioctl }; +static void +nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr) +{ + device_t dev; + int 
per_cpu_io_queues; + int num_vectors_requested, num_vectors_allocated; + int num_vectors_available; + + dev = ctrlr->dev; + per_cpu_io_queues = 1; + TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues); + + ctrlr->force_intx = 0; + TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx); + + /* + * FreeBSD currently cannot allocate more than about 190 vectors at + * boot, meaning that systems with high core count and many devices + * requesting per-CPU interrupt vectors will not get their full + * allotment. So first, try to allocate as many as we may need to + * understand what is available, then immediately release them. + * Then figure out how many of those we will actually use, based on + * assigning an equal number of cores to each I/O queue. + */ + + /* One vector for per core I/O queue, plus one vector for admin queue. */ + num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1); + if (pci_alloc_msix(dev, &num_vectors_available) != 0) { + num_vectors_available = 0; + } + pci_release_msi(dev); + + if (ctrlr->force_intx || num_vectors_available < 2) { + nvme_ctrlr_configure_intx(ctrlr); + return; + } + + if (per_cpu_io_queues) + ctrlr->num_cpus_per_ioq = NVME_CEILING(mp_ncpus, num_vectors_available + 1); + else + ctrlr->num_cpus_per_ioq = mp_ncpus; + + ctrlr->num_io_queues = NVME_CEILING(mp_ncpus, ctrlr->num_cpus_per_ioq); + num_vectors_requested = ctrlr->num_io_queues + 1; + num_vectors_allocated = num_vectors_requested; + + /* + * Now just allocate the number of vectors we need. This should + * succeed, since we previously called pci_alloc_msix() + * successfully returning at least this many vectors, but just to + * be safe, if something goes wrong just revert to INTx. 
+ */ + if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) { + nvme_ctrlr_configure_intx(ctrlr); + return; + } + + if (num_vectors_allocated < num_vectors_requested) { + pci_release_msi(dev); + nvme_ctrlr_configure_intx(ctrlr); + return; + } + + ctrlr->msix_enabled = 1; +} + int nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) { union cap_lo_register cap_lo; union cap_hi_register cap_hi; - int i, per_cpu_io_queues, rid; - int num_vectors_requested, num_vectors_allocated; int status, timeout_period; ctrlr->dev = dev; @@ -967,116 +1078,13 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) nvme_retry_count = NVME_DEFAULT_RETRY_COUNT; TUNABLE_INT_FETCH("hw.nvme.retry_count", &nvme_retry_count); - per_cpu_io_queues = 1; - TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues); - ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE; - - if (ctrlr->per_cpu_io_queues) - ctrlr->num_io_queues = mp_ncpus; - else - ctrlr->num_io_queues = 1; - - ctrlr->force_intx = 0; - TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx); - ctrlr->enable_aborts = 0; TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts); - ctrlr->msix_enabled = 1; - - if (ctrlr->force_intx) { - ctrlr->msix_enabled = 0; - goto intx; - } - - /* One vector per IO queue, plus one vector for admin queue. */ - num_vectors_requested = ctrlr->num_io_queues + 1; - - /* - * If we cannot even allocate 2 vectors (one for admin, one for - * I/O), then revert to INTx. 
- */ - if (pci_msix_count(dev) < 2) { - ctrlr->msix_enabled = 0; - goto intx; - } else if (pci_msix_count(dev) < num_vectors_requested) { - ctrlr->per_cpu_io_queues = FALSE; - ctrlr->num_io_queues = 1; - num_vectors_requested = 2; /* one for admin, one for I/O */ - } - - num_vectors_allocated = num_vectors_requested; - if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) { - ctrlr->msix_enabled = 0; - goto intx; - } else if (num_vectors_allocated < num_vectors_requested) { - if (num_vectors_allocated < 2) { - pci_release_msi(dev); - ctrlr->msix_enabled = 0; - goto intx; - } else { - ctrlr->per_cpu_io_queues = FALSE; - ctrlr->num_io_queues = 1; - /* - * Release whatever vectors were allocated, and just - * reallocate the two needed for the admin and single - * I/O qpair. - */ - num_vectors_allocated = 2; - pci_release_msi(dev); - if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) - panic("could not reallocate any vectors\n"); - if (num_vectors_allocated != 2) - panic("could not reallocate 2 vectors\n"); - } - } - - /* - * On earlier FreeBSD releases, there are reports that - * pci_alloc_msix() can return successfully with all vectors - * requested, but a subsequent bus_alloc_resource_any() - * for one of those vectors fails. This issue occurs more - * readily with multiple devices using per-CPU vectors. - * To workaround this issue, try to allocate the resources now, - * and fall back to INTx if we cannot allocate all of them. - * This issue cannot be reproduced on more recent versions of - * FreeBSD which have increased the maximum number of MSI-X - * vectors, but adding the workaround makes it easier for - * vendors wishing to import this driver into kernels based on - * older versions of FreeBSD. 
- */ - for (i = 0; i < num_vectors_allocated; i++) { - rid = i + 1; - ctrlr->msi_res[i] = bus_alloc_resource_any(ctrlr->dev, - SYS_RES_IRQ, &rid, RF_ACTIVE); - - if (ctrlr->msi_res[i] == NULL) { - ctrlr->msix_enabled = 0; - while (i > 0) { - i--; - bus_release_resource(ctrlr->dev, - SYS_RES_IRQ, - rman_get_rid(ctrlr->msi_res[i]), - ctrlr->msi_res[i]); - } - pci_release_msi(dev); - nvme_printf(ctrlr, "could not obtain all MSI-X " - "resources, reverting to intx\n"); - break; - } - } - -intx: - - if (!ctrlr->msix_enabled) - nvme_ctrlr_configure_intx(ctrlr); + nvme_ctrlr_setup_interrupts(ctrlr); ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE; nvme_ctrlr_construct_admin_qpair(ctrlr); - status = nvme_ctrlr_construct_io_qpairs(ctrlr); - - if (status != 0) - return (status); ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "nvme%d", device_get_unit(dev)); @@ -1188,11 +1196,7 @@ nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr, { struct nvme_qpair *qpair; - if (ctrlr->per_cpu_io_queues) - qpair = &ctrlr->ioq[curcpu]; - else - qpair = &ctrlr->ioq[0]; - + qpair = &ctrlr->ioq[curcpu / ctrlr->num_cpus_per_ioq]; nvme_qpair_submit_request(qpair, req); } diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 6137b41e6425..33307117b5be 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -265,7 +265,7 @@ struct nvme_controller { uint32_t enable_aborts; uint32_t num_io_queues; - boolean_t per_cpu_io_queues; + uint32_t num_cpus_per_ioq; /* Fields for tracking progress during controller initialization. */ struct intr_config_hook config_hook; @@ -276,8 +276,6 @@ struct nvme_controller { struct task fail_req_task; struct taskqueue *taskqueue; - struct resource *msi_res[MAXCPU + 1]; - /* For shared legacy interrupt. 
*/ int rid; struct resource *res; diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index d0cb8c6212c7..92fe6722e4f4 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -479,8 +479,9 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, * the queue's vector to get the corresponding rid to use. */ qpair->rid = vector + 1; - qpair->res = ctrlr->msi_res[vector]; + qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ, + &qpair->rid, RF_ACTIVE); bus_setup_intr(ctrlr->dev, qpair->res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_qpair_msix_handler, qpair, &qpair->tag); diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index 57adc40addc8..a2b4e6517ee4 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -571,40 +571,6 @@ g_eli_worker(void *arg) } } -/* - * Here we generate IV. It is unique for every sector. - */ -void -g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv, - size_t size) -{ - uint8_t off[8]; - - if ((sc->sc_flags & G_ELI_FLAG_NATIVE_BYTE_ORDER) != 0) - bcopy(&offset, off, sizeof(off)); - else - le64enc(off, (uint64_t)offset); - - switch (sc->sc_ealgo) { - case CRYPTO_AES_XTS: - bcopy(off, iv, sizeof(off)); - bzero(iv + sizeof(off), size - sizeof(off)); - break; - default: - { - u_char hash[SHA256_DIGEST_LENGTH]; - SHA256_CTX ctx; - - /* Copy precalculated SHA256 context for IV-Key. */ - bcopy(&sc->sc_ivctx, &ctx, sizeof(ctx)); - SHA256_Update(&ctx, off, sizeof(off)); - SHA256_Final(hash, &ctx); - bcopy(hash, iv, MIN(sizeof(hash), size)); - break; - } - } -} - int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp, struct g_eli_metadata *md) @@ -751,44 +717,9 @@ g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp, else gp->access = g_std_access; - sc->sc_version = md->md_version; - sc->sc_inflight = 0; - sc->sc_crypto = G_ELI_CRYPTO_UNKNOWN; - sc->sc_flags = md->md_flags; - /* Backward compatibility. 
*/ - if (md->md_version < G_ELI_VERSION_04) - sc->sc_flags |= G_ELI_FLAG_NATIVE_BYTE_ORDER; - if (md->md_version < G_ELI_VERSION_05) - sc->sc_flags |= G_ELI_FLAG_SINGLE_KEY; - if (md->md_version < G_ELI_VERSION_06 && - (sc->sc_flags & G_ELI_FLAG_AUTH) != 0) { - sc->sc_flags |= G_ELI_FLAG_FIRST_KEY; - } - if (md->md_version < G_ELI_VERSION_07) - sc->sc_flags |= G_ELI_FLAG_ENC_IVKEY; - sc->sc_ealgo = md->md_ealgo; + eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize); sc->sc_nkey = nkey; - if (sc->sc_flags & G_ELI_FLAG_AUTH) { - sc->sc_akeylen = sizeof(sc->sc_akey) * 8; - sc->sc_aalgo = md->md_aalgo; - sc->sc_alen = g_eli_hashlen(sc->sc_aalgo); - - sc->sc_data_per_sector = bpp->sectorsize - sc->sc_alen; - /* - * Some hash functions (like SHA1 and RIPEMD160) generates hash - * which length is not multiple of 128 bits, but we want data - * length to be multiple of 128, so we can encrypt without - * padding. The line below rounds down data length to multiple - * of 128 bits. - */ - sc->sc_data_per_sector -= sc->sc_data_per_sector % 16; - - sc->sc_bytes_per_sector = - (md->md_sectorsize - 1) / sc->sc_data_per_sector + 1; - sc->sc_bytes_per_sector *= bpp->sectorsize; - } - gp->softc = sc; sc->sc_geom = gp; @@ -831,22 +762,10 @@ g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp, goto failed; } - sc->sc_sectorsize = md->md_sectorsize; - sc->sc_mediasize = bpp->mediasize; - if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) - sc->sc_mediasize -= bpp->sectorsize; - if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) - sc->sc_mediasize -= (sc->sc_mediasize % sc->sc_sectorsize); - else { - sc->sc_mediasize /= sc->sc_bytes_per_sector; - sc->sc_mediasize *= sc->sc_sectorsize; - } - /* * Remember the keys in our softc structure. 
*/ g_eli_mkey_propagate(sc, mkey); - sc->sc_ekeylen = md->md_keylen; LIST_INIT(&sc->sc_workers); diff --git a/sys/geom/eli/g_eli.h b/sys/geom/eli/g_eli.h index e4dbee6819ab..3deb865c4af7 100644 --- a/sys/geom/eli/g_eli.h +++ b/sys/geom/eli/g_eli.h @@ -40,8 +40,6 @@ #include #include #include -#include -#include #include #else #include @@ -49,6 +47,8 @@ #include #include #endif +#include +#include #ifndef _OpenSSL_ #include #endif @@ -132,15 +132,15 @@ /* Switch data encryption key every 2^20 blocks. */ #define G_ELI_KEY_SHIFT 20 +#define G_ELI_CRYPTO_UNKNOWN 0 +#define G_ELI_CRYPTO_HW 1 +#define G_ELI_CRYPTO_SW 2 + #ifdef _KERNEL extern int g_eli_debug; extern u_int g_eli_overwrites; extern u_int g_eli_batch; -#define G_ELI_CRYPTO_UNKNOWN 0 -#define G_ELI_CRYPTO_HW 1 -#define G_ELI_CRYPTO_SW 2 - #define G_ELI_DEBUG(lvl, ...) do { \ if (g_eli_debug >= (lvl)) { \ printf("GEOM_ELI"); \ @@ -173,6 +173,8 @@ struct g_eli_worker { LIST_ENTRY(g_eli_worker) w_next; }; +#endif /* _KERNEL */ + struct g_eli_softc { struct g_geom *sc_geom; u_int sc_version; @@ -200,15 +202,35 @@ struct g_eli_softc { size_t sc_sectorsize; u_int sc_bytes_per_sector; u_int sc_data_per_sector; +#ifndef _KERNEL + int sc_cpubind; +#else /* _KERNEL */ boolean_t sc_cpubind; /* Only for software cryptography. */ struct bio_queue_head sc_queue; struct mtx sc_queue_mtx; LIST_HEAD(, g_eli_worker) sc_workers; +#endif /* _KERNEL */ }; #define sc_name sc_geom->name -#endif /* _KERNEL */ + +#define G_ELI_KEY_MAGIC 0xe11341c + +struct g_eli_key { + /* Key value, must be first in the structure. */ + uint8_t gek_key[G_ELI_DATAKEYLEN]; + /* Magic. */ + int gek_magic; + /* Key number. */ + uint64_t gek_keyno; + /* Reference counter. */ + int gek_count; + /* Keeps keys sorted by most recent use. */ + TAILQ_ENTRY(g_eli_key) gek_next; + /* Keeps keys sorted by number. */ + RB_ENTRY(g_eli_key) gek_link; +}; struct g_eli_metadata { char md_magic[16]; /* Magic value. 
*/ @@ -569,6 +591,60 @@ g_eli_hashlen(u_int algo) return (0); } +static __inline void +eli_metadata_softc(struct g_eli_softc *sc, const struct g_eli_metadata *md, + u_int sectorsize, off_t mediasize) +{ + + sc->sc_version = md->md_version; + sc->sc_inflight = 0; + sc->sc_crypto = G_ELI_CRYPTO_UNKNOWN; + sc->sc_flags = md->md_flags; + /* Backward compatibility. */ + if (md->md_version < G_ELI_VERSION_04) + sc->sc_flags |= G_ELI_FLAG_NATIVE_BYTE_ORDER; + if (md->md_version < G_ELI_VERSION_05) + sc->sc_flags |= G_ELI_FLAG_SINGLE_KEY; + if (md->md_version < G_ELI_VERSION_06 && + (sc->sc_flags & G_ELI_FLAG_AUTH) != 0) { + sc->sc_flags |= G_ELI_FLAG_FIRST_KEY; + } + if (md->md_version < G_ELI_VERSION_07) + sc->sc_flags |= G_ELI_FLAG_ENC_IVKEY; + sc->sc_ealgo = md->md_ealgo; + + if (sc->sc_flags & G_ELI_FLAG_AUTH) { + sc->sc_akeylen = sizeof(sc->sc_akey) * 8; + sc->sc_aalgo = md->md_aalgo; + sc->sc_alen = g_eli_hashlen(sc->sc_aalgo); + + sc->sc_data_per_sector = sectorsize - sc->sc_alen; + /* + * Some hash functions (like SHA1 and RIPEMD160) generates hash + * which length is not multiple of 128 bits, but we want data + * length to be multiple of 128, so we can encrypt without + * padding. The line below rounds down data length to multiple + * of 128 bits. 
+ */ + sc->sc_data_per_sector -= sc->sc_data_per_sector % 16; + + sc->sc_bytes_per_sector = + (md->md_sectorsize - 1) / sc->sc_data_per_sector + 1; + sc->sc_bytes_per_sector *= sectorsize; + } + sc->sc_sectorsize = md->md_sectorsize; + sc->sc_mediasize = mediasize; + if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) + sc->sc_mediasize -= sectorsize; + if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) + sc->sc_mediasize -= (sc->sc_mediasize % sc->sc_sectorsize); + else { + sc->sc_mediasize /= sc->sc_bytes_per_sector; + sc->sc_mediasize *= sc->sc_sectorsize; + } + sc->sc_ekeylen = md->md_keylen; +} + #ifdef _KERNEL int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp, struct g_eli_metadata *md); @@ -583,8 +659,6 @@ void g_eli_config(struct gctl_req *req, struct g_class *mp, const char *verb); void g_eli_read_done(struct bio *bp); void g_eli_write_done(struct bio *bp); int g_eli_crypto_rerun(struct cryptop *crp); -void g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv, - size_t size); void g_eli_crypto_read(struct g_eli_softc *sc, struct bio *bp, boolean_t fromworker); void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp); @@ -592,6 +666,8 @@ void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp); void g_eli_auth_read(struct g_eli_softc *sc, struct bio *bp); void g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp); #endif +void g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv, + size_t size); void g_eli_mkey_hmac(unsigned char *mkey, const unsigned char *key); int g_eli_mkey_decrypt(const struct g_eli_metadata *md, @@ -620,6 +696,8 @@ void g_eli_crypto_hmac_final(struct hmac_ctx *ctx, uint8_t *md, size_t mdsize); void g_eli_crypto_hmac(const uint8_t *hkey, size_t hkeysize, const uint8_t *data, size_t datasize, uint8_t *md, size_t mdsize); +void g_eli_key_fill(struct g_eli_softc *sc, struct g_eli_key *key, + uint64_t keyno); #ifdef _KERNEL void g_eli_key_init(struct g_eli_softc *sc); void g_eli_key_destroy(struct 
g_eli_softc *sc); diff --git a/sys/geom/eli/g_eli_crypto.c b/sys/geom/eli/g_eli_crypto.c index 43eabf49e3db..2d145fd80c5a 100644 --- a/sys/geom/eli/g_eli_crypto.c +++ b/sys/geom/eli/g_eli_crypto.c @@ -221,75 +221,3 @@ g_eli_crypto_decrypt(u_int algo, u_char *data, size_t datasize, return (g_eli_crypto_cipher(algo, 0, data, datasize, key, keysize)); } - -void -g_eli_crypto_hmac_init(struct hmac_ctx *ctx, const uint8_t *hkey, - size_t hkeylen) -{ - u_char k_ipad[128], key[128]; - SHA512_CTX lctx; - u_int i; - - bzero(key, sizeof(key)); - if (hkeylen == 0) - ; /* do nothing */ - else if (hkeylen <= 128) - bcopy(hkey, key, hkeylen); - else { - /* If key is longer than 128 bytes reset it to key = SHA512(key). */ - SHA512_Init(&lctx); - SHA512_Update(&lctx, hkey, hkeylen); - SHA512_Final(key, &lctx); - } - - /* XOR key with ipad and opad values. */ - for (i = 0; i < sizeof(key); i++) { - k_ipad[i] = key[i] ^ 0x36; - ctx->k_opad[i] = key[i] ^ 0x5c; - } - bzero(key, sizeof(key)); - /* Perform inner SHA512. */ - SHA512_Init(&ctx->shactx); - SHA512_Update(&ctx->shactx, k_ipad, sizeof(k_ipad)); - bzero(k_ipad, sizeof(k_ipad)); -} - -void -g_eli_crypto_hmac_update(struct hmac_ctx *ctx, const uint8_t *data, - size_t datasize) -{ - - SHA512_Update(&ctx->shactx, data, datasize); -} - -void -g_eli_crypto_hmac_final(struct hmac_ctx *ctx, uint8_t *md, size_t mdsize) -{ - u_char digest[SHA512_MDLEN]; - SHA512_CTX lctx; - - SHA512_Final(digest, &ctx->shactx); - /* Perform outer SHA512. */ - SHA512_Init(&lctx); - SHA512_Update(&lctx, ctx->k_opad, sizeof(ctx->k_opad)); - bzero(ctx, sizeof(*ctx)); - SHA512_Update(&lctx, digest, sizeof(digest)); - SHA512_Final(digest, &lctx); - bzero(&lctx, sizeof(lctx)); - /* mdsize == 0 means "Give me the whole hash!" 
*/ - if (mdsize == 0) - mdsize = SHA512_MDLEN; - bcopy(digest, md, mdsize); - bzero(digest, sizeof(digest)); -} - -void -g_eli_crypto_hmac(const uint8_t *hkey, size_t hkeysize, const uint8_t *data, - size_t datasize, uint8_t *md, size_t mdsize) -{ - struct hmac_ctx ctx; - - g_eli_crypto_hmac_init(&ctx, hkey, hkeysize); - g_eli_crypto_hmac_update(&ctx, data, datasize); - g_eli_crypto_hmac_final(&ctx, md, mdsize); -} diff --git a/sys/geom/eli/g_eli_hmac.c b/sys/geom/eli/g_eli_hmac.c new file mode 100644 index 000000000000..36b76deb9fda --- /dev/null +++ b/sys/geom/eli/g_eli_hmac.c @@ -0,0 +1,150 @@ +/*- + * Copyright (c) 2005-2010 Pawel Jakub Dawidek + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#ifdef _KERNEL +#include +#include +#include +#else +#include +#include +#include +#include +#include +#include +#define _OpenSSL_ +#endif +#include + +void +g_eli_crypto_hmac_init(struct hmac_ctx *ctx, const uint8_t *hkey, + size_t hkeylen) +{ + u_char k_ipad[128], key[128]; + SHA512_CTX lctx; + u_int i; + + bzero(key, sizeof(key)); + if (hkeylen == 0) + ; /* do nothing */ + else if (hkeylen <= 128) + bcopy(hkey, key, hkeylen); + else { + /* If key is longer than 128 bytes reset it to key = SHA512(key). */ + SHA512_Init(&lctx); + SHA512_Update(&lctx, hkey, hkeylen); + SHA512_Final(key, &lctx); + } + + /* XOR key with ipad and opad values. */ + for (i = 0; i < sizeof(key); i++) { + k_ipad[i] = key[i] ^ 0x36; + ctx->k_opad[i] = key[i] ^ 0x5c; + } + bzero(key, sizeof(key)); + /* Perform inner SHA512. */ + SHA512_Init(&ctx->shactx); + SHA512_Update(&ctx->shactx, k_ipad, sizeof(k_ipad)); + bzero(k_ipad, sizeof(k_ipad)); +} + +void +g_eli_crypto_hmac_update(struct hmac_ctx *ctx, const uint8_t *data, + size_t datasize) +{ + + SHA512_Update(&ctx->shactx, data, datasize); +} + +void +g_eli_crypto_hmac_final(struct hmac_ctx *ctx, uint8_t *md, size_t mdsize) +{ + u_char digest[SHA512_MDLEN]; + SHA512_CTX lctx; + + SHA512_Final(digest, &ctx->shactx); + /* Perform outer SHA512. */ + SHA512_Init(&lctx); + SHA512_Update(&lctx, ctx->k_opad, sizeof(ctx->k_opad)); + bzero(ctx, sizeof(*ctx)); + SHA512_Update(&lctx, digest, sizeof(digest)); + SHA512_Final(digest, &lctx); + bzero(&lctx, sizeof(lctx)); + /* mdsize == 0 means "Give me the whole hash!" 
*/ + if (mdsize == 0) + mdsize = SHA512_MDLEN; + bcopy(digest, md, mdsize); + bzero(digest, sizeof(digest)); +} + +void +g_eli_crypto_hmac(const uint8_t *hkey, size_t hkeysize, const uint8_t *data, + size_t datasize, uint8_t *md, size_t mdsize) +{ + struct hmac_ctx ctx; + + g_eli_crypto_hmac_init(&ctx, hkey, hkeysize); + g_eli_crypto_hmac_update(&ctx, data, datasize); + g_eli_crypto_hmac_final(&ctx, md, mdsize); +} + +/* + * Here we generate IV. It is unique for every sector. + */ +void +g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv, + size_t size) +{ + uint8_t off[8]; + + if ((sc->sc_flags & G_ELI_FLAG_NATIVE_BYTE_ORDER) != 0) + bcopy(&offset, off, sizeof(off)); + else + le64enc(off, (uint64_t)offset); + + switch (sc->sc_ealgo) { + case CRYPTO_AES_XTS: + bcopy(off, iv, sizeof(off)); + bzero(iv + sizeof(off), size - sizeof(off)); + break; + default: + { + u_char hash[SHA256_DIGEST_LENGTH]; + SHA256_CTX ctx; + + /* Copy precalculated SHA256 context for IV-Key. */ + bcopy(&sc->sc_ivctx, &ctx, sizeof(ctx)); + SHA256_Update(&ctx, off, sizeof(off)); + SHA256_Final(hash, &ctx); + bcopy(hash, iv, MIN(sizeof(hash), size)); + break; + } + } +} diff --git a/sys/geom/eli/g_eli_key_cache.c b/sys/geom/eli/g_eli_key_cache.c index cb867166cee3..0b909bef55ee 100644 --- a/sys/geom/eli/g_eli_key_cache.c +++ b/sys/geom/eli/g_eli_key_cache.c @@ -28,17 +28,20 @@ __FBSDID("$FreeBSD$"); #include +#ifdef _KERNEL #include #include -#include #include #include +#endif /* _KERNEL */ +#include #include #include #include +#ifdef _KERNEL MALLOC_DECLARE(M_ELI); SYSCTL_DECL(_kern_geom_eli); @@ -56,22 +59,7 @@ static uint64_t g_eli_key_cache_misses; SYSCTL_UQUAD(_kern_geom_eli, OID_AUTO, key_cache_misses, CTLFLAG_RW, &g_eli_key_cache_misses, 0, "Key cache misses"); -#define G_ELI_KEY_MAGIC 0xe11341c - -struct g_eli_key { - /* Key value, must be first in the structure. */ - uint8_t gek_key[G_ELI_DATAKEYLEN]; - /* Magic. */ - int gek_magic; - /* Key number. 
*/ - uint64_t gek_keyno; - /* Reference counter. */ - int gek_count; - /* Keeps keys sorted by most recent use. */ - TAILQ_ENTRY(g_eli_key) gek_next; - /* Keeps keys sorted by number. */ - RB_ENTRY(g_eli_key) gek_link; -}; +#endif /* _KERNEL */ static int g_eli_key_cmp(const struct g_eli_key *a, const struct g_eli_key *b) @@ -84,10 +72,7 @@ g_eli_key_cmp(const struct g_eli_key *a, const struct g_eli_key *b) return (0); } -RB_PROTOTYPE(g_eli_key_tree, g_eli_key, gek_link, g_eli_key_cmp); -RB_GENERATE(g_eli_key_tree, g_eli_key, gek_link, g_eli_key_cmp); - -static void +void g_eli_key_fill(struct g_eli_softc *sc, struct g_eli_key *key, uint64_t keyno) { const uint8_t *ekey; @@ -110,6 +95,10 @@ g_eli_key_fill(struct g_eli_softc *sc, struct g_eli_key *key, uint64_t keyno) key->gek_magic = G_ELI_KEY_MAGIC; } +#ifdef _KERNEL +RB_PROTOTYPE(g_eli_key_tree, g_eli_key, gek_link, g_eli_key_cmp); +RB_GENERATE(g_eli_key_tree, g_eli_key, gek_link, g_eli_key_cmp); + static struct g_eli_key * g_eli_key_allocate(struct g_eli_softc *sc, uint64_t keyno) { @@ -350,3 +339,4 @@ g_eli_key_drop(struct g_eli_softc *sc, uint8_t *rawkey) } mtx_unlock(&sc->sc_ekeys_lock); } +#endif /* _KERNEL */ diff --git a/sys/geom/eli/pkcs5v2.c b/sys/geom/eli/pkcs5v2.c index 05677c1b76f1..6992801958ce 100644 --- a/sys/geom/eli/pkcs5v2.c +++ b/sys/geom/eli/pkcs5v2.c @@ -83,6 +83,7 @@ pkcs5v2_genkey(uint8_t *key, unsigned keylen, const uint8_t *salt, } #ifndef _KERNEL +#ifndef _STAND /* * Return the number of microseconds needed for 'interations' iterations. 
*/ @@ -120,4 +121,5 @@ pkcs5v2_calculate(int usecs) } return (((intmax_t)iterations * (intmax_t)usecs) / v); } +#endif /* !_STAND */ #endif /* !_KERNEL */ diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index ff3736d2dffa..f1bd82122372 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -288,7 +288,7 @@ static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss, "Number of cache misses"); static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap, "Number of cache misses we do not want to cache"); -static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps, +static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps, "Number of cache hits (positive) we do not want to cache"); static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits, "Number of cache hits (positive)"); @@ -303,8 +303,6 @@ SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE, &nchstats, sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); - - static void cache_zap(struct namecache *ncp); static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen); @@ -410,8 +408,7 @@ SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD| * pointer to a vnode or if it is just a negative cache entry. */ static void -cache_zap(ncp) - struct namecache *ncp; +cache_zap(struct namecache *ncp) { struct vnode *vp; @@ -446,7 +443,7 @@ cache_zap(ncp) } numcache--; cache_free(ncp); - if (vp) + if (vp != NULL) vdrop(vp); } @@ -468,12 +465,8 @@ cache_zap(ncp) */ int -cache_lookup(dvp, vpp, cnp, tsp, ticksp) - struct vnode *dvp; - struct vnode **vpp; - struct componentname *cnp; - struct timespec *tsp; - int *ticksp; +cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, + struct timespec *tsp, int *ticksp) { struct namecache *ncp; uint32_t hash; @@ -701,12 +694,8 @@ cache_lookup(dvp, vpp, cnp, tsp, ticksp) * Add an entry to the cache. 
*/ void -cache_enter_time(dvp, vp, cnp, tsp, dtsp) - struct vnode *dvp; - struct vnode *vp; - struct componentname *cnp; - struct timespec *tsp; - struct timespec *dtsp; +cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, + struct timespec *tsp, struct timespec *dtsp) { struct namecache *ncp, *n2; struct namecache_ts *n3; @@ -836,9 +825,9 @@ cache_enter_time(dvp, vp, cnp, tsp, dtsp) * has populated v_cache_dd pointer already. */ if (dvp->v_cache_dd != NULL) { - CACHE_WUNLOCK(); - cache_free(ncp); - return; + CACHE_WUNLOCK(); + cache_free(ncp); + return; } KASSERT(vp == NULL || vp->v_type == VDIR, ("wrong vnode type %p", vp)); @@ -846,7 +835,7 @@ cache_enter_time(dvp, vp, cnp, tsp, dtsp) } numcache++; - if (!vp) { + if (vp == NULL) { numneg++; if (cnp->cn_flags & ISWHITEOUT) ncp->nc_flag |= NCF_WHITE; @@ -884,7 +873,7 @@ cache_enter_time(dvp, vp, cnp, tsp, dtsp) * "negative" cache queue, otherwise, we place it into the * destination vnode's cache entries queue. */ - if (vp) { + if (vp != NULL) { TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); SDT_PROBE3(vfs, namecache, enter, done, dvp, nc_get_name(ncp), vp); @@ -975,8 +964,7 @@ cache_changesize(int newmaxvnodes) * Invalidate all entries to a particular vnode. */ void -cache_purge(vp) - struct vnode *vp; +cache_purge(struct vnode *vp) { CTR1(KTR_VFS, "cache_purge(%p)", vp); @@ -999,8 +987,7 @@ cache_purge(vp) * Invalidate all negative entries for a particular directory vnode. */ void -cache_purge_negative(vp) - struct vnode *vp; +cache_purge_negative(struct vnode *vp) { struct namecache *cp, *ncp; @@ -1018,8 +1005,7 @@ cache_purge_negative(vp) * Flush all entries referencing a particular filesystem. 
*/ void -cache_purgevfs(mp) - struct mount *mp; +cache_purgevfs(struct mount *mp) { struct nchashhead *ncpp; struct namecache *ncp, *nnp; @@ -1042,12 +1028,7 @@ cache_purgevfs(mp) */ int -vfs_cache_lookup(ap) - struct vop_lookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; +vfs_cache_lookup(struct vop_lookup_args *ap) { struct vnode *dvp; int error; @@ -1088,9 +1069,7 @@ SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, /* Implementation of the getcwd syscall. */ int -sys___getcwd(td, uap) - struct thread *td; - struct __getcwd_args *uap; +sys___getcwd(struct thread *td, struct __getcwd_args *uap) { return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen, diff --git a/sys/modules/geom/geom_eli/Makefile b/sys/modules/geom/geom_eli/Makefile index 51d821a64f62..c42ccf19f737 100644 --- a/sys/modules/geom/geom_eli/Makefile +++ b/sys/modules/geom/geom_eli/Makefile @@ -6,6 +6,7 @@ KMOD= geom_eli SRCS= g_eli.c SRCS+= g_eli_crypto.c SRCS+= g_eli_ctl.c +SRCS+= g_eli_hmac.c SRCS+= g_eli_integrity.c SRCS+= g_eli_key.c SRCS+= g_eli_key_cache.c diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c index 9b3f7d839ce5..644d46f7a712 100644 --- a/sys/netgraph/netflow/netflow.c +++ b/sys/netgraph/netflow/netflow.c @@ -395,9 +395,8 @@ hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, int plen, uint8_t flags, uint8_t tcp_flags) { struct flow6_entry *fle6; - struct sockaddr_in6 *src, *dst; + struct sockaddr_in6 sin6; struct rtentry *rt; - struct route_in6 rin6; mtx_assert(&hsh6->mtx, MA_OWNED); @@ -425,16 +424,14 @@ hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. 
*/ if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { - bzero(&rin6, sizeof(struct route_in6)); - dst = (struct sockaddr_in6 *)&rin6.ro_dst; - dst->sin6_len = sizeof(struct sockaddr_in6); - dst->sin6_family = AF_INET6; - dst->sin6_addr = r->dst.r_dst6; + bzero(&sin6, sizeof(struct sockaddr_in6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = r->dst.r_dst6; - rin6.ro_rt = rtalloc1_fib((struct sockaddr *)dst, 0, 0, r->fib); + rt = rtalloc1_fib((struct sockaddr *)&sin6, 0, 0, r->fib); - if (rin6.ro_rt != NULL) { - rt = rin6.ro_rt; + if (rt != NULL) { fle6->f.fle_o_ifx = rt->rt_ifp->if_index; if (rt->rt_flags & RTF_GATEWAY && @@ -453,17 +450,14 @@ hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) { /* Do route lookup on source address, to fill in src_mask. */ - bzero(&rin6, sizeof(struct route_in6)); - src = (struct sockaddr_in6 *)&rin6.ro_dst; - src->sin6_len = sizeof(struct sockaddr_in6); - src->sin6_family = AF_INET6; - src->sin6_addr = r->src.r_src6; + bzero(&sin6, sizeof(struct sockaddr_in6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = r->src.r_src6; - rin6.ro_rt = rtalloc1_fib((struct sockaddr *)src, 0, 0, r->fib); - - if (rin6.ro_rt != NULL) { - rt = rin6.ro_rt; + rt = rtalloc1_fib((struct sockaddr *)&sin6, 0, 0, r->fib); + if (rt != NULL) { if (rt_mask(rt)) fle6->f.src_mask = RT_MASK6(rt); else diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index a763e464ba48..3a979a0c42d7 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -290,7 +290,7 @@ cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type) if (type == CC_ACK) { if (tp->snd_cwnd > tp->snd_ssthresh) { tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, - V_tcp_abc_l_var * tp->t_maxseg); + V_tcp_abc_l_var * tcp_maxseg(tp)); if (tp->t_bytes_acked >= tp->snd_cwnd) { tp->t_bytes_acked -= 
tp->snd_cwnd; tp->ccv->flags |= CCF_ABC_SENTAWND; @@ -313,11 +313,13 @@ cc_conn_init(struct tcpcb *tp) { struct hc_metrics_lite metrics; struct inpcb *inp = tp->t_inpcb; + u_int maxseg; int rtt; INP_WLOCK_ASSERT(tp->t_inpcb); tcp_hc_get(&inp->inp_inc, &metrics); + maxseg = tcp_maxseg(tp); if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { tp->t_srtt = rtt; @@ -342,7 +344,7 @@ cc_conn_init(struct tcpcb *tp) * the slow start threshhold, but set the * threshold to no less than 2*mss. */ - tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh); + tp->snd_ssthresh = max(2 * maxseg, metrics.rmx_ssthresh); TCPSTAT_INC(tcps_usedssthresh); } @@ -359,21 +361,20 @@ cc_conn_init(struct tcpcb *tp) * requiring us to be cautious. */ if (tp->snd_cwnd == 1) - tp->snd_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */ + tp->snd_cwnd = maxseg; /* SYN(-ACK) lost */ else if (V_tcp_initcwnd_segments) - tp->snd_cwnd = min(V_tcp_initcwnd_segments * tp->t_maxseg, - max(2 * tp->t_maxseg, V_tcp_initcwnd_segments * 1460)); + tp->snd_cwnd = min(V_tcp_initcwnd_segments * maxseg, + max(2 * maxseg, V_tcp_initcwnd_segments * 1460)); else if (V_tcp_do_rfc3390) - tp->snd_cwnd = min(4 * tp->t_maxseg, - max(2 * tp->t_maxseg, 4380)); + tp->snd_cwnd = min(4 * maxseg, max(2 * maxseg, 4380)); else { /* Per RFC5681 Section 3.1 */ - if (tp->t_maxseg > 2190) - tp->snd_cwnd = 2 * tp->t_maxseg; - else if (tp->t_maxseg > 1095) - tp->snd_cwnd = 3 * tp->t_maxseg; + if (maxseg > 2190) + tp->snd_cwnd = 2 * maxseg; + else if (maxseg > 1095) + tp->snd_cwnd = 3 * maxseg; else - tp->snd_cwnd = 4 * tp->t_maxseg; + tp->snd_cwnd = 4 * maxseg; } if (CC_ALGO(tp)->conn_init != NULL) @@ -383,6 +384,8 @@ cc_conn_init(struct tcpcb *tp) void inline cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) { + u_int maxseg; + INP_WLOCK_ASSERT(tp->t_inpcb); switch(type) { @@ -402,12 +405,13 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) } break; case CC_RTO: + maxseg = tcp_maxseg(tp); tp->t_dupacks = 
0; tp->t_bytes_acked = 0; EXIT_RECOVERY(tp->t_flags); tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 / - tp->t_maxseg) * tp->t_maxseg; - tp->snd_cwnd = tp->t_maxseg; + maxseg) * maxseg; + tp->snd_cwnd = maxseg; break; case CC_RTO_ERR: TCPSTAT_INC(tcps_sndrexmitbad); @@ -469,13 +473,11 @@ tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen, * the ack that opens up a 0-sized window. * - LRO wasn't used for this segment. We make sure by checking that the * segment size is not larger than the MSS. - * - Delayed acks are enabled or this is a half-synchronized T/TCP - * connection. */ #define DELAY_ACK(tp, tlen) \ ((!tcp_timer_active(tp, TT_DELACK) && \ (tp->t_flags & TF_RXWIN0SENT) == 0) && \ - (tlen <= tp->t_maxopd) && \ + (tlen <= tp->t_maxseg) && \ (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) static void inline @@ -2481,6 +2483,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, hhook_run_tcp_est_in(tp, th, &to); if (SEQ_LEQ(th->th_ack, tp->snd_una)) { + u_int maxseg; + + maxseg = tcp_maxseg(tp); if (tlen == 0 && (tiwin == tp->snd_wnd || (tp->t_flags & TF_SACK_PERMIT))) { @@ -2560,12 +2565,12 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tp->sackhint.sack_bytes_rexmit; if (awnd < tp->snd_ssthresh) { - tp->snd_cwnd += tp->t_maxseg; + tp->snd_cwnd += maxseg; if (tp->snd_cwnd > tp->snd_ssthresh) tp->snd_cwnd = tp->snd_ssthresh; } } else - tp->snd_cwnd += tp->t_maxseg; + tp->snd_cwnd += maxseg; (void) tp->t_fb->tfb_tcp_output(tp); goto drop; } else if (tp->t_dupacks == tcprexmtthresh) { @@ -2599,18 +2604,18 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, TCPSTAT_INC( tcps_sack_recovery_episode); tp->sack_newdata = tp->snd_nxt; - tp->snd_cwnd = tp->t_maxseg; + tp->snd_cwnd = maxseg; (void) tp->t_fb->tfb_tcp_output(tp); goto drop; } tp->snd_nxt = th->th_ack; - tp->snd_cwnd = tp->t_maxseg; + tp->snd_cwnd = maxseg; (void) tp->t_fb->tfb_tcp_output(tp); 
KASSERT(tp->snd_limited <= 2, ("%s: tp->snd_limited too big", __func__)); tp->snd_cwnd = tp->snd_ssthresh + - tp->t_maxseg * + maxseg * (tp->t_dupacks - tp->snd_limited); if (SEQ_GT(onxt, tp->snd_nxt)) tp->snd_nxt = onxt; @@ -2641,7 +2646,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tp->snd_cwnd = (tp->snd_nxt - tp->snd_una) + (tp->t_dupacks - tp->snd_limited) * - tp->t_maxseg; + maxseg; /* * Only call tcp_output when there * is new data available to be sent. @@ -2654,10 +2659,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, if (avail > 0) (void) tp->t_fb->tfb_tcp_output(tp); sent = tp->snd_max - oldsndmax; - if (sent > tp->t_maxseg) { + if (sent > maxseg) { KASSERT((tp->t_dupacks == 2 && tp->snd_limited == 0) || - (sent == tp->t_maxseg + 1 && + (sent == maxseg + 1 && tp->t_flags & TF_SENTFIN), ("%s: sent too much", __func__)); @@ -3510,11 +3515,9 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt) * While looking at the routing entry, we also initialize other path-dependent * parameters from pre-set or cached values in the routing entry. * - * Also take into account the space needed for options that we - * send regularly. Make maxseg shorter by that amount to assure - * that we can send maxseg amount of data even when the options - * are present. Store the upper limit of the length of options plus - * data in maxopd. + * NOTE that resulting t_maxseg doesn't include space for TCP options or + * IP options, e.g. IPSEC data, since length of this data may vary, and + * thus it is calculated for every segment separately in tcp_output(). * * NOTE that this routine is only called when we process an incoming * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS @@ -3528,7 +3531,6 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, u_long maxmtu = 0; struct inpcb *inp = tp->t_inpcb; struct hc_metrics_lite metrics; - int origoffer; #ifdef INET6 int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 
1 : 0; size_t min_protoh = isipv6 ? @@ -3544,13 +3546,12 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, KASSERT(offer == -1, ("%s: conflict", __func__)); offer = mtuoffer - min_protoh; } - origoffer = offer; /* Initialize. */ #ifdef INET6 if (isipv6) { maxmtu = tcp_maxmtu6(&inp->inp_inc, cap); - tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt; + tp->t_maxseg = V_tcp_v6mssdflt; } #endif #if defined(INET) && defined(INET6) @@ -3559,7 +3560,7 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, #ifdef INET { maxmtu = tcp_maxmtu(&inp->inp_inc, cap); - tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt; + tp->t_maxseg = V_tcp_mssdflt; } #endif @@ -3583,9 +3584,9 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, /* * Offer == 0 means that there was no MSS on the SYN * segment, in this case we use tcp_mssdflt as - * already assigned to t_maxopd above. + * already assigned to t_maxseg above. */ - offer = tp->t_maxopd; + offer = tp->t_maxseg; break; case -1: @@ -3657,31 +3658,15 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, mss = min(mss, offer); /* - * Sanity check: make sure that maxopd will be large + * Sanity check: make sure that maxseg will be large * enough to allow some data on segments even if the * all the option space is used (40bytes). Otherwise * funny things may happen in tcp_output. + * + * XXXGL: shouldn't we reserve space for IP/IPv6 options? */ mss = max(mss, 64); - /* - * maxopd stores the maximum length of data AND options - * in a segment; maxseg is the amount of data in a normal - * segment. We need to store this value (maxopd) apart - * from maxseg, because now every segment carries options - * and thus we normally have somewhat less data in segments. - */ - tp->t_maxopd = mss; - - /* - * origoffer==-1 indicates that no segments were received yet. - * In this case we just guess. 
- */ - if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && - (origoffer == -1 || - (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)) - mss -= TCPOLEN_TSTAMP_APPA; - tp->t_maxseg = mss; } @@ -3804,7 +3789,8 @@ void tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th) { tcp_seq onxt = tp->snd_nxt; - u_long ocwnd = tp->snd_cwnd; + u_long ocwnd = tp->snd_cwnd; + u_int maxseg = tcp_maxseg(tp); INP_WLOCK_ASSERT(tp->t_inpcb); @@ -3815,7 +3801,7 @@ tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th) * Set snd_cwnd to one segment beyond acknowledged offset. * (tp->snd_una has not yet been updated when this function is called.) */ - tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th); + tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th); tp->t_flags |= TF_ACKNOW; (void) tp->t_fb->tfb_tcp_output(tp); tp->snd_cwnd = ocwnd; @@ -3829,7 +3815,7 @@ tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th) tp->snd_cwnd -= BYTES_THIS_ACK(tp, th); else tp->snd_cwnd = 0; - tp->snd_cwnd += tp->t_maxseg; + tp->snd_cwnd += maxseg; } int diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 482ead5a329b..3c32d77c377e 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -830,11 +830,11 @@ tcp_output(struct tcpcb *tp) /* * Adjust data length if insertion of options will - * bump the packet length beyond the t_maxopd length. + * bump the packet length beyond the t_maxseg length. * Clear the FIN bit because we cut off the tail of * the segment. 
*/ - if (len + optlen + ipoptlen > tp->t_maxopd) { + if (len + optlen + ipoptlen > tp->t_maxseg) { flags &= ~TH_FIN; if (tso) { @@ -937,7 +937,7 @@ tcp_output(struct tcpcb *tp) * fractional unless the send sockbuf can be * emptied: */ - max_len = (tp->t_maxopd - optlen); + max_len = (tp->t_maxseg - optlen); if ((off + len) < sbavail(&so->so_snd)) { moff = len % max_len; if (moff != 0) { @@ -967,7 +967,7 @@ tcp_output(struct tcpcb *tp) sendalot = 1; } else { - len = tp->t_maxopd - optlen - ipoptlen; + len = tp->t_maxseg - optlen - ipoptlen; sendalot = 1; } } else @@ -1277,10 +1277,10 @@ tcp_output(struct tcpcb *tp) * The TCP pseudo header checksum is always provided. */ if (tso) { - KASSERT(len > tp->t_maxopd - optlen, + KASSERT(len > tp->t_maxseg - optlen, ("%s: len <= tso_segsz", __func__)); m->m_pkthdr.csum_flags |= CSUM_TSO; - m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen; + m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; } #ifdef IPSEC @@ -1348,7 +1348,7 @@ tcp_output(struct tcpcb *tp) */ ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); - if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) + if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) tp->t_flags2 |= TF2_PLPMTU_PMTUD; else tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; @@ -1394,7 +1394,7 @@ tcp_output(struct tcpcb *tp) * * NB: Don't set DF on small MTU/MSS to have a safe fallback. */ - if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) { + if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) { ip->ip_off |= htons(IP_DF); tp->t_flags2 |= TF2_PLPMTU_PMTUD; } else { diff --git a/sys/netinet/tcp_stacks/fastpath.c b/sys/netinet/tcp_stacks/fastpath.c index 85b24f67c7a3..a49f85bbaa06 100644 --- a/sys/netinet/tcp_stacks/fastpath.c +++ b/sys/netinet/tcp_stacks/fastpath.c @@ -158,13 +158,11 @@ static void tcp_do_segment_fastack(struct mbuf *, struct tcphdr *, * the ack that opens up a 0-sized window. * - LRO wasn't used for this segment. 
We make sure by checking that the * segment size is not larger than the MSS. - * - Delayed acks are enabled or this is a half-synchronized T/TCP - * connection. */ #define DELAY_ACK(tp, tlen) \ ((!tcp_timer_active(tp, TT_DELACK) && \ (tp->t_flags & TF_RXWIN0SENT) == 0) && \ - (tlen <= tp->t_maxopd) && \ + (tlen <= tp->t_maxseg) && \ (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) /* diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index c2e0696394a5..9f21f116f164 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1087,7 +1087,7 @@ tcp_newtcpcb(struct inpcb *inp) #endif tp->t_timers = &tm->tt; /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ - tp->t_maxseg = tp->t_maxopd = + tp->t_maxseg = #ifdef INET6 isipv6 ? V_tcp_v6mssdflt : #endif /* INET6 */ @@ -1901,7 +1901,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) * Only process the offered MTU if it * is smaller than the current one. */ - if (mtu < tp->t_maxopd + + if (mtu < tp->t_maxseg + sizeof(struct tcpiphdr)) { bzero(&inc, sizeof(inc)); inc.inc_faddr = faddr; @@ -2283,6 +2283,59 @@ tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap) } #endif /* INET6 */ +/* + * Calculate effective SMSS per RFC5681 definition for a given TCP + * connection at its current state, taking into account SACK and etc. + */ +u_int +tcp_maxseg(const struct tcpcb *tp) +{ + u_int optlen; + + if (tp->t_flags & TF_NOOPT) + return (tp->t_maxseg); + + /* + * Here we have a simplified code from tcp_addoptions(), + * without a proper loop, and having most of paddings hardcoded. + * We might make mistakes with padding here in some edge cases, + * but this is harmless, since result of tcp_maxseg() is used + * only in cwnd and ssthresh estimations. 
+ */ +#define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4) + if (TCPS_HAVEESTABLISHED(tp->t_state)) { + if (tp->t_flags & TF_RCVD_TSTMP) + optlen = TCPOLEN_TSTAMP_APPA; + else + optlen = 0; +#ifdef TCP_SIGNATURE + if (tp->t_flags & TF_SIGNATURE) + optlen += PAD(TCPOLEN_SIGNATURE); +#endif + if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { + optlen += TCPOLEN_SACKHDR; + optlen += tp->rcv_numsacks * TCPOLEN_SACK; + optlen = PAD(optlen); + } + } else { + if (tp->t_flags & TF_REQ_TSTMP) + optlen = TCPOLEN_TSTAMP_APPA; + else + optlen = PAD(TCPOLEN_MAXSEG); + if (tp->t_flags & TF_REQ_SCALE) + optlen += PAD(TCPOLEN_WINDOW); +#ifdef TCP_SIGNATURE + if (tp->t_flags & TF_SIGNATURE) + optlen += PAD(TCPOLEN_SIGNATURE); +#endif + if (tp->t_flags & TF_SACK_PERMIT) + optlen += PAD(TCPOLEN_SACK_PERMITTED); + } +#undef PAD + optlen = min(optlen, TCP_MAXOLEN); + return (tp->t_maxseg - optlen); +} + #ifdef IPSEC /* compute ESP/AH header size for TCP, including outer IP header. */ size_t diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 9767eb709056..fb4ff11a4a72 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -660,7 +660,6 @@ tcp_timer_rexmt(void * xtp) */ if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED)) || (tp->t_state == TCPS_FIN_WAIT_1))) { - int optlen; #ifdef INET6 int isipv6; #endif @@ -684,8 +683,7 @@ tcp_timer_rexmt(void * xtp) tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE; /* Keep track of previous MSS. */ - optlen = tp->t_maxopd - tp->t_maxseg; - tp->t_pmtud_saved_maxopd = tp->t_maxopd; + tp->t_pmtud_saved_maxseg = tp->t_maxseg; /* * Reduce the MSS to blackhole value or to the default @@ -694,13 +692,13 @@ tcp_timer_rexmt(void * xtp) #ifdef INET6 isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0; if (isipv6 && - tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) { + tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) { /* Use the sysctl tuneable blackhole MSS. 
*/ - tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss; + tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss; V_tcp_pmtud_blackhole_activated++; } else if (isipv6) { /* Use the default MSS. */ - tp->t_maxopd = V_tcp_v6mssdflt; + tp->t_maxseg = V_tcp_v6mssdflt; /* * Disable Path MTU Discovery when we switch to * minmss. @@ -713,13 +711,13 @@ tcp_timer_rexmt(void * xtp) else #endif #ifdef INET - if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) { + if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) { /* Use the sysctl tuneable blackhole MSS. */ - tp->t_maxopd = V_tcp_pmtud_blackhole_mss; + tp->t_maxseg = V_tcp_pmtud_blackhole_mss; V_tcp_pmtud_blackhole_activated++; } else { /* Use the default MSS. */ - tp->t_maxopd = V_tcp_mssdflt; + tp->t_maxseg = V_tcp_mssdflt; /* * Disable Path MTU Discovery when we switch to * minmss. @@ -728,7 +726,6 @@ tcp_timer_rexmt(void * xtp) V_tcp_pmtud_blackhole_activated_min_mss++; } #endif - tp->t_maxseg = tp->t_maxopd - optlen; /* * Reset the slow-start flight size * as it may depend on the new MSS. @@ -748,9 +745,7 @@ tcp_timer_rexmt(void * xtp) (tp->t_rxtshift > 6)) { tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; - optlen = tp->t_maxopd - tp->t_maxseg; - tp->t_maxopd = tp->t_pmtud_saved_maxopd; - tp->t_maxseg = tp->t_maxopd - optlen; + tp->t_maxseg = tp->t_pmtud_saved_maxseg; V_tcp_pmtud_blackhole_failed++; /* * Reset the slow-start flight size as it diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 3435668dd8cb..76bc8aac0d99 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -904,8 +904,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, /* * Do implied connect if not yet connected, * initialize window to default value, and - * initialize maxseg/maxopd using peer's cached - * MSS. + * initialize maxseg using peer's cached MSS. 
*/ #ifdef INET6 if (isipv6) @@ -964,8 +963,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, /* * Do implied connect if not yet connected, * initialize window to default value, and - * initialize maxseg/maxopd using peer's cached - * MSS. + * initialize maxseg using peer's cached MSS. */ #ifdef INET6 if (isipv6) @@ -2208,8 +2206,8 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent) "0x%08x\n", tp->snd_ssthresh, tp->snd_recover); db_print_indent(indent); - db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n", - tp->t_maxopd, tp->t_rcvtime, tp->t_starttime); + db_printf("t_rcvtime: %u t_startime: %u\n", + tp->t_rcvtime, tp->t_starttime); db_print_indent(indent); db_printf("t_rttime: %u t_rtsq: 0x%08x\n", diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 8d76912275a1..6cd4cf05874a 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -180,8 +180,6 @@ struct tcpcb { u_long snd_spare2; /* unused */ tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ - u_int t_maxopd; /* mss plus options */ - u_int t_rcvtime; /* inactivity time */ u_int t_starttime; /* time connection was established */ u_int t_rtttime; /* RTT measurement start time */ @@ -192,6 +190,7 @@ struct tcpcb { int t_rxtcur; /* current retransmit value (ticks) */ u_int t_maxseg; /* maximum segment size */ + u_int t_pmtud_saved_maxseg; /* pre-blackhole MSS */ int t_srtt; /* smoothed round-trip time */ int t_rttvar; /* variance in round-trip time */ @@ -251,7 +250,6 @@ struct tcpcb { u_int t_tsomax; /* TSO total burst length limit in bytes */ u_int t_tsomaxsegcount; /* TSO maximum segment count */ u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */ - u_int t_pmtud_saved_maxopd; /* pre-blackhole MSS */ u_int t_flags2; /* More tcpcb flags storage */ #if defined(_KERNEL) && defined(TCP_RFC7413) uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */ @@ -775,6 +773,7 @@ int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb u_long 
tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *); u_long tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *); +u_int tcp_maxseg(const struct tcpcb *); void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, struct tcp_ifcap *); void tcp_mss(struct tcpcb *, int); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 57e78e8498d6..1b7715309ba7 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -94,6 +95,8 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #endif /* INET6 */ #include @@ -2985,49 +2988,35 @@ static u_int16_t pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) { #ifdef INET - struct sockaddr_in *dst; - struct route ro; + struct nhop4_basic nh4; #endif /* INET */ #ifdef INET6 - struct sockaddr_in6 *dst6; - struct route_in6 ro6; + struct nhop6_basic nh6; + struct in6_addr dst6; + uint32_t scopeid; #endif /* INET6 */ - struct rtentry *rt = NULL; int hlen = 0; - u_int16_t mss = V_tcp_mssdflt; + uint16_t mss = 0; switch (af) { #ifdef INET case AF_INET: hlen = sizeof(struct ip); - bzero(&ro, sizeof(ro)); - dst = (struct sockaddr_in *)&ro.ro_dst; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr = addr->v4; - in_rtalloc_ign(&ro, 0, rtableid); - rt = ro.ro_rt; + if (fib4_lookup_nh_basic(rtableid, addr->v4, 0, 0, &nh4) == 0) + mss = nh4.nh_mtu - hlen - sizeof(struct tcphdr); break; #endif /* INET */ #ifdef INET6 case AF_INET6: hlen = sizeof(struct ip6_hdr); - bzero(&ro6, sizeof(ro6)); - dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; - dst6->sin6_family = AF_INET6; - dst6->sin6_len = sizeof(*dst6); - dst6->sin6_addr = addr->v6; - in6_rtalloc_ign(&ro6, 0, rtableid); - rt = ro6.ro_rt; + in6_splitscope(&addr->v6, &dst6, &scopeid); + if (fib6_lookup_nh_basic(rtableid, &dst6, scopeid, 0,0,&nh6)==0) + mss = nh6.nh_mtu - hlen - sizeof(struct tcphdr); break; #endif 
/* INET6 */ } - if (rt && rt->rt_ifp) { - mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); - mss = max(V_tcp_mssdflt, mss); - RTFREE(rt); - } + mss = max(V_tcp_mssdflt, mss); mss = min(mss, offer); mss = max(mss, 64); /* sanity - at least max opt space */ return (mss); @@ -5194,13 +5183,12 @@ pf_pull_hdr(struct mbuf *m, int off, void *p, int len, return (p); } -int -pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, +#ifdef RADIX_MPATH +static int +pf_routable_oldmpath(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, int rtableid) { -#ifdef RADIX_MPATH struct radix_node_head *rnh; -#endif struct sockaddr_in *dst; int ret = 1; int check_mpath; @@ -5215,12 +5203,10 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, struct ifnet *ifp; check_mpath = 0; -#ifdef RADIX_MPATH /* XXX: stick to table 0 for now */ rnh = rt_tables_get_rnh(0, af); if (rnh != NULL && rn_mpath_capable(rnh)) check_mpath = 1; -#endif bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: @@ -5283,9 +5269,7 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, if (kif->pfik_ifp == ifp) ret = 1; -#ifdef RADIX_MPATH rn = rn_mpath_next(rn); -#endif } while (check_mpath == 1 && rn != NULL && ret == 0); } else ret = 0; @@ -5294,6 +5278,72 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, RTFREE(ro.ro_rt); return (ret); } +#endif + +int +pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, + int rtableid) +{ +#ifdef INET + struct nhop4_basic nh4; +#endif +#ifdef INET6 + struct nhop6_basic nh6; +#endif + struct ifnet *ifp; +#ifdef RADIX_MPATH + struct radix_node_head *rnh; + + /* XXX: stick to table 0 for now */ + rnh = rt_tables_get_rnh(0, af); + if (rnh != NULL && rn_mpath_capable(rnh)) + return (pf_routable_oldmpath(addr, af, kif, rtableid)); +#endif + /* + * Skip check for addresses with embedded interface scope, + * as they would always match anyway. 
+ */ + if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6)) + return (1); + + if (af != AF_INET && af != AF_INET6) + return (0); + + /* Skip checks for ipsec interfaces */ + if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) + return (1); + + ifp = NULL; + + switch (af) { +#ifdef INET6 + case AF_INET6: + if (fib6_lookup_nh_basic(rtableid, &addr->v6, 0, 0, 0, &nh6)!=0) + return (0); + ifp = nh6.nh_ifp; + break; +#endif +#ifdef INET + case AF_INET: + if (fib4_lookup_nh_basic(rtableid, addr->v4, 0, 0, &nh4) != 0) + return (0); + ifp = nh4.nh_ifp; + break; +#endif + } + + /* No interface given, this is a no-route check */ + if (kif == NULL) + return (1); + + if (kif->pfik_ifp == NULL) + return (0); + + /* Perform uRPF check if passed input interface */ + if (kif->pfik_ifp == ifp) + return (1); + return (0); +} #ifdef INET static void @@ -5344,23 +5394,20 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, dst.sin_addr = ip->ip_dst; if (r->rt == PF_FASTROUTE) { - struct rtentry *rt; + struct nhop4_basic nh4; if (s) PF_STATE_UNLOCK(s); - rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0)); - if (rt == NULL) { + + if (fib4_lookup_nh_basic(M_GETFIB(m0), ip->ip_dst, 0, + m0->m_pkthdr.flowid, &nh4) != 0) { KMOD_IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; } - ifp = rt->rt_ifp; - counter_u64_add(rt->rt_pksent, 1); - - if (rt->rt_flags & RTF_GATEWAY) - bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst)); - RTFREE_LOCKED(rt); + ifp = nh4.nh_ifp; + dst.sin_addr = nh4.nh_addr; } else { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, diff --git a/sys/ofed/include/rdma/Kbuild b/sys/ofed/include/rdma/Kbuild deleted file mode 100644 index e7c043216558..000000000000 --- a/sys/ofed/include/rdma/Kbuild +++ /dev/null @@ -1 +0,0 @@ -header-y += ib_user_mad.h diff --git a/targets/pseudo/userland/lib/Makefile.depend b/targets/pseudo/userland/lib/Makefile.depend index c5e2407ddc18..e57164a7d917 100644 --- 
a/targets/pseudo/userland/lib/Makefile.depend +++ b/targets/pseudo/userland/lib/Makefile.depend @@ -136,6 +136,7 @@ DIRDEPS = \ lib/libstand \ lib/libstdbuf \ lib/libstdthreads \ + lib/libsysdecode \ lib/libtacplus \ lib/libtelnet \ lib/libthr \ diff --git a/usr.bin/cap_mkdb/cap_mkdb.c b/usr.bin/cap_mkdb/cap_mkdb.c index 2f8bd96f16af..bbcedd5a8d54 100644 --- a/usr.bin/cap_mkdb/cap_mkdb.c +++ b/usr.bin/cap_mkdb/cap_mkdb.c @@ -119,7 +119,7 @@ main(int argc, char *argv[]) (void)snprintf(buf, sizeof(buf), "%s.db", capname ? capname : *argv); if ((capname = strdup(buf)) == NULL) errx(1, "strdup failed"); - if ((capdbp = dbopen(capname, O_CREAT | O_TRUNC | O_RDWR, + if ((capdbp = dbopen(capname, O_CREAT | O_TRUNC | O_RDWR | O_SYNC, DEFFILEMODE, DB_HASH, &openinfo)) == NULL) err(1, "%s", buf); diff --git a/usr.bin/truss/Makefile.depend.amd64 b/usr.bin/truss/Makefile.depend.amd64 index ad9c8b3d5906..334bb3a8d446 100644 --- a/usr.bin/truss/Makefile.depend.amd64 +++ b/usr.bin/truss/Makefile.depend.amd64 @@ -6,7 +6,6 @@ DIRDEPS = \ gnu/lib/libgcc \ include \ include/arpa \ - include/rpc \ include/xlocale \ lib/${CSU_DIR} \ lib/libc \ @@ -26,6 +25,4 @@ amd64-freebsd32.o: freebsd32_syscalls.h amd64-freebsd32.po: freebsd32_syscalls.h amd64-linux32.o: amd64-linux32_syscalls.h amd64-linux32.po: amd64-linux32_syscalls.h -ioctl.o: ioctl.c -ioctl.po: ioctl.c .endif diff --git a/usr.sbin/mountd/exports.5 b/usr.sbin/mountd/exports.5 index 88e2219ab46d..018a865015ab 100644 --- a/usr.sbin/mountd/exports.5 +++ b/usr.sbin/mountd/exports.5 @@ -131,6 +131,7 @@ The credential includes all the groups to which the user is a member on the local machine (see .Xr id 1 ) . The user may be specified by name or number. +The user string may be quoted, or use backslash escaping. .Pp .Sm off .Fl maproot Li = Sy user:group1:group2:... @@ -140,6 +141,7 @@ to be used for remote access by root. The elements of the list may be either names or numbers. 
Note that user: should be used to distinguish a credential containing no groups from a complete credential for that user. +The group names may be quoted, or use backslash escaping. .Pp .Sm off .Fl mapall Li = Sy user diff --git a/usr.sbin/mountd/mountd.c b/usr.sbin/mountd/mountd.c index 535a3f718197..d6da2bc9c24c 100644 --- a/usr.sbin/mountd/mountd.c +++ b/usr.sbin/mountd/mountd.c @@ -174,6 +174,7 @@ static int check_options(struct dirlist *); static int checkmask(struct sockaddr *sa); static int chk_host(struct dirlist *, struct sockaddr *, int *, int *, int *, int **); +static char *strsep_quote(char **stringp, const char *delim); static int create_service(struct netconfig *nconf); static void complete_service(struct netconfig *nconf, char *port_str); static void clearout_service(void); @@ -277,6 +278,73 @@ static void SYSLOG(int, const char *, ...) __printflike(2, 3); static int debug = 0; #endif +/* + * Similar to strsep(), but it allows for quoted strings + * and escaped characters. + * + * It returns the string (or NULL, if *stringp is NULL), + * which is a de-quoted version of the string if necessary. + * + * It modifies *stringp in place. + */ +static char * +strsep_quote(char **stringp, const char *delim) +{ + char *srcptr, *dstptr, *retval; + char quot = 0; + + if (stringp == NULL || *stringp == NULL) + return (NULL); + + srcptr = dstptr = retval = *stringp; + + while (*srcptr) { + /* + * We're looking for several edge cases here. + * First: if we're in quote state (quot != 0), + * then we ignore the delim characters, but otherwise + * process as normal, unless it is the quote character. + * Second: if the current character is a backslash, + * we take the next character as-is, without checking + * for delim, quote, or backslash. Exception: if the + * next character is a NUL, that's the end of the string. + * Third: if the character is a quote character, we toggle + * quote state. 
+ * Otherwise: check the current character for NUL, or + * being in delim, and end the string if either is true. + */ + if (*srcptr == '\\') { + srcptr++; + /* + * The edge case here is if the next character + * is NUL, we want to stop processing. But if + * it's not NUL, then we simply want to copy it. + */ + if (*srcptr) { + *dstptr++ = *srcptr++; + } + continue; + } + if (quot == 0 && (*srcptr == '\'' || *srcptr == '"')) { + quot = *srcptr++; + continue; + } + if (quot && *srcptr == quot) { + /* End of the quoted part */ + quot = 0; + srcptr++; + continue; + } + if (!quot && strchr(delim, *srcptr)) + break; + *dstptr++ = *srcptr++; + } + + *dstptr = 0; /* Terminate the string */ + *stringp = (*srcptr == '\0') ? NULL : srcptr + 1; + return (retval); +} + /* * Mountd server for NFS mount protocol as described in: * NFS: Network File System Protocol Specification, RFC1094, Appendix A @@ -2831,8 +2899,9 @@ parsecred(char *namelist, struct xucred *cr) /* * Get the user's password table entry. */ - names = strsep(&namelist, " \t\n"); + names = strsep_quote(&namelist, " \t\n"); name = strsep(&names, ":"); + /* Bug? 
name could be NULL here */ if (isdigit(*name) || *name == '-') pw = getpwuid(atoi(name)); else diff --git a/usr.sbin/rpcbind/Makefile b/usr.sbin/rpcbind/Makefile index 2b679f416277..b3282601efc7 100644 --- a/usr.sbin/rpcbind/Makefile +++ b/usr.sbin/rpcbind/Makefile @@ -14,6 +14,10 @@ CFLAGS+= -DPORTMAP -DLIBWRAP CFLAGS+= -DINET6 .endif +.if ${MK_TESTS} != "no" +SUBDIR+= tests +.endif + WARNS?= 1 LIBADD= wrap diff --git a/usr.sbin/rpcbind/check_bound.c b/usr.sbin/rpcbind/check_bound.c index 3691f5f1a2f0..64b73c741d81 100644 --- a/usr.sbin/rpcbind/check_bound.c +++ b/usr.sbin/rpcbind/check_bound.c @@ -50,6 +50,7 @@ static char sccsid[] = "@(#)check_bound.c 1.11 89/04/21 Copyr 1989 Sun Micro"; #include #include #include +#include #include #include #include @@ -159,6 +160,7 @@ char * mergeaddr(SVCXPRT *xprt, char *netid, char *uaddr, char *saddr) { struct fdlist *fdl; + struct svc_dg_data *dg_data; char *c_uaddr, *s_uaddr, *m_uaddr, *allocated_uaddr = NULL; for (fdl = fdhead; fdl; fdl = fdl->next) @@ -170,11 +172,20 @@ mergeaddr(SVCXPRT *xprt, char *netid, char *uaddr, char *saddr) /* that server died */ return (nullstring); /* + * Try to determine the local address on which the client contacted us, + * so we can send a reply from the same address. If it's unknown, then + * try to determine which address the client used, and pick a nearby + * local address. + * * If saddr is not NULL, the remote client may have included the * address by which it contacted us. Use that for the "client" uaddr, * otherwise use the info from the SVCXPRT. */ - if (saddr != NULL) { + dg_data = (struct svc_dg_data*)xprt->xp_p2; + if (dg_data != NULL && dg_data->su_srcaddr.buf != NULL) { + c_uaddr = taddr2uaddr(fdl->nconf, &dg_data->su_srcaddr); + } + else if (saddr != NULL) { c_uaddr = saddr; } else { c_uaddr = taddr2uaddr(fdl->nconf, svc_getrpccaller(xprt)); @@ -217,7 +228,7 @@ mergeaddr(SVCXPRT *xprt, char *netid, char *uaddr, char *saddr) * structure should not be freed. 
*/ struct netconfig * -rpcbind_get_conf(char *netid) +rpcbind_get_conf(const char *netid) { struct fdlist *fdl; diff --git a/usr.sbin/rpcbind/rpcbind.h b/usr.sbin/rpcbind/rpcbind.h index 4aba42042a6b..309bc0ba77da 100644 --- a/usr.sbin/rpcbind/rpcbind.h +++ b/usr.sbin/rpcbind/rpcbind.h @@ -85,7 +85,7 @@ extern char *tcp_uaddr; /* Universal TCP address */ int add_bndlist(struct netconfig *, struct netbuf *); bool_t is_bound(char *, char *); char *mergeaddr(SVCXPRT *, char *, char *, char *); -struct netconfig *rpcbind_get_conf(char *); +struct netconfig *rpcbind_get_conf(const char *); void rpcbs_init(void); void rpcbs_procinfo(rpcvers_t, rpcproc_t); @@ -134,8 +134,8 @@ extern void pmap_service(struct svc_req *, SVCXPRT *); void write_warmstart(void); void read_warmstart(void); -char *addrmerge(struct netbuf *caller, char *serv_uaddr, char *clnt_uaddr, - char *netid); +char *addrmerge(struct netbuf *caller, const char *serv_uaddr, + const char *clnt_uaddr, char const *netid); int listen_addr(const struct sockaddr *sa); void network_init(void); struct sockaddr *local_sa(int); diff --git a/usr.sbin/rpcbind/tests/Makefile b/usr.sbin/rpcbind/tests/Makefile new file mode 100644 index 000000000000..4b0cf15f13da --- /dev/null +++ b/usr.sbin/rpcbind/tests/Makefile @@ -0,0 +1,17 @@ +# $FreeBSD$ + +.include + +.PATH: ${.CURDIR}/.. + +ATF_TESTS_C= addrmerge_test +CFLAGS+= -I${.CURDIR}/.. -Wno-cast-qual +SRCS.addrmerge_test= addrmerge_test.c util.c + +.if ${MK_INET6_SUPPORT} != "no" +CFLAGS+= -DINET6 +.endif + +WARNS?= 3 + +.include diff --git a/usr.sbin/rpcbind/tests/addrmerge_test.c b/usr.sbin/rpcbind/tests/addrmerge_test.c new file mode 100644 index 000000000000..357354af1c70 --- /dev/null +++ b/usr.sbin/rpcbind/tests/addrmerge_test.c @@ -0,0 +1,849 @@ +/*- + * Copyright (c) 2014 Spectra Logic Corporation + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. 
+ * + * $FreeBSD$ + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include "rpcbind.h" + +#define MAX_IFADDRS 16 + +int debugging = false; + +/* Data for mocking getifaddrs */ +struct ifaddr_storage { + struct ifaddrs ifaddr; + struct sockaddr_storage addr; + struct sockaddr_storage mask; + struct sockaddr_storage bcast; +} mock_ifaddr_storage[MAX_IFADDRS]; +struct ifaddrs *mock_ifaddrs = NULL; +int ifaddr_count = 0; + +/* Data for mocking listen_addr */ +int bind_address_count = 0; +struct sockaddr* bind_addresses[MAX_IFADDRS]; + +/* Stub library functions */ +void +freeifaddrs(struct ifaddrs *ifp __unused) +{ + return ; +} + +int +getifaddrs(struct ifaddrs **ifap) +{ + *ifap = mock_ifaddrs; + return (0); +} + +static void +mock_ifaddr4(const char* name, const char* addr, const char* mask, + const char* bcast, unsigned int flags, bool bind) +{ + struct ifaddrs *ifaddr = &mock_ifaddr_storage[ifaddr_count].ifaddr; + struct sockaddr_in *in = (struct sockaddr_in*) + &mock_ifaddr_storage[ifaddr_count].addr; + struct sockaddr_in *mask_in = (struct sockaddr_in*) + &mock_ifaddr_storage[ifaddr_count].mask; + struct sockaddr_in *bcast_in = (struct sockaddr_in*) + &mock_ifaddr_storage[ifaddr_count].bcast; + + in->sin_family = AF_INET; + in->sin_port = 0; + in->sin_len = sizeof(*in); /* size of the struct, not of the pointer */ + in->sin_addr.s_addr = inet_addr(addr); + mask_in->sin_family = AF_INET; + mask_in->sin_port = 0; + mask_in->sin_len = sizeof(*mask_in); + mask_in->sin_addr.s_addr = inet_addr(mask); + bcast_in->sin_family = AF_INET; + bcast_in->sin_port = 0; + bcast_in->sin_len = sizeof(*bcast_in); + bcast_in->sin_addr.s_addr = inet_addr(bcast); + *ifaddr = (struct ifaddrs) { + .ifa_next = NULL, + .ifa_name = (char*) name, + .ifa_flags = flags, + .ifa_addr = (struct sockaddr*) in, + .ifa_netmask = (struct sockaddr*) mask_in, + .ifa_broadaddr = (struct sockaddr*) bcast_in, + .ifa_data = NULL, /* addrmerge doesn't care*/ + }; + + if (ifaddr_count >
0) + mock_ifaddr_storage[ifaddr_count - 1].ifaddr.ifa_next = ifaddr; + ifaddr_count++; + mock_ifaddrs = &mock_ifaddr_storage[0].ifaddr; + + /* Optionally simulate binding an ip ala "rpcbind -h foo" */ + if (bind) { + bind_addresses[bind_address_count] = (struct sockaddr*)in; + bind_address_count++; + } +} + +#ifdef INET6 +static void +mock_ifaddr6(const char* name, const char* addr, const char* mask, + const char* bcast, unsigned int flags, uint32_t scope_id, bool bind) +{ + struct ifaddrs *ifaddr = &mock_ifaddr_storage[ifaddr_count].ifaddr; + struct sockaddr_in6 *in6 = (struct sockaddr_in6*) + &mock_ifaddr_storage[ifaddr_count].addr; + struct sockaddr_in6 *mask_in6 = (struct sockaddr_in6*) + &mock_ifaddr_storage[ifaddr_count].mask; + struct sockaddr_in6 *bcast_in6 = (struct sockaddr_in6*) + &mock_ifaddr_storage[ifaddr_count].bcast; + + in6->sin6_family = AF_INET6; + in6->sin6_port = 0; + in6->sin6_len = sizeof(*in6); + in6->sin6_scope_id = scope_id; + ATF_REQUIRE_EQ(1, inet_pton(AF_INET6, addr, (void*)&in6->sin6_addr)); + mask_in6->sin6_family = AF_INET6; + mask_in6->sin6_port = 0; + mask_in6->sin6_len = sizeof(*mask_in6); + mask_in6->sin6_scope_id = scope_id; + ATF_REQUIRE_EQ(1, inet_pton(AF_INET6, mask, + (void*)&mask_in6->sin6_addr)); + bcast_in6->sin6_family = AF_INET6; + bcast_in6->sin6_port = 0; + bcast_in6->sin6_len = sizeof(*bcast_in6); + bcast_in6->sin6_scope_id = scope_id; + ATF_REQUIRE_EQ(1, inet_pton(AF_INET6, bcast, + (void*)&bcast_in6->sin6_addr)); + *ifaddr = (struct ifaddrs) { + .ifa_next = NULL, + .ifa_name = (char*) name, + .ifa_flags = flags, + .ifa_addr = (struct sockaddr*) in6, + .ifa_netmask = (struct sockaddr*) mask_in6, + .ifa_broadaddr = (struct sockaddr*) bcast_in6, + .ifa_data = NULL, /* addrmerge doesn't care*/ + }; + + if (ifaddr_count > 0) + mock_ifaddr_storage[ifaddr_count - 1].ifaddr.ifa_next = ifaddr; + ifaddr_count++; + mock_ifaddrs = &mock_ifaddr_storage[0].ifaddr; + + /* Optionally simulate binding an ip ala "rpcbind -h foo" */ 
+ if (bind) { + bind_addresses[bind_address_count] = (struct sockaddr*)in6; + bind_address_count++; + } +} +#else +static void +mock_ifaddr6(const char* name __unused, const char* addr __unused, + const char* mask __unused, const char* bcast __unused, + unsigned int flags __unused, uint32_t scope_id __unused, bool bind __unused) +{ +} +#endif /*INET6 */ + +static void +mock_lo0(void) +{ + /* + * This broadcast address looks wrong, but it's what getifaddrs(2) + * actually returns. It's invalid because IFF_BROADCAST is not set + */ + mock_ifaddr4("lo0", "127.0.0.1", "255.0.0.0", "127.0.0.1", + IFF_LOOPBACK | IFF_UP | IFF_RUNNING | IFF_MULTICAST, false); + mock_ifaddr6("lo0", "::1", "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", + "::1", + IFF_LOOPBACK | IFF_UP | IFF_RUNNING | IFF_MULTICAST, 0, false); +} + +static void +mock_igb0(void) +{ + mock_ifaddr4("igb0", "192.0.2.2", "255.255.255.128", "192.0.2.127", + IFF_UP | IFF_BROADCAST | IFF_RUNNING | IFF_SIMPLEX | IFF_MULTICAST, + false); + mock_ifaddr6("igb0", "2001:db8::2", "ffff:ffff:ffff:ffff::", + "2001:db8::ffff:ffff:ffff:ffff", + IFF_UP | IFF_BROADCAST | IFF_RUNNING | IFF_SIMPLEX | IFF_MULTICAST, + 0, false); + /* Link local address */ + mock_ifaddr6("igb0", "fe80::2", "ffff:ffff:ffff:ffff::", + "fe80::ffff:ffff:ffff:ffff", + IFF_UP | IFF_BROADCAST | IFF_RUNNING | IFF_SIMPLEX | IFF_MULTICAST, + 2, false); +} + +/* On the same subnet as igb0 */ +static void +mock_igb1(bool bind) +{ + mock_ifaddr4("igb1", "192.0.2.3", "255.255.255.128", "192.0.2.127", + IFF_UP | IFF_BROADCAST | IFF_RUNNING | IFF_SIMPLEX | IFF_MULTICAST, + bind); + mock_ifaddr6("igb1", "2001:db8::3", "ffff:ffff:ffff:ffff::", + "2001:db8::ffff:ffff:ffff:ffff", + IFF_UP | IFF_BROADCAST | IFF_RUNNING | IFF_SIMPLEX | IFF_MULTICAST, + 0, bind); + /* Link local address */ + mock_ifaddr6("igb1", "fe80::3", "ffff:ffff:ffff:ffff::", + "fe80::ffff:ffff:ffff:ffff", + IFF_UP | IFF_BROADCAST | IFF_RUNNING | IFF_SIMPLEX | IFF_MULTICAST, + 3, bind); +} + +/* igb2 is 
on a different subnet than igb0 */ +static void +mock_igb2(void) +{ + mock_ifaddr4("igb2", "192.0.2.130", "255.255.255.128", "192.0.2.255", + IFF_UP | IFF_BROADCAST | IFF_RUNNING | IFF_SIMPLEX | IFF_MULTICAST, + false); + mock_ifaddr6("igb2", "2001:db8:1::2", "ffff:ffff:ffff:ffff::", + "2001:db8:1:0:ffff:ffff:ffff:ffff", + IFF_UP | IFF_BROADCAST | IFF_RUNNING | IFF_SIMPLEX | IFF_MULTICAST, + 0, false); +} + +/* tun0 is a P2P interface */ +static void +mock_tun0(void) +{ + mock_ifaddr4("tun0", "192.0.2.5", "255.255.255.255", "192.0.2.6", + IFF_UP | IFF_RUNNING | IFF_POINTOPOINT | IFF_MULTICAST, false); + mock_ifaddr6("tun0", "2001:db8::5", + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", + "2001:db8::6", + IFF_UP | IFF_RUNNING | IFF_POINTOPOINT | IFF_MULTICAST, 0, false); +} + + +/* Stub rpcbind functions */ +int +listen_addr(const struct sockaddr *sa) +{ + int i; + + if (bind_address_count == 0) + return (1); + + for (i = 0; i < bind_address_count; i++) { + if (bind_addresses[i]->sa_family != sa->sa_family) + continue; + + if (0 == memcmp(bind_addresses[i]->sa_data, sa->sa_data, + sa->sa_len)) + return (1); + } + return (0); +} + +struct netconfig* +rpcbind_get_conf(const char* netid __unused) +{ + /* Use static variables so we can return pointers to them */ + static char* lookups = NULL; + static struct netconfig nconf_udp; +#ifdef INET6 + static struct netconfig nconf_udp6; +#endif /* INET6 */ + + nconf_udp.nc_netid = "udp"; //netid_storage; + nconf_udp.nc_semantics = NC_TPI_CLTS; + nconf_udp.nc_flag = NC_VISIBLE; + nconf_udp.nc_protofmly = (char*)"inet"; + nconf_udp.nc_proto = (char*)"udp"; + nconf_udp.nc_device = (char*)"-"; + nconf_udp.nc_nlookups = 0; + nconf_udp.nc_lookups = &lookups; + +#ifdef INET6 + nconf_udp6.nc_netid = "udp6"; //netid_storage; + nconf_udp6.nc_semantics = NC_TPI_CLTS; + nconf_udp6.nc_flag = NC_VISIBLE; + nconf_udp6.nc_protofmly = (char*)"inet6"; + nconf_udp6.nc_proto = (char*)"udp6"; + nconf_udp6.nc_device = (char*)"-"; + 
nconf_udp6.nc_nlookups = 0; + nconf_udp6.nc_lookups = &lookups; +#endif /* INET6 */ + + if (0 == strncmp("udp", netid, sizeof("udp"))) + return (&nconf_udp); +#ifdef INET6 + else if (0 == strncmp("udp6", netid, sizeof("udp6"))) + return (&nconf_udp6); +#endif /* INET6 */ + else + return (NULL); +} + +/* + * Helper function used by most test cases + * param recvdstaddr If non-null, the uaddr on which the request was received + */ +static char* +do_addrmerge4(const char* recvdstaddr) +{ + struct netbuf caller; + struct sockaddr_in caller_in; + const char *serv_uaddr, *clnt_uaddr, *netid; + + /* caller contains the client's IP address */ + caller.maxlen = sizeof(struct sockaddr_storage); + caller.len = sizeof(caller_in); + caller_in.sin_family = AF_INET; + caller_in.sin_len = sizeof(caller_in); + caller_in.sin_port = 1234; + caller_in.sin_addr.s_addr = inet_addr("192.0.2.1"); + caller.buf = (void*)&caller_in; + if (recvdstaddr != NULL) + clnt_uaddr = recvdstaddr; + else + clnt_uaddr = "192.0.2.1.3.46"; + + /* assume server is bound in INADDR_ANY port 814 */ + serv_uaddr = "0.0.0.0.3.46"; + + netid = "udp"; + return (addrmerge(&caller, serv_uaddr, clnt_uaddr, netid)); +} + +#ifdef INET6 +/* + * Variant of do_addrmerge4 where the caller has an IPv6 address + * param recvdstaddr If non-null, the uaddr on which the request was received + */ +static char* +do_addrmerge6(const char* recvdstaddr) +{ + struct netbuf caller; + struct sockaddr_in6 caller_in6; + const char *serv_uaddr, *clnt_uaddr, *netid; + + /* caller contains the client's IP address */ + caller.maxlen = sizeof(struct sockaddr_storage); + caller.len = sizeof(caller_in6); + caller_in6.sin6_family = AF_INET6; + caller_in6.sin6_len = sizeof(caller_in6); + caller_in6.sin6_port = 1234; + ATF_REQUIRE_EQ(1, inet_pton(AF_INET6, "2001:db8::1", + (void*)&caller_in6.sin6_addr)); + caller.buf = (void*)&caller_in6; + if (recvdstaddr != NULL) + clnt_uaddr = recvdstaddr; + else + clnt_uaddr = "2001:db8::1.3.46"; + + /* 
assume server is bound in INADDR_ANY port 814 */ + serv_uaddr = "::1.3.46"; + + netid = "udp6"; + return (addrmerge(&caller, serv_uaddr, clnt_uaddr, netid)); +} + +/* Variant of do_addrmerge6 where the caller uses a link local address */ +static char* +do_addrmerge6_ll(void) +{ + struct netbuf caller; + struct sockaddr_in6 caller_in6; + const char *serv_uaddr, *clnt_uaddr, *netid; + + /* caller contains the client's IP address */ + caller.maxlen = sizeof(struct sockaddr_storage); + caller.len = sizeof(caller_in6); + caller_in6.sin6_family = AF_INET6; + caller_in6.sin6_len = sizeof(caller_in6); + caller_in6.sin6_port = 1234; + caller_in6.sin6_scope_id = 2; /* same as igb0 */ + ATF_REQUIRE_EQ(1, inet_pton(AF_INET6, "fe80::beef", + (void*)&caller_in6.sin6_addr)); + caller.buf = (void*)&caller_in6; + clnt_uaddr = "fe80::beef.3.46"; + + /* assume server is bound in INADDR_ANY port 814 */ + serv_uaddr = "::1.3.46"; + + netid = "udp6"; + return (addrmerge(&caller, serv_uaddr, clnt_uaddr, netid)); +} +#endif /* INET6 */ + +ATF_TC_WITHOUT_HEAD(addrmerge_noifaddrs); +ATF_TC_BODY(addrmerge_noifaddrs, tc) +{ + char* maddr; + + maddr = do_addrmerge4(NULL); + + /* Since getifaddrs returns null, addrmerge must too */ + ATF_CHECK_EQ(NULL, maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_localhost_only); +ATF_TC_BODY(addrmerge_localhost_only, tc) +{ + char *maddr; + + /* getifaddrs will return localhost only */ + mock_lo0(); + + maddr = do_addrmerge4(NULL); + + /* We must return localhost if there is nothing better */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("127.0.0.1.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_singlehomed); +ATF_TC_BODY(addrmerge_singlehomed, tc) +{ + char *maddr; + + /* getifaddrs will return one public address */ + mock_lo0(); + mock_igb0(); + + maddr = do_addrmerge4(NULL); + + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("192.0.2.2.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_one_addr_on_each_subnet); 
+ATF_TC_BODY(addrmerge_one_addr_on_each_subnet, tc) +{ + char *maddr; + + mock_lo0(); + mock_igb0(); + mock_igb2(); + + maddr = do_addrmerge4(NULL); + + /* We must return the address on the caller's subnet */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("192.0.2.2.3.46", maddr); +} + + +/* + * Like addrmerge_one_addr_on_each_subnet, but getifaddrs returns a different + * order + */ +ATF_TC_WITHOUT_HEAD(addrmerge_one_addr_on_each_subnet_rev); +ATF_TC_BODY(addrmerge_one_addr_on_each_subnet_rev, tc) +{ + char *maddr; + + /* getifaddrs will return one public address on each of two subnets */ + mock_igb2(); + mock_igb0(); + mock_lo0(); + + maddr = do_addrmerge4(NULL); + + /* We must return the address on the caller's subnet */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("192.0.2.2.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_point2point); +ATF_TC_BODY(addrmerge_point2point, tc) +{ + char *maddr; + + /* getifaddrs will return one normal and one p2p address */ + mock_lo0(); + mock_igb2(); + mock_tun0(); + + maddr = do_addrmerge4(NULL); + + /* addrmerge should disprefer P2P interfaces */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("192.0.2.130.3.46", maddr); +} + +/* Like addrmerge_point2point, but getifaddrs returns a different order */ +ATF_TC_WITHOUT_HEAD(addrmerge_point2point_rev); +ATF_TC_BODY(addrmerge_point2point_rev, tc) +{ + char *maddr; + + /* getifaddrs will return one normal and one p2p address */ + mock_tun0(); + mock_igb2(); + mock_lo0(); + + maddr = do_addrmerge4(NULL); + + /* addrmerge should disprefer P2P interfaces */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("192.0.2.130.3.46", maddr); +} + +/* + * Simulate using rpcbind -h to select just one ip when the subnet has + * multiple + */ +ATF_TC_WITHOUT_HEAD(addrmerge_bindip); +ATF_TC_BODY(addrmerge_bindip, tc) +{ + char *maddr; + + /* getifaddrs will return two public addresses on the same subnet */ + mock_lo0(); + mock_igb0(); + mock_igb1(true); + + maddr = do_addrmerge4(NULL); + +
/* We must return the address to which we are bound */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("192.0.2.3.3.46", maddr); +} + +/* Like addrmerge_bindip, but getifaddrs returns a different order */ +ATF_TC_WITHOUT_HEAD(addrmerge_bindip_rev); +ATF_TC_BODY(addrmerge_bindip_rev, tc) +{ + char *maddr; + + /* getifaddrs will return two public addresses on the same subnet */ + mock_igb1(true); + mock_igb0(); + mock_lo0(); + + maddr = do_addrmerge4(NULL); + + /* We must return the address to which we are bound */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("192.0.2.3.3.46", maddr); +} + +/* + * The address on which the request was received is known, and is provided as + * the hint. + */ +ATF_TC_WITHOUT_HEAD(addrmerge_recvdstaddr); +ATF_TC_BODY(addrmerge_recvdstaddr, tc) +{ + char *maddr; + + mock_lo0(); + mock_igb0(); + mock_igb1(false); + + maddr = do_addrmerge4("192.0.2.2.3.46"); + + /* We must return the address on which the request was received */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("192.0.2.2.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_recvdstaddr_rev); +ATF_TC_BODY(addrmerge_recvdstaddr_rev, tc) +{ + char *maddr; + + mock_igb1(false); + mock_igb0(); + mock_lo0(); + + maddr = do_addrmerge4("192.0.2.2.3.46"); + + /* We must return the address on which the request was received */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("192.0.2.2.3.46", maddr); +} + +#ifdef INET6 +ATF_TC_WITHOUT_HEAD(addrmerge_localhost_only6); +ATF_TC_BODY(addrmerge_localhost_only6, tc) +{ + char *maddr; + + /* getifaddrs will return localhost only */ + mock_lo0(); + + maddr = do_addrmerge6(NULL); + + /* We must return localhost if there is nothing better */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("::1.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_singlehomed6); +ATF_TC_BODY(addrmerge_singlehomed6, tc) +{ + char *maddr; + + /* getifaddrs will return one public address */ + mock_lo0(); + mock_igb0(); + + maddr = do_addrmerge6(NULL); + +
ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("2001:db8::2.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_one_addr_on_each_subnet6); +ATF_TC_BODY(addrmerge_one_addr_on_each_subnet6, tc) +{ + char *maddr; + + mock_lo0(); + mock_igb0(); + mock_igb2(); + + maddr = do_addrmerge6(NULL); + + /* We must return the address on the caller's subnet */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("2001:db8::2.3.46", maddr); +} + + +/* + * Like addrmerge_one_addr_on_each_subnet6, but getifaddrs returns a different + * order + */ +ATF_TC_WITHOUT_HEAD(addrmerge_one_addr_on_each_subnet6_rev); +ATF_TC_BODY(addrmerge_one_addr_on_each_subnet6_rev, tc) +{ + char *maddr; + + /* getifaddrs will return one public address on each of two subnets */ + mock_igb2(); + mock_igb0(); + mock_lo0(); + + maddr = do_addrmerge6(NULL); + + /* We must return the address on the caller's subnet */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("2001:db8::2.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_point2point6); +ATF_TC_BODY(addrmerge_point2point6, tc) +{ + char *maddr; + + /* getifaddrs will return one normal and one p2p address */ + mock_lo0(); + mock_igb2(); + mock_tun0(); + + maddr = do_addrmerge6(NULL); + + /* addrmerge should disprefer P2P interfaces */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("2001:db8:1::2.3.46", maddr); +} + +/* Like addrmerge_point2point6, but getifaddrs returns a different order */ +ATF_TC_WITHOUT_HEAD(addrmerge_point2point6_rev); +ATF_TC_BODY(addrmerge_point2point6_rev, tc) +{ + char *maddr; + + /* getifaddrs will return one normal and one p2p address */ + mock_tun0(); + mock_igb2(); + mock_lo0(); + + maddr = do_addrmerge6(NULL); + + /* addrmerge should disprefer P2P interfaces */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("2001:db8:1::2.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_bindip6); +ATF_TC_BODY(addrmerge_bindip6, tc) +{ + char *maddr; + + /* getifaddrs will return two public addresses on the same subnet */ + mock_lo0(); +
mock_igb0(); + mock_igb1(true); + + maddr = do_addrmerge6(NULL); + + /* We must return the address to which we are bound */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("2001:db8::3.3.46", maddr); +} + +/* Like addrmerge_bindip6, but getifaddrs returns a different order */ +ATF_TC_WITHOUT_HEAD(addrmerge_bindip6_rev); +ATF_TC_BODY(addrmerge_bindip6_rev, tc) +{ + char *maddr; + + /* getifaddrs will return one public address on each of two subnets */ + mock_igb1(true); + mock_igb0(); + mock_lo0(); + + maddr = do_addrmerge6(NULL); + + /* We must return the address to which we are bound */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("2001:db8::3.3.46", maddr); +} + +/* + * IPv6 Link Local addresses with the same scope id as the caller, if the caller + * is also a link local address, should be preferred + */ +ATF_TC_WITHOUT_HEAD(addrmerge_ipv6_linklocal); +ATF_TC_BODY(addrmerge_ipv6_linklocal, tc) +{ + char *maddr; + + /* + * getifaddrs will return two link local addresses with the same netmask + * and prefix but different scope IDs + */ + mock_igb1(false); + mock_igb0(); + mock_lo0(); + + maddr = do_addrmerge6_ll(); + + /* We must return the address to which we are bound */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("fe80::2.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_ipv6_linklocal_rev); +ATF_TC_BODY(addrmerge_ipv6_linklocal_rev, tc) +{ + char *maddr; + + /* + * getifaddrs will return two link local addresses with the same netmask + * and prefix but different scope IDs + */ + mock_lo0(); + mock_igb0(); + mock_igb1(false); + + maddr = do_addrmerge6_ll(); + + /* We must return the address to which we are bound */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("fe80::2.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_recvdstaddr6); +ATF_TC_BODY(addrmerge_recvdstaddr6, tc) +{ + char *maddr; + + mock_lo0(); + mock_igb0(); + mock_igb1(false); + + maddr = do_addrmerge6("2001:db8::2.3.46"); + + /* We must return the address on which the request was received 
*/ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("2001:db8::2.3.46", maddr); +} + +ATF_TC_WITHOUT_HEAD(addrmerge_recvdstaddr6_rev); +ATF_TC_BODY(addrmerge_recvdstaddr6_rev, tc) +{ + char *maddr; + + mock_igb1(false); + mock_igb0(); + mock_lo0(); + + maddr = do_addrmerge6("2001:db8::2.3.46"); + + /* We must return the address on which the request was received */ + ATF_REQUIRE(maddr != NULL); + ATF_CHECK_STREQ("2001:db8::2.3.46", maddr); +} +#endif /* INET6 */ + + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, addrmerge_noifaddrs); + ATF_TP_ADD_TC(tp, addrmerge_localhost_only); + ATF_TP_ADD_TC(tp, addrmerge_singlehomed); + ATF_TP_ADD_TC(tp, addrmerge_one_addr_on_each_subnet); + ATF_TP_ADD_TC(tp, addrmerge_one_addr_on_each_subnet_rev); + ATF_TP_ADD_TC(tp, addrmerge_point2point); + ATF_TP_ADD_TC(tp, addrmerge_point2point_rev); + ATF_TP_ADD_TC(tp, addrmerge_bindip); + ATF_TP_ADD_TC(tp, addrmerge_bindip_rev); + ATF_TP_ADD_TC(tp, addrmerge_recvdstaddr); + ATF_TP_ADD_TC(tp, addrmerge_recvdstaddr_rev); +#ifdef INET6 + ATF_TP_ADD_TC(tp, addrmerge_localhost_only6); + ATF_TP_ADD_TC(tp, addrmerge_singlehomed6); + ATF_TP_ADD_TC(tp, addrmerge_one_addr_on_each_subnet6); + ATF_TP_ADD_TC(tp, addrmerge_one_addr_on_each_subnet6_rev); + ATF_TP_ADD_TC(tp, addrmerge_point2point6); + ATF_TP_ADD_TC(tp, addrmerge_point2point6_rev); + ATF_TP_ADD_TC(tp, addrmerge_bindip6); + ATF_TP_ADD_TC(tp, addrmerge_bindip6_rev); + ATF_TP_ADD_TC(tp, addrmerge_ipv6_linklocal); + ATF_TP_ADD_TC(tp, addrmerge_ipv6_linklocal_rev); + ATF_TP_ADD_TC(tp, addrmerge_recvdstaddr6); + ATF_TP_ADD_TC(tp, addrmerge_recvdstaddr6_rev); +#endif + + return (atf_no_error()); +} diff --git a/usr.sbin/rpcbind/util.c b/usr.sbin/rpcbind/util.c index 8ddb13eec68d..da6a5abeea75 100644 --- a/usr.sbin/rpcbind/util.c +++ b/usr.sbin/rpcbind/util.c @@ -56,7 +56,7 @@ static struct sockaddr_in *local_in4; static struct sockaddr_in6 *local_in6; #endif -static int bitmaskcmp(void *, void *, void *, int); +static int bitmaskcmp(struct sockaddr 
*, struct sockaddr *, struct sockaddr *); /* * For all bits set in "mask", compare the corresponding bits in @@ -64,10 +64,34 @@ static int bitmaskcmp(void *, void *, void *, int); * match. */ static int -bitmaskcmp(void *dst, void *src, void *mask, int bytelen) +bitmaskcmp(struct sockaddr *dst, struct sockaddr *src, struct sockaddr *mask) { int i; - u_int8_t *p1 = dst, *p2 = src, *netmask = mask; + u_int8_t *p1, *p2, *netmask; + int bytelen; + + if (dst->sa_family != src->sa_family || + dst->sa_family != mask->sa_family) + return (1); + + switch (dst->sa_family) { + case AF_INET: + p1 = (uint8_t*) &SA2SINADDR(dst); + p2 = (uint8_t*) &SA2SINADDR(src); + netmask = (uint8_t*) &SA2SINADDR(mask); + bytelen = sizeof(struct in_addr); + break; +#ifdef INET6 + case AF_INET6: + p1 = (uint8_t*) &SA2SIN6ADDR(dst); + p2 = (uint8_t*) &SA2SIN6ADDR(src); + netmask = (uint8_t*) &SA2SIN6ADDR(mask); + bytelen = sizeof(struct in6_addr); + break; +#endif + default: + return (1); + } for (i = 0; i < bytelen; i++) if ((p1[i] & netmask[i]) != (p2[i] & netmask[i])) @@ -86,16 +110,18 @@ bitmaskcmp(void *dst, void *src, void *mask, int bytelen) * string which should be freed by the caller. On error, returns NULL. */ char * -addrmerge(struct netbuf *caller, char *serv_uaddr, char *clnt_uaddr, - char *netid) +addrmerge(struct netbuf *caller, const char *serv_uaddr, const char *clnt_uaddr, + const char *netid) { struct ifaddrs *ifap, *ifp = NULL, *bestif; struct netbuf *serv_nbp = NULL, *hint_nbp = NULL, tbuf; struct sockaddr *caller_sa, *hint_sa, *ifsa, *ifmasksa, *serv_sa; struct sockaddr_storage ss; struct netconfig *nconf; - char *caller_uaddr = NULL, *hint_uaddr = NULL; + char *caller_uaddr = NULL; + const char *hint_uaddr = NULL; char *ret = NULL; + int bestif_goodness; #ifdef ND_DEBUG if (debugging) @@ -139,19 +165,29 @@ addrmerge(struct netbuf *caller, char *serv_uaddr, char *clnt_uaddr, goto freeit; /* - * Loop through all interfaces. 
For each interface, see if it - * is either the loopback interface (which we always listen - * on) or is one of the addresses the program bound to (the - * wildcard by default, or a subset if -h is specified) and - * the network portion of its address is equal to that of the - * client. If so, we have found the interface that we want to - * use. + * Loop through all interface addresses. We are listening to an address + * if any of the following are true: + * a) It's a loopback address + * b) It was specified with the -h command line option + * c) There were no -h command line options. + * + * Among addresses on which we are listening, choose in order of + * preference an address that is: + * + * a) Equal to the hint + * b) A link local address with the same scope ID as the client's + * address, if the client's address is also link local + * c) An address on the same subnet as the client's address + * d) A non-localhost, non-p2p address + * e) Any usable address */ bestif = NULL; + bestif_goodness = 0; for (ifap = ifp; ifap != NULL; ifap = ifap->ifa_next) { ifsa = ifap->ifa_addr; ifmasksa = ifap->ifa_netmask; + /* Skip addresses where we don't listen */ if (ifsa == NULL || ifsa->sa_family != hint_sa->sa_family || !(ifap->ifa_flags & IFF_UP)) continue; @@ -159,21 +195,29 @@ addrmerge(struct netbuf *caller, char *serv_uaddr, char *clnt_uaddr, if (!(ifap->ifa_flags & IFF_LOOPBACK) && !listen_addr(ifsa)) continue; - switch (hint_sa->sa_family) { - case AF_INET: - /* - * If the hint address matches this interface - * address/netmask, then we're done. 
- */ - if (!bitmaskcmp(&SA2SINADDR(ifsa), - &SA2SINADDR(hint_sa), &SA2SINADDR(ifmasksa), - sizeof(struct in_addr))) { - bestif = ifap; - goto found; - } - break; + if ((hint_sa->sa_family == AF_INET) && + ((((struct sockaddr_in*)hint_sa)->sin_addr.s_addr == + ((struct sockaddr_in*)ifsa)->sin_addr.s_addr))) { + const int goodness = 4; + + bestif_goodness = goodness; + bestif = ifap; + goto found; + } #ifdef INET6 - case AF_INET6: + if ((hint_sa->sa_family == AF_INET6) && + (0 == memcmp(&((struct sockaddr_in6*)hint_sa)->sin6_addr, + &((struct sockaddr_in6*)ifsa)->sin6_addr, + sizeof(struct in6_addr))) && + (((struct sockaddr_in6*)hint_sa)->sin6_scope_id == + (((struct sockaddr_in6*)ifsa)->sin6_scope_id))) { + const int goodness = 4; + + bestif_goodness = goodness; + bestif = ifap; + goto found; + } + if (hint_sa->sa_family == AF_INET6) { /* * For v6 link local addresses, if the caller is on * a link-local address then use the scope id to see @@ -184,28 +228,33 @@ addrmerge(struct netbuf *caller, char *serv_uaddr, char *clnt_uaddr, IN6_IS_ADDR_LINKLOCAL(&SA2SIN6ADDR(hint_sa))) { if (SA2SIN6(ifsa)->sin6_scope_id == SA2SIN6(caller_sa)->sin6_scope_id) { - bestif = ifap; - goto found; - } - } else if (!bitmaskcmp(&SA2SIN6ADDR(ifsa), - &SA2SIN6ADDR(hint_sa), &SA2SIN6ADDR(ifmasksa), - sizeof(struct in6_addr))) { - bestif = ifap; - goto found; - } - break; -#endif - default: - continue; - } + const int goodness = 3; - /* - * Remember the first possibly useful interface, preferring - * "normal" to point-to-point and loopback ones. 
- */ - if (bestif == NULL || - (!(ifap->ifa_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) && - (bestif->ifa_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)))) + if (bestif_goodness < goodness) { + bestif = ifap; + bestif_goodness = goodness; + } + } + } + } +#endif /* INET6 */ + if (0 == bitmaskcmp(hint_sa, ifsa, ifmasksa)) { + const int goodness = 2; + + if (bestif_goodness < goodness) { + bestif = ifap; + bestif_goodness = goodness; + } + } + if (!(ifap->ifa_flags & (IFF_LOOPBACK | IFF_POINTOPOINT))) { + const int goodness = 1; + + if (bestif_goodness < goodness) { + bestif = ifap; + bestif_goodness = goodness; + } + } + if (bestif == NULL) bestif = ifap; } if (bestif == NULL) diff --git a/usr.sbin/services_mkdb/services_mkdb.c b/usr.sbin/services_mkdb/services_mkdb.c index a91340e7d7d2..9ea66deed689 100644 --- a/usr.sbin/services_mkdb/services_mkdb.c +++ b/usr.sbin/services_mkdb/services_mkdb.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -91,6 +92,8 @@ main(int argc, char *argv[]) size_t cnt = 0; StringList *sl, ***svc; size_t port, proto; + char *dbname_dir; + int dbname_dir_fd = -1; setprogname(argv[0]); @@ -138,7 +141,7 @@ main(int argc, char *argv[]) err(1, "Cannot install exit handler"); (void)snprintf(tname, sizeof(tname), "%s.tmp", dbname); - db = dbopen(tname, O_RDWR | O_CREAT | O_EXCL, + db = dbopen(tname, O_RDWR | O_CREAT | O_EXCL | O_SYNC, (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH), DB_HASH, &hinfo); if (!db) err(1, "Error opening temporary database `%s'", tname); @@ -164,8 +167,21 @@ main(int argc, char *argv[]) if ((db->close)(db)) err(1, "Error closing temporary database `%s'", tname); - if (rename(tname, dbname) == -1) + /* + * Make sure file is safe on disk.
To improve performance we will call + * fsync() on the directory where the file lies + */ + if (rename(tname, dbname) == -1 || + (dbname_dir = dirname(dbname)) == NULL || + (dbname_dir_fd = open(dbname_dir, O_RDONLY|O_DIRECTORY)) == -1 || + fsync(dbname_dir_fd) != 0) { + if (dbname_dir_fd != -1) + close(dbname_dir_fd); err(1, "Cannot rename `%s' to `%s'", tname, dbname); + } + + if (dbname_dir_fd != -1) + close(dbname_dir_fd); return 0; }