From 7f65185940d1cc62c0e76d6fab92236ca75d42d5 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 17 Sep 2019 08:09:39 +0000 Subject: [PATCH 01/46] vfs: fix braino resulting in NULL pointer deref in r352424 The breakage was added after all the testing and the testing which followed was not sufficient to find it. Reported by: pho Sponsored by: The FreeBSD Foundation --- sys/kern/vfs_default.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 163a617a3ec5..2fd95fbc8b46 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -613,11 +613,13 @@ vop_stdgetwritemount(ap) vfs_op_thread_exit(mp); } else { MNT_ILOCK(mp); - if (mp == vp->v_mount) + if (mp == vp->v_mount) { MNT_REF(mp); - else + MNT_IUNLOCK(mp); + } else { + MNT_IUNLOCK(mp); mp = NULL; - MNT_IUNLOCK(mp); + } } *(ap->a_mpp) = mp; return (0); From 3c193115440ea21d7811c20326488be4346ed4a2 Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Tue, 17 Sep 2019 09:46:42 +0000 Subject: [PATCH 02/46] Only allow a SCTP-AUTH shared key to be updated by the application if it is not deactivated and not used. This avoids a use-after-free problem. 
Reported by: da_cheng_shao@yeah.net MFC after: 3 days --- sys/netinet/sctp_auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netinet/sctp_auth.c b/sys/netinet/sctp_auth.c index d379dd0a143e..f286ebf9d8d4 100644 --- a/sys/netinet/sctp_auth.c +++ b/sys/netinet/sctp_auth.c @@ -523,7 +523,7 @@ sctp_insert_sharedkey(struct sctp_keyhead *shared_keys, } else if (new_skey->keyid == skey->keyid) { /* replace the existing key */ /* verify this key *can* be replaced */ - if ((skey->deactivated) && (skey->refcount > 1)) { + if ((skey->deactivated) || (skey->refcount > 1)) { SCTPDBG(SCTP_DEBUG_AUTH1, "can't replace shared key id %u\n", new_skey->keyid); From 144c4ca0398fc9cb2c7e602baba7009871ef8ce8 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Tue, 17 Sep 2019 09:47:35 +0000 Subject: [PATCH 03/46] loader: factor out guard location setup from Realloc To simplify and make it easier to read, use ga_Bytes field from guard structure. --- stand/libsa/zalloc_malloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stand/libsa/zalloc_malloc.c b/stand/libsa/zalloc_malloc.c index 2cc14d80641d..f0eddb123178 100644 --- a/stand/libsa/zalloc_malloc.c +++ b/stand/libsa/zalloc_malloc.c @@ -156,7 +156,9 @@ Realloc(void *ptr, size_t size, const char *file, int line) if ((res = Malloc(size, file, line)) != NULL) { if (ptr) { - old = *(size_t *)((char *)ptr - MALLOCALIGN) - MALLOCALIGN; + Guard *g = (Guard *)((char *)ptr - MALLOCALIGN); + + old = g->ga_Bytes - MALLOCALIGN; if (old < size) bcopy(ptr, res, old); else From e57c0c2afbaef051b01022588ed54a6c4ace79da Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Tue, 17 Sep 2019 11:35:53 +0000 Subject: [PATCH 04/46] loader: cstyle cleanup libsa zalloc sources Clean up libstand zalloc* sources. Note that it is not 100% whitespace cleanup. I also reduced block in znalloc and zfree as those were obvious simplifications and did help to save one level of indent. 
--- stand/libsa/zalloc.c | 310 ++++++++++++++++++------------------ stand/libsa/zalloc_defs.h | 29 ++-- stand/libsa/zalloc_malloc.c | 161 ++++++++++--------- stand/libsa/zalloc_mem.h | 23 +-- stand/libsa/zalloc_protos.h | 7 +- 5 files changed, 270 insertions(+), 260 deletions(-) diff --git a/stand/libsa/zalloc.c b/stand/libsa/zalloc.c index 4d1ec6291210..f359e1830aa4 100644 --- a/stand/libsa/zalloc.c +++ b/stand/libsa/zalloc.c @@ -1,5 +1,5 @@ /* - * This module derived from code donated to the FreeBSD Project by + * This module derived from code donated to the FreeBSD Project by * Matthew Dillon * * Copyright (c) 1998 The FreeBSD Project @@ -31,10 +31,10 @@ __FBSDID("$FreeBSD$"); /* - * LIB/MEMORY/ZALLOC.C - self contained low-overhead memory pool/allocation + * LIB/MEMORY/ZALLOC.C - self contained low-overhead memory pool/allocation * subsystem * - * This subsystem implements memory pools and memory allocation + * This subsystem implements memory pools and memory allocation * routines. * * Pools are managed via a linked list of 'free' areas. Allocating @@ -43,7 +43,7 @@ __FBSDID("$FreeBSD$"); * to allocate the entire pool without incuring any structural overhead. * * The system works best when allocating similarly-sized chunks of - * memory. Care must be taken to avoid fragmentation when + * memory. Care must be taken to avoid fragmentation when * allocating/deallocating dissimilar chunks. * * When a memory pool is first allocated, the entire pool is marked as @@ -53,7 +53,7 @@ __FBSDID("$FreeBSD$"); * available. * * z[n]xalloc() works like z[n]alloc() but the allocation is made from - * within the specified address range. If the segment could not be + * within the specified address range. If the segment could not be * allocated, NULL is returned. WARNING! The address range will be * aligned to an 8 or 16 byte boundry depending on the cpu so if you * give an unaligned address range, unexpected results may occur. 
@@ -88,56 +88,54 @@ typedef char assert_align[(sizeof(struct MemNode) <= MALLOCALIGN) ? 1 : -1]; void * znalloc(MemPool *mp, uintptr_t bytes) { - /* - * align according to pool object size (can be 0). This is - * inclusive of the MEMNODE_SIZE_MASK minimum alignment. - * - */ - bytes = (bytes + MEMNODE_SIZE_MASK) & ~MEMNODE_SIZE_MASK; - - if (bytes == 0) - return((void *)-1); - - /* - * locate freelist entry big enough to hold the object. If all objects - * are the same size, this is a constant-time function. - */ - - if (bytes <= mp->mp_Size - mp->mp_Used) { MemNode **pmn; MemNode *mn; - for (pmn = &mp->mp_First; (mn=*pmn) != NULL; pmn = &mn->mr_Next) { - if (bytes > mn->mr_Bytes) - continue; + /* + * align according to pool object size (can be 0). This is + * inclusive of the MEMNODE_SIZE_MASK minimum alignment. + * + */ + bytes = (bytes + MEMNODE_SIZE_MASK) & ~MEMNODE_SIZE_MASK; - /* - * Cut a chunk of memory out of the beginning of this - * block and fixup the link appropriately. - */ + if (bytes == 0) + return ((void *)-1); - { + /* + * locate freelist entry big enough to hold the object. If all objects + * are the same size, this is a constant-time function. + */ + + if (bytes > mp->mp_Size - mp->mp_Used) + return (NULL); + + for (pmn = &mp->mp_First; (mn = *pmn) != NULL; pmn = &mn->mr_Next) { char *ptr = (char *)mn; + if (bytes > mn->mr_Bytes) + continue; + + /* + * Cut a chunk of memory out of the beginning of this + * block and fixup the link appropriately. + */ if (mn->mr_Bytes == bytes) { - *pmn = mn->mr_Next; + *pmn = mn->mr_Next; } else { - mn = (MemNode *)((char *)mn + bytes); - mn->mr_Next = ((MemNode *)ptr)->mr_Next; - mn->mr_Bytes = ((MemNode *)ptr)->mr_Bytes - bytes; - *pmn = mn; + mn = (MemNode *)((char *)mn + bytes); + mn->mr_Next = ((MemNode *)ptr)->mr_Next; + mn->mr_Bytes = ((MemNode *)ptr)->mr_Bytes - bytes; + *pmn = mn; } mp->mp_Used += bytes; return(ptr); - } } - } - /* - * Memory pool is full, return NULL. 
- */ + /* + * Memory pool is full, return NULL. + */ - return(NULL); + return (NULL); } /* @@ -147,99 +145,97 @@ znalloc(MemPool *mp, uintptr_t bytes) void zfree(MemPool *mp, void *ptr, uintptr_t bytes) { - /* - * align according to pool object size (can be 0). This is - * inclusive of the MEMNODE_SIZE_MASK minimum alignment. - */ - bytes = (bytes + MEMNODE_SIZE_MASK) & ~MEMNODE_SIZE_MASK; - - if (bytes == 0) - return; - - /* - * panic if illegal pointer - */ - - if ((char *)ptr < (char *)mp->mp_Base || - (char *)ptr + bytes > (char *)mp->mp_End || - ((uintptr_t)ptr & MEMNODE_SIZE_MASK) != 0) - panic("zfree(%p,%ju): wild pointer", ptr, (uintmax_t)bytes); - - /* - * free the segment - */ - - { MemNode **pmn; MemNode *mn; + /* + * align according to pool object size (can be 0). This is + * inclusive of the MEMNODE_SIZE_MASK minimum alignment. + */ + bytes = (bytes + MEMNODE_SIZE_MASK) & ~MEMNODE_SIZE_MASK; + + if (bytes == 0) + return; + + /* + * panic if illegal pointer + */ + + if ((char *)ptr < (char *)mp->mp_Base || + (char *)ptr + bytes > (char *)mp->mp_End || + ((uintptr_t)ptr & MEMNODE_SIZE_MASK) != 0) + panic("zfree(%p,%ju): wild pointer", ptr, (uintmax_t)bytes); + + /* + * free the segment + */ mp->mp_Used -= bytes; for (pmn = &mp->mp_First; (mn = *pmn) != NULL; pmn = &mn->mr_Next) { - /* - * If area between last node and current node - * - check range - * - check merge with next area - * - check merge with previous area - */ - if ((char *)ptr <= (char *)mn) { /* - * range check + * If area between last node and current node + * - check range + * - check merge with next area + * - check merge with previous area */ - if ((char *)ptr + bytes > (char *)mn) { - panic("zfree(%p,%ju): corrupt memlist1", ptr, - (uintmax_t)bytes); + if ((char *)ptr <= (char *)mn) { + /* + * range check + */ + if ((char *)ptr + bytes > (char *)mn) { + panic("zfree(%p,%ju): corrupt memlist1", ptr, + (uintmax_t)bytes); + } + + /* + * merge against next area or create independant area + 
*/ + + if ((char *)ptr + bytes == (char *)mn) { + ((MemNode *)ptr)->mr_Next = mn->mr_Next; + ((MemNode *)ptr)->mr_Bytes = + bytes + mn->mr_Bytes; + } else { + ((MemNode *)ptr)->mr_Next = mn; + ((MemNode *)ptr)->mr_Bytes = bytes; + } + *pmn = mn = (MemNode *)ptr; + + /* + * merge against previous area (if there is a previous + * area). + */ + + if (pmn != &mp->mp_First) { + if ((char *)pmn + ((MemNode*)pmn)->mr_Bytes == + (char *)ptr) { + ((MemNode *)pmn)->mr_Next = mn->mr_Next; + ((MemNode *)pmn)->mr_Bytes += + mn->mr_Bytes; + mn = (MemNode *)pmn; + } + } + return; } - - /* - * merge against next area or create independant area - */ - - if ((char *)ptr + bytes == (char *)mn) { - ((MemNode *)ptr)->mr_Next = mn->mr_Next; - ((MemNode *)ptr)->mr_Bytes= bytes + mn->mr_Bytes; - } else { - ((MemNode *)ptr)->mr_Next = mn; - ((MemNode *)ptr)->mr_Bytes= bytes; + if ((char *)ptr < (char *)mn + mn->mr_Bytes) { + panic("zfree(%p,%ju): corrupt memlist2", ptr, + (uintmax_t)bytes); } - *pmn = mn = (MemNode *)ptr; - - /* - * merge against previous area (if there is a previous - * area). - */ - - if (pmn != &mp->mp_First) { - if ((char*)pmn + ((MemNode*)pmn)->mr_Bytes == (char*)ptr) { - ((MemNode *)pmn)->mr_Next = mn->mr_Next; - ((MemNode *)pmn)->mr_Bytes += mn->mr_Bytes; - mn = (MemNode *)pmn; - } - } - return; - /* NOT REACHED */ - } - if ((char *)ptr < (char *)mn + mn->mr_Bytes) { - panic("zfree(%p,%ju): corrupt memlist2", ptr, - (uintmax_t)bytes); - } } /* * We are beyond the last MemNode, append new MemNode. Merge against * previous area if possible. 
*/ - if (pmn == &mp->mp_First || - (char *)pmn + ((MemNode *)pmn)->mr_Bytes != (char *)ptr - ) { - ((MemNode *)ptr)->mr_Next = NULL; - ((MemNode *)ptr)->mr_Bytes = bytes; - *pmn = (MemNode *)ptr; - mn = (MemNode *)ptr; + if (pmn == &mp->mp_First || + (char *)pmn + ((MemNode *)pmn)->mr_Bytes != (char *)ptr) { + ((MemNode *)ptr)->mr_Next = NULL; + ((MemNode *)ptr)->mr_Bytes = bytes; + *pmn = (MemNode *)ptr; + mn = (MemNode *)ptr; } else { - ((MemNode *)pmn)->mr_Bytes += bytes; - mn = (MemNode *)pmn; + ((MemNode *)pmn)->mr_Bytes += bytes; + mn = (MemNode *)pmn; } - } } /* @@ -256,26 +252,26 @@ zfree(MemPool *mp, void *ptr, uintptr_t bytes) void zextendPool(MemPool *mp, void *base, uintptr_t bytes) { - if (mp->mp_Size == 0) { - mp->mp_Base = base; - mp->mp_Used = bytes; - mp->mp_End = (char *)base + bytes; - mp->mp_Size = bytes; - } else { - void *pend = (char *)mp->mp_Base + mp->mp_Size; + if (mp->mp_Size == 0) { + mp->mp_Base = base; + mp->mp_Used = bytes; + mp->mp_End = (char *)base + bytes; + mp->mp_Size = bytes; + } else { + void *pend = (char *)mp->mp_Base + mp->mp_Size; - if (base < mp->mp_Base) { - mp->mp_Size += (char *)mp->mp_Base - (char *)base; - mp->mp_Used += (char *)mp->mp_Base - (char *)base; - mp->mp_Base = base; + if (base < mp->mp_Base) { + mp->mp_Size += (char *)mp->mp_Base - (char *)base; + mp->mp_Used += (char *)mp->mp_Base - (char *)base; + mp->mp_Base = base; + } + base = (char *)base + bytes; + if (base > pend) { + mp->mp_Size += (char *)base - (char *)pend; + mp->mp_Used += (char *)base - (char *)pend; + mp->mp_End = (char *)base; + } } - base = (char *)base + bytes; - if (base > pend) { - mp->mp_Size += (char *)base - (char *)pend; - mp->mp_Used += (char *)base - (char *)pend; - mp->mp_End = (char *)base; - } - } } #ifdef ZALLOCDEBUG @@ -283,34 +279,32 @@ zextendPool(MemPool *mp, void *base, uintptr_t bytes) void zallocstats(MemPool *mp) { - int abytes = 0; - int hbytes = 0; - int fcount = 0; - MemNode *mn; + int abytes = 0; + int hbytes = 0; 
+ int fcount = 0; + MemNode *mn; - printf("%d bytes reserved", (int) mp->mp_Size); + printf("%d bytes reserved", (int)mp->mp_Size); - mn = mp->mp_First; + mn = mp->mp_First; - if ((void *)mn != (void *)mp->mp_Base) { - abytes += (char *)mn - (char *)mp->mp_Base; - } - - while (mn) { - if ((char *)mn + mn->mr_Bytes != mp->mp_End) { - hbytes += mn->mr_Bytes; - ++fcount; + if ((void *)mn != (void *)mp->mp_Base) { + abytes += (char *)mn - (char *)mp->mp_Base; } - if (mn->mr_Next) - abytes += (char *)mn->mr_Next - ((char *)mn + mn->mr_Bytes); - mn = mn->mr_Next; - } - printf(" %d bytes allocated\n%d fragments (%d bytes fragmented)\n", - abytes, - fcount, - hbytes - ); + + while (mn != NULL) { + if ((char *)mn + mn->mr_Bytes != mp->mp_End) { + hbytes += mn->mr_Bytes; + ++fcount; + } + if (mn->mr_Next != NULL) { + abytes += (char *)mn->mr_Next - + ((char *)mn + mn->mr_Bytes); + } + mn = mn->mr_Next; + } + printf(" %d bytes allocated\n%d fragments (%d bytes fragmented)\n", + abytes, fcount, hbytes); } #endif - diff --git a/stand/libsa/zalloc_defs.h b/stand/libsa/zalloc_defs.h index 7f2cc1202c9b..bb7c593ba822 100644 --- a/stand/libsa/zalloc_defs.h +++ b/stand/libsa/zalloc_defs.h @@ -1,5 +1,5 @@ /* - * This module derived from code donated to the FreeBSD Project by + * This module derived from code donated to the FreeBSD Project by * Matthew Dillon * * Copyright (c) 1998 The FreeBSD Project @@ -33,23 +33,26 @@ * DEFS.H */ -#define USEGUARD /* use stard/end guard bytes */ -#define USEENDGUARD -#define DMALLOCDEBUG /* add debugging code to gather stats */ -#define ZALLOCDEBUG +#ifndef _ZALLOC_DEFS_H +#define _ZALLOC_DEFS_H + +#define USEGUARD /* use stard/end guard bytes */ +#define USEENDGUARD +#define DMALLOCDEBUG /* add debugging code to gather stats */ +#define ZALLOCDEBUG #include #include "stand.h" #include "zalloc_mem.h" -#define Library extern +#define Library extern /* * block extension for sbrk() */ -#define BLKEXTEND (4 * 1024) -#define BLKEXTENDMASK (BLKEXTEND - 1) 
+#define BLKEXTEND (4 * 1024) +#define BLKEXTENDMASK (BLKEXTEND - 1) /* * Required malloc alignment. @@ -68,11 +71,13 @@ #define MALLOCALIGN_MASK (MALLOCALIGN - 1) typedef struct Guard { - size_t ga_Bytes; - size_t ga_Magic; /* must be at least 32 bits */ + size_t ga_Bytes; + size_t ga_Magic; /* must be at least 32 bits */ } Guard; -#define GAMAGIC 0x55FF44FD -#define GAFREE 0x5F54F4DF +#define GAMAGIC 0x55FF44FD +#define GAFREE 0x5F54F4DF #include "zalloc_protos.h" + +#endif /* _ZALLOC_DEFS_H */ diff --git a/stand/libsa/zalloc_malloc.c b/stand/libsa/zalloc_malloc.c index f0eddb123178..da68cf331961 100644 --- a/stand/libsa/zalloc_malloc.c +++ b/stand/libsa/zalloc_malloc.c @@ -1,5 +1,5 @@ /* - * This module derived from code donated to the FreeBSD Project by + * This module derived from code donated to the FreeBSD Project by * Matthew Dillon * * Copyright (c) 1998 The FreeBSD Project @@ -53,141 +53,145 @@ void mallocstats(void); void * Malloc(size_t bytes, const char *file, int line) { - Guard *res; + Guard *res; - if (bytes == 0) - return (NULL); + if (bytes == 0) + return (NULL); #ifdef USEENDGUARD - bytes += MALLOCALIGN + 1; + bytes += MALLOCALIGN + 1; #else - bytes += MALLOCALIGN; + bytes += MALLOCALIGN; #endif - while ((res = znalloc(&MallocPool, bytes)) == NULL) { - int incr = (bytes + BLKEXTENDMASK) & ~BLKEXTENDMASK; - char *base; + while ((res = znalloc(&MallocPool, bytes)) == NULL) { + int incr = (bytes + BLKEXTENDMASK) & ~BLKEXTENDMASK; + char *base; - if ((base = sbrk(incr)) == (char *)-1) - return(NULL); - zextendPool(&MallocPool, base, incr); - zfree(&MallocPool, base, incr); - } + if ((base = sbrk(incr)) == (char *)-1) + return (NULL); + zextendPool(&MallocPool, base, incr); + zfree(&MallocPool, base, incr); + } #ifdef DMALLOCDEBUG - if (++MallocCount > MallocMax) - MallocMax = MallocCount; + if (++MallocCount > MallocMax) + MallocMax = MallocCount; #endif #ifdef USEGUARD - res->ga_Magic = GAMAGIC; + res->ga_Magic = GAMAGIC; #endif - res->ga_Bytes = 
bytes; + res->ga_Bytes = bytes; #ifdef USEENDGUARD - *((signed char *)res + bytes - 1) = -2; + *((signed char *)res + bytes - 1) = -2; #endif - return((char *)res + MALLOCALIGN); + return ((char *)res + MALLOCALIGN); } void Free(void *ptr, const char *file, int line) { - size_t bytes; + size_t bytes; - if (ptr != NULL) { - Guard *res = (void *)((char *)ptr - MALLOCALIGN); + if (ptr != NULL) { + Guard *res = (void *)((char *)ptr - MALLOCALIGN); - if (file == NULL) - file = "unknown"; + if (file == NULL) + file = "unknown"; #ifdef USEGUARD - if (res->ga_Magic == GAFREE) { - printf("free: duplicate free @ %p from %s:%d\n", ptr, file, line); - return; - } - if (res->ga_Magic != GAMAGIC) - panic("free: guard1 fail @ %p from %s:%d", ptr, file, line); - res->ga_Magic = GAFREE; + if (res->ga_Magic == GAFREE) { + printf("free: duplicate free @ %p from %s:%d\n", + ptr, file, line); + return; + } + if (res->ga_Magic != GAMAGIC) + panic("free: guard1 fail @ %p from %s:%d", + ptr, file, line); + res->ga_Magic = GAFREE; #endif #ifdef USEENDGUARD - if (*((signed char *)res + res->ga_Bytes - 1) == -1) { - printf("free: duplicate2 free @ %p from %s:%d\n", ptr, file, line); - return; - } - if (*((signed char *)res + res->ga_Bytes - 1) != -2) - panic("free: guard2 fail @ %p + %zu from %s:%d", ptr, res->ga_Bytes - MALLOCALIGN, file, line); - *((signed char *)res + res->ga_Bytes - 1) = -1; + if (*((signed char *)res + res->ga_Bytes - 1) == -1) { + printf("free: duplicate2 free @ %p from %s:%d\n", + ptr, file, line); + return; + } + if (*((signed char *)res + res->ga_Bytes - 1) != -2) + panic("free: guard2 fail @ %p + %zu from %s:%d", + ptr, res->ga_Bytes - MALLOCALIGN, file, line); + *((signed char *)res + res->ga_Bytes - 1) = -1; #endif - bytes = res->ga_Bytes; - zfree(&MallocPool, res, bytes); + bytes = res->ga_Bytes; + zfree(&MallocPool, res, bytes); #ifdef DMALLOCDEBUG - --MallocCount; + --MallocCount; #endif - } + } } void * Calloc(size_t n1, size_t n2, const char *file, int line) 
{ - uintptr_t bytes = (uintptr_t)n1 * (uintptr_t)n2; - void *res; + uintptr_t bytes = (uintptr_t)n1 * (uintptr_t)n2; + void *res; - if ((res = Malloc(bytes, file, line)) != NULL) { - bzero(res, bytes); + if ((res = Malloc(bytes, file, line)) != NULL) { + bzero(res, bytes); #ifdef DMALLOCDEBUG - if (++MallocCount > MallocMax) - MallocMax = MallocCount; + if (++MallocCount > MallocMax) + MallocMax = MallocCount; #endif - } - return(res); + } + return (res); } /* * realloc() - I could be fancier here and free the old buffer before - * allocating the new one (saving potential fragmentation + * allocating the new one (saving potential fragmentation * and potential buffer copies). But I don't bother. */ void * Realloc(void *ptr, size_t size, const char *file, int line) { - void *res; - size_t old; + void *res; + size_t old; - if ((res = Malloc(size, file, line)) != NULL) { - if (ptr) { - Guard *g = (Guard *)((char *)ptr - MALLOCALIGN); + if ((res = Malloc(size, file, line)) != NULL) { + if (ptr != NULL) { + Guard *g = (Guard *)((char *)ptr - MALLOCALIGN); - old = g->ga_Bytes - MALLOCALIGN; - if (old < size) - bcopy(ptr, res, old); - else - bcopy(ptr, res, size); - Free(ptr, file, line); - } else { + old = g->ga_Bytes - MALLOCALIGN; + if (old < size) + bcopy(ptr, res, old); + else + bcopy(ptr, res, size); + Free(ptr, file, line); + } else { #ifdef DMALLOCDEBUG - if (++MallocCount > MallocMax) - MallocMax = MallocCount; + if (++MallocCount > MallocMax) + MallocMax = MallocCount; #ifdef EXITSTATS - if (DidAtExit == 0) { - DidAtExit = 1; - atexit(mallocstats); - } + if (DidAtExit == 0) { + DidAtExit = 1; + atexit(mallocstats); + } #endif #endif + } } - } - return(res); + return (res); } void * Reallocf(void *ptr, size_t size, const char *file, int line) { - void *res; + void *res; - if ((res = Realloc(ptr, size, file, line)) == NULL) - Free(ptr, file, line); - return(res); + if ((res = Realloc(ptr, size, file, line)) == NULL) + Free(ptr, file, line); + return (res); } #ifdef 
DMALLOCDEBUG @@ -195,11 +199,10 @@ Reallocf(void *ptr, size_t size, const char *file, int line) void mallocstats(void) { - printf("Active Allocations: %d/%d\n", MallocCount, MallocMax); + printf("Active Allocations: %d/%d\n", MallocCount, MallocMax); #ifdef ZALLOCDEBUG - zallocstats(&MallocPool); + zallocstats(&MallocPool); #endif } #endif - diff --git a/stand/libsa/zalloc_mem.h b/stand/libsa/zalloc_mem.h index 26d388dfc75e..5a6854105974 100644 --- a/stand/libsa/zalloc_mem.h +++ b/stand/libsa/zalloc_mem.h @@ -1,5 +1,5 @@ /* - * This module derived from code donated to the FreeBSD Project by + * This module derived from code donated to the FreeBSD Project by * Matthew Dillon * * Copyright (c) 1998 The FreeBSD Project @@ -34,20 +34,23 @@ * * Basic memory pool / memory node structures. */ +#ifndef _ZALLOC_MEM_H +#define _ZALLOC_MEM_H typedef struct MemNode { - struct MemNode *mr_Next; - uintptr_t mr_Bytes; + struct MemNode *mr_Next; + uintptr_t mr_Bytes; } MemNode; typedef struct MemPool { - void *mp_Base; - void *mp_End; - MemNode *mp_First; - uintptr_t mp_Size; - uintptr_t mp_Used; + void *mp_Base; + void *mp_End; + MemNode *mp_First; + uintptr_t mp_Size; + uintptr_t mp_Used; } MemPool; -#define ZNOTE_FREE 0 -#define ZNOTE_REUSE 1 +#define ZNOTE_FREE 0 +#define ZNOTE_REUSE 1 +#endif /* _ZALLOC_MEM_H */ diff --git a/stand/libsa/zalloc_protos.h b/stand/libsa/zalloc_protos.h index 53a40e400dcd..aba3cb32b9b3 100644 --- a/stand/libsa/zalloc_protos.h +++ b/stand/libsa/zalloc_protos.h @@ -1,5 +1,5 @@ /* - * This module derived from code donated to the FreeBSD Project by + * This module derived from code donated to the FreeBSD Project by * Matthew Dillon * * Copyright (c) 1998 The FreeBSD Project @@ -29,7 +29,12 @@ * $FreeBSD$ */ +#ifndef _ZALLOC_PROTOS_H +#define _ZALLOC_PROTOS_H + Library void *znalloc(struct MemPool *mpool, uintptr_t bytes); Library void zfree(struct MemPool *mpool, void *ptr, uintptr_t bytes); Library void zextendPool(MemPool *mp, void *base, uintptr_t 
bytes); Library void zallocstats(struct MemPool *mp); + +#endif /* _ZALLOC_PROTOS_H */ From 62ea4c11d597cceccfca27c67b35365e4ea54d94 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Tue, 17 Sep 2019 13:07:02 +0000 Subject: [PATCH 05/46] loader: stand.h should define reallocf as Reallocf Use the same approach as other zalloc functions. --- stand/libsa/stand.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stand/libsa/stand.h b/stand/libsa/stand.h index 3d0cb4ff0c50..82a28b34380c 100644 --- a/stand/libsa/stand.h +++ b/stand/libsa/stand.h @@ -264,9 +264,6 @@ static __inline int tolower(int c) extern void setheap(void *base, void *top); extern char *sbrk(int incr); -extern void *reallocf(void *ptr, size_t size); -extern void mallocstats(void); - extern int printf(const char *fmt, ...) __printflike(1, 2); extern int asprintf(char **buf, const char *cfmt, ...) __printflike(2, 3); extern int sprintf(char *buf, const char *cfmt, ...) __printflike(2, 3); @@ -433,17 +430,20 @@ void *Malloc(size_t, const char *, int); void *Calloc(size_t, size_t, const char *, int); void *Realloc(void *, size_t, const char *, int); void Free(void *, const char *, int); +extern void mallocstats(void); #ifdef DEBUG_MALLOC #define malloc(x) Malloc(x, __FILE__, __LINE__) #define calloc(x, y) Calloc(x, y, __FILE__, __LINE__) #define free(x) Free(x, __FILE__, __LINE__) #define realloc(x, y) Realloc(x, y, __FILE__, __LINE__) +#define reallocf(x, y) Reallocf(x, y, __FILE__, __LINE__) #else #define malloc(x) Malloc(x, NULL, 0) #define calloc(x, y) Calloc(x, y, NULL, 0) #define free(x) Free(x, NULL, 0) #define realloc(x, y) Realloc(x, y, NULL, 0) +#define reallocf(x, y) Reallocf(x, y, NULL, 0) #endif #endif /* STAND_H */ From 11db1a1654a7a8dedaee5330cb8d348bf6524787 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Tue, 17 Sep 2019 13:15:27 +0000 Subject: [PATCH 06/46] loader: add memalign() to libsa Implement memalign(size_t alignment, size_t size) to allocate aligned memory. 
--- stand/libsa/stand.h | 4 ++++ stand/libsa/zalloc.c | 32 ++++++++++++++++++++++++++++++-- stand/libsa/zalloc_malloc.c | 22 ++++++++++++++++++++-- stand/libsa/zalloc_protos.h | 2 +- 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/stand/libsa/stand.h b/stand/libsa/stand.h index 82a28b34380c..c9e691b55103 100644 --- a/stand/libsa/stand.h +++ b/stand/libsa/stand.h @@ -427,19 +427,23 @@ extern uint16_t ntohs(uint16_t); #endif void *Malloc(size_t, const char *, int); +void *Memalign(size_t, size_t, const char *, int); void *Calloc(size_t, size_t, const char *, int); void *Realloc(void *, size_t, const char *, int); +void *Reallocf(void *, size_t, const char *, int); void Free(void *, const char *, int); extern void mallocstats(void); #ifdef DEBUG_MALLOC #define malloc(x) Malloc(x, __FILE__, __LINE__) +#define memalign(x, y) Memalign(x, y, __FILE__, __LINE__) #define calloc(x, y) Calloc(x, y, __FILE__, __LINE__) #define free(x) Free(x, __FILE__, __LINE__) #define realloc(x, y) Realloc(x, y, __FILE__, __LINE__) #define reallocf(x, y) Reallocf(x, y, __FILE__, __LINE__) #else #define malloc(x) Malloc(x, NULL, 0) +#define memalign(x, y) Memalign(x, y, NULL, 0) #define calloc(x, y) Calloc(x, y, NULL, 0) #define free(x) Free(x, NULL, 0) #define realloc(x, y) Realloc(x, y, NULL, 0) diff --git a/stand/libsa/zalloc.c b/stand/libsa/zalloc.c index f359e1830aa4..371a1449409b 100644 --- a/stand/libsa/zalloc.c +++ b/stand/libsa/zalloc.c @@ -30,6 +30,8 @@ #include __FBSDID("$FreeBSD$"); +#include + /* * LIB/MEMORY/ZALLOC.C - self contained low-overhead memory pool/allocation * subsystem @@ -86,7 +88,7 @@ typedef char assert_align[(sizeof(struct MemNode) <= MALLOCALIGN) ? 
1 : -1]; */ void * -znalloc(MemPool *mp, uintptr_t bytes) +znalloc(MemPool *mp, uintptr_t bytes, size_t align) { MemNode **pmn; MemNode *mn; @@ -111,14 +113,40 @@ znalloc(MemPool *mp, uintptr_t bytes) for (pmn = &mp->mp_First; (mn = *pmn) != NULL; pmn = &mn->mr_Next) { char *ptr = (char *)mn; + uintptr_t dptr; + char *aligned; + size_t extra; - if (bytes > mn->mr_Bytes) + dptr = (uintptr_t)(ptr + MALLOCALIGN); /* pointer to data */ + aligned = (char *)(roundup2(dptr, align) - MALLOCALIGN); + extra = aligned - ptr; + + if (bytes + extra > mn->mr_Bytes) continue; + /* + * Cut extra from head and create new memory node from reminder. + */ + + if (extra != 0) { + MemNode *new; + + new = (MemNode *)aligned; + new->mr_Next = mn->mr_Next; + new->mr_Bytes = mn->mr_Bytes - extra; + + /* And update current memory node */ + mn->mr_Bytes = extra; + mn->mr_Next = new; + /* In next iteration, we will get our aligned address */ + continue; + } + /* * Cut a chunk of memory out of the beginning of this * block and fixup the link appropriately. 
*/ + if (mn->mr_Bytes == bytes) { *pmn = mn->mr_Next; } else { diff --git a/stand/libsa/zalloc_malloc.c b/stand/libsa/zalloc_malloc.c index da68cf331961..17c1648e05e3 100644 --- a/stand/libsa/zalloc_malloc.c +++ b/stand/libsa/zalloc_malloc.c @@ -50,8 +50,26 @@ void mallocstats(void); #undef free #endif +static void *Malloc_align(size_t, size_t); + void * -Malloc(size_t bytes, const char *file, int line) +Malloc(size_t bytes, const char *file __unused, int line __unused) +{ + return (Malloc_align(bytes, 1)); +} + +void * +Memalign(size_t alignment, size_t bytes, const char *file __unused, + int line __unused) +{ + if (alignment == 0) + alignment = 1; + + return (Malloc_align(bytes, alignment)); +} + +static void * +Malloc_align(size_t bytes, size_t alignment) { Guard *res; @@ -64,7 +82,7 @@ Malloc(size_t bytes, const char *file, int line) bytes += MALLOCALIGN; #endif - while ((res = znalloc(&MallocPool, bytes)) == NULL) { + while ((res = znalloc(&MallocPool, bytes, alignment)) == NULL) { int incr = (bytes + BLKEXTENDMASK) & ~BLKEXTENDMASK; char *base; diff --git a/stand/libsa/zalloc_protos.h b/stand/libsa/zalloc_protos.h index aba3cb32b9b3..d129a64f2993 100644 --- a/stand/libsa/zalloc_protos.h +++ b/stand/libsa/zalloc_protos.h @@ -32,7 +32,7 @@ #ifndef _ZALLOC_PROTOS_H #define _ZALLOC_PROTOS_H -Library void *znalloc(struct MemPool *mpool, uintptr_t bytes); +Library void *znalloc(struct MemPool *mpool, uintptr_t bytes, size_t align); Library void zfree(struct MemPool *mpool, void *ptr, uintptr_t bytes); Library void zextendPool(MemPool *mp, void *base, uintptr_t bytes); Library void zallocstats(struct MemPool *mp); From 840fa0f86d6de5c7b45c37a69ae86e07795b2f2a Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Tue, 17 Sep 2019 13:50:25 +0000 Subject: [PATCH 07/46] loader.efi: efipart needs to use ioalign UEFI specification 2.7A, EFI_BLOCK_IO_PROTOCOL, page 566. The ioalign property does define the alignment of data buffer. 
If the alignment is required and our buffer is not aligned, or if the data buffer is not multiple of Blocksize, we need to use bounce buffer to perform the block IO. This is much like with BIOS version, except there the INT13 needs buffer to be located in low memory. Additionally, we need to handle disk writes properly. --- stand/efi/libefi/efipart.c | 137 ++++++++++++++++++++++++++++++------- 1 file changed, 113 insertions(+), 24 deletions(-) diff --git a/stand/efi/libefi/efipart.c b/stand/efi/libefi/efipart.c index 973ad8f7722f..2affab1c4274 100644 --- a/stand/efi/libefi/efipart.c +++ b/stand/efi/libefi/efipart.c @@ -64,6 +64,9 @@ static int efipart_printhd(int); #define PNP0700 0x700 #define PNP0701 0x701 +/* Bounce buffer max size */ +#define BIO_BUFFER_SIZE 0x4000 + struct devsw efipart_fddev = { .dv_name = "fd", .dv_type = DEVT_FD, @@ -266,6 +269,12 @@ efipart_inithandles(void) continue; } + /* Allowed values are 0, 1 and power of 2. */ + if (blkio->Media->IoAlign > 1 && + !powerof2(blkio->Media->IoAlign)) { + continue; + } + /* This is bad. */ if ((pd = calloc(1, sizeof(*pd))) == NULL) { printf("efipart_inithandles: Out of memory.\n"); @@ -979,8 +988,10 @@ efipart_realstrategy(void *devdata, int rw, daddr_t blk, size_t size, EFI_BLOCK_IO *blkio; uint64_t off, disk_blocks, d_offset = 0; char *blkbuf; - size_t blkoff, blksz; - int error; + size_t blkoff, blksz, bio_size; + unsigned ioalign; + bool need_buf; + int rc; uint64_t diskend, readstart; if (dev == NULL || blk < 0) @@ -1028,40 +1039,118 @@ efipart_realstrategy(void *devdata, int rw, daddr_t blk, size_t size, size = size * blkio->Media->BlockSize; } - if (rsize != NULL) - *rsize = size; - + need_buf = true; + /* Do we need bounce buffer? */ if ((size % blkio->Media->BlockSize == 0) && (off % blkio->Media->BlockSize == 0)) - return (efipart_readwrite(blkio, rw, - off / blkio->Media->BlockSize, - size / blkio->Media->BlockSize, buf)); + need_buf = false; + + /* Do we have IO alignment requirement? 
*/ + ioalign = blkio->Media->IoAlign; + if (ioalign == 0) + ioalign++; + + if (ioalign > 1 && (uintptr_t)buf != roundup2((uintptr_t)buf, ioalign)) + need_buf = true; + + if (need_buf) { + for (bio_size = BIO_BUFFER_SIZE; bio_size > 0; + bio_size -= blkio->Media->BlockSize) { + blkbuf = memalign(ioalign, bio_size); + if (blkbuf != NULL) + break; + } + } else { + blkbuf = buf; + bio_size = size; + } - /* - * The buffer size is not a multiple of the media block size. - */ - blkbuf = malloc(blkio->Media->BlockSize); if (blkbuf == NULL) return (ENOMEM); - error = 0; + if (rsize != NULL) + *rsize = size; + + rc = 0; blk = off / blkio->Media->BlockSize; blkoff = off % blkio->Media->BlockSize; - blksz = blkio->Media->BlockSize - blkoff; + while (size > 0) { - error = efipart_readwrite(blkio, rw, blk, 1, blkbuf); - if (error) + size_t x = min(size, bio_size); + + if (x < blkio->Media->BlockSize) + x = 1; + else + x /= blkio->Media->BlockSize; + + switch (rw & F_MASK) { + case F_READ: + blksz = blkio->Media->BlockSize * x - blkoff; + if (size < blksz) + blksz = size; + + rc = efipart_readwrite(blkio, rw, blk, x, blkbuf); + if (rc != 0) + goto error; + + if (need_buf) + bcopy(blkbuf + blkoff, buf, blksz); break; - if (size < blksz) - blksz = size; - bcopy(blkbuf + blkoff, buf, blksz); + case F_WRITE: + rc = 0; + if (blkoff != 0) { + /* + * We got offset to sector, read 1 sector to + * blkbuf. + */ + x = 1; + blksz = blkio->Media->BlockSize - blkoff; + blksz = min(blksz, size); + rc = efipart_readwrite(blkio, F_READ, blk, x, + blkbuf); + } else if (size < blkio->Media->BlockSize) { + /* + * The remaining block is not full + * sector. Read 1 sector to blkbuf. + */ + x = 1; + blksz = size; + rc = efipart_readwrite(blkio, F_READ, blk, x, + blkbuf); + } else { + /* We can write full sector(s). 
*/ + blksz = blkio->Media->BlockSize * x; + } + + if (rc != 0) + goto error; + /* + * Put your Data In, Put your Data out, + * Put your Data In, and shake it all about + */ + if (need_buf) + bcopy(buf, blkbuf + blkoff, blksz); + rc = efipart_readwrite(blkio, F_WRITE, blk, x, blkbuf); + if (rc != 0) + goto error; + break; + default: + /* DO NOTHING */ + rc = EROFS; + goto error; + } + + blkoff = 0; buf += blksz; size -= blksz; - blk++; - blkoff = 0; - blksz = blkio->Media->BlockSize; + blk += x; } - free(blkbuf); - return (error); +error: + if (rsize != NULL) + *rsize -= size; + + if (need_buf) + free(blkbuf); + return (rc); } From 496ba62c3692f9a2805d0a7da43c02f92f659cb2 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Tue, 17 Sep 2019 13:58:15 +0000 Subject: [PATCH 08/46] MFZoL: Add -vnP support to 'zfs send' for bookmarks zfsonlinux/zfs@835db58592d7d947e5818eb7281882e2a46073e0 We have long supported estimating a size of an incremental stream from a snapshot. We should do the same for bookmarks as well. 
Obtained from: ZoL Author: loli10K MFC after: 3 days --- cddl/contrib/opensolaris/cmd/zfs/zfs.8 | 27 ++++++++-- cddl/contrib/opensolaris/cmd/zfs/zfs_main.c | 17 ++----- .../opensolaris/lib/libzfs/common/libzfs.h | 2 +- .../lib/libzfs/common/libzfs_sendrecv.c | 51 +++++++++++++------ 4 files changed, 64 insertions(+), 33 deletions(-) diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 index 53c144d1db0e..84f13273af22 100644 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 +++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 @@ -190,8 +190,8 @@ .Ar snapshot .Nm .Cm send -.Op Fl Lce -.Op Fl i Ar snapshot Ns | Ns bookmark +.Op Fl LPcenv +.Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Nm .Cm send @@ -2766,7 +2766,7 @@ on future versions of .It Xo .Nm .Cm send -.Op Fl Lce +.Op Fl LPcenv .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Xc @@ -2780,7 +2780,7 @@ stream generated from a filesystem or volume is received, the default snapshot name will be .Pq --head-- . .Bl -tag -width indent -.It Fl i Ar snapshot Ns | Ns bookmark +.It Fl i Ar snapshot Ns | Ns Ar bookmark Generate an incremental send stream. The incremental source must be an earlier snapshot in the destination's history. @@ -2792,6 +2792,23 @@ specified as the last component of the name If the incremental target is a clone, the incremental source can be the origin snapshot, or an earlier snapshot in the origin's filesystem, or the origin's origin, etc. +.It Fl n, -dryrun +Do a dry-run +.Pq Qq No-op +send. +Do not generate any actual send data. +This is useful in conjunction with the +.Fl v +or +.Fl P +flags to determine what data will be sent. +In this case, the verbose output will be written to standard output +.Po contrast with a non-dry-run, where the stream is written to standard output +and the verbose output goes to standard error +.Pc . 
+.It Fl v, -verbose +Print verbose information about the stream package generated. +This information includes a per-second report of how much data has been sent. .It Fl L, -large-block Generate a stream which may contain blocks larger than 128KB. This flag @@ -2808,6 +2825,8 @@ See for details on ZFS feature flags and the .Sy large_blocks feature. +.It Fl P, -parsable +Print machine-parsable verbose information about the stream package generated. .It Fl c, -compressed Generate a more compact stream by using compressed WRITE records for blocks which are compressed on disk and in memory (see the diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c index 232181a87345..65dd40e33316 100644 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c +++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c @@ -3928,13 +3928,11 @@ zfs_do_send(int argc, char **argv) if (strchr(argv[0], '@') == NULL || (fromname && strchr(fromname, '#') != NULL)) { char frombuf[ZFS_MAX_DATASET_NAME_LEN]; - enum lzc_send_flags lzc_flags = 0; if (flags.replicate || flags.doall || flags.props || - flags.dedup || flags.dryrun || flags.verbose || - flags.progress) { - (void) fprintf(stderr, - gettext("Error: " + flags.dedup || (strchr(argv[0], '@') == NULL && + (flags.dryrun || flags.verbose || flags.progress))) { + (void) fprintf(stderr, gettext("Error: " "Unsupported flag with filesystem or bookmark.\n")); return (1); } @@ -3943,13 +3941,6 @@ zfs_do_send(int argc, char **argv) if (zhp == NULL) return (1); - if (flags.largeblock) - lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; - if (flags.embed_data) - lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; - if (flags.compress) - lzc_flags |= LZC_SEND_FLAG_COMPRESS; - if (fromname != NULL && (fromname[0] == '#' || fromname[0] == '@')) { /* @@ -3963,7 +3954,7 @@ zfs_do_send(int argc, char **argv) (void) strlcat(frombuf, fromname, sizeof (frombuf)); fromname = frombuf; } - err = zfs_send_one(zhp, fromname, STDOUT_FILENO, lzc_flags); + 
err = zfs_send_one(zhp, fromname, STDOUT_FILENO, flags); zfs_close(zhp); return (err != 0); } diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h index f75effd338fc..15b1d54e6341 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -660,7 +660,7 @@ typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); -extern int zfs_send_one(zfs_handle_t *, const char *, int, enum lzc_send_flags); +extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t flags); extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd, const char *); extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c index b7e624e4621c..cf5aedafca7e 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c @@ -1190,16 +1190,14 @@ send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap, } } - if (size != 0) { - if (parsable) { - (void) fprintf(fout, "\t%llu", - (longlong_t)size); - } else { - char buf[16]; - zfs_nicenum(size, buf, sizeof (buf)); - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - " estimated size is %s"), buf); - } + if (parsable) { + (void) fprintf(fout, "\t%llu", + (longlong_t)size); + } else if (size != 0) { + char buf[16]; + zfs_nicenum(size, buf, sizeof (buf)); + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + " estimated size is %s"), buf); } (void) fprintf(fout, "\n"); } @@ -2037,17 +2035,40 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, } int -zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, - enum lzc_send_flags flags) 
+zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t flags) { - int err; + int err = 0; libzfs_handle_t *hdl = zhp->zfs_hdl; - + enum lzc_send_flags lzc_flags = 0; + FILE *fout = (flags.verbose && flags.dryrun) ? stdout : stderr; char errbuf[1024]; + + if (flags.largeblock) + lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; + if (flags.embed_data) + lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; + if (flags.compress) + lzc_flags |= LZC_SEND_FLAG_COMPRESS; + + if (flags.verbose) { + uint64_t size = 0; + err = lzc_send_space(zhp->zfs_name, from, lzc_flags, &size); + if (err == 0) { + send_print_verbose(fout, zhp->zfs_name, from, size, + flags.parsable); + } else { + (void) fprintf(stderr, "Cannot estimate send size: " + "%s\n", strerror(errno)); + } + } + + if (flags.dryrun) + return (err); + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot send '%s'"), zhp->zfs_name); - err = lzc_send(zhp->zfs_name, from, fd, flags); + err = lzc_send(zhp->zfs_name, from, fd, lzc_flags); if (err != 0) { switch (errno) { case EXDEV: From 7b2f790200c8518657c917e801d741d72f8a03ab Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Tue, 17 Sep 2019 14:08:09 +0000 Subject: [PATCH 09/46] Temporarily skip flakey test case lib.libc.sys.stat_test.stat_socket PR: 240621 Sponsored by: The FreeBSD Foundation --- contrib/netbsd-tests/lib/libc/sys/t_stat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/contrib/netbsd-tests/lib/libc/sys/t_stat.c b/contrib/netbsd-tests/lib/libc/sys/t_stat.c index 9d0136dae50b..372a9093fac5 100644 --- a/contrib/netbsd-tests/lib/libc/sys/t_stat.c +++ b/contrib/netbsd-tests/lib/libc/sys/t_stat.c @@ -332,6 +332,9 @@ ATF_TC_BODY(stat_socket, tc) uint32_t iaddr; int fd, flags; + if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) + atf_tc_skip("https://bugs.freebsd.org/240621"); + (void)memset(&st, 0, sizeof(struct stat)); (void)memset(&addr, 0, sizeof(struct sockaddr_in)); From 8569a95e76c88106543ec58c196cf995fd98445d Mon Sep 17 
00:00:00 2001 From: Andriy Gapon Date: Tue, 17 Sep 2019 14:15:48 +0000 Subject: [PATCH 10/46] fix up fallout from r352447 in libbe I totally forgot that we now have another in-tree consumer of libzfs. MFC after: 3 days X-MFC with: r352447 --- lib/libbe/be.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/libbe/be.c b/lib/libbe/be.c index 849a25859fa5..249fa2f3361c 100644 --- a/lib/libbe/be.c +++ b/lib/libbe/be.c @@ -775,6 +775,7 @@ be_export(libbe_handle_t *lbh, const char *bootenv, int fd) char snap_name[BE_MAXPATHLEN]; char buf[BE_MAXPATHLEN]; zfs_handle_t *zfs; + sendflags_t flags = { 0 }; int err; if ((err = be_snapshot(lbh, bootenv, NULL, true, snap_name)) != 0) @@ -786,7 +787,7 @@ be_export(libbe_handle_t *lbh, const char *bootenv, int fd) if ((zfs = zfs_open(lbh->lzh, buf, ZFS_TYPE_DATASET)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); - err = zfs_send_one(zfs, NULL, fd, 0); + err = zfs_send_one(zfs, NULL, fd, flags); zfs_close(zfs); return (err); From d245aa1e72617a650058af61750506d95f64b726 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 17 Sep 2019 15:53:40 +0000 Subject: [PATCH 11/46] vfs: apply r352437 to the fast path as well This one is very hard to run into. If the filesystem is being unmounted or the mount point is freed the vfs_op_thread_enter will fail. For it to succeed the mount point itself would have to be reallocated in the time window between the initial read and the attempt to enter.
Sponsored by: The FreeBSD Foundation --- sys/kern/vfs_default.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 2fd95fbc8b46..8aee8abfbe7e 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -606,11 +606,13 @@ vop_stdgetwritemount(ap) return (0); } if (vfs_op_thread_enter(mp)) { - if (mp == vp->v_mount) + if (mp == vp->v_mount) { vfs_mp_count_add_pcpu(mp, ref, 1); - else + vfs_op_thread_exit(mp); + } else { + vfs_op_thread_exit(mp); mp = NULL; - vfs_op_thread_exit(mp); + } } else { MNT_ILOCK(mp); if (mp == vp->v_mount) { From d6b090d1d4cbed343ec601c7ae21b05f35ace245 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Tue, 17 Sep 2019 16:16:46 +0000 Subject: [PATCH 12/46] loader: revert r352421 As insisted by kib, malloc(0) is quite legal. --- stand/libsa/zalloc_malloc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/stand/libsa/zalloc_malloc.c b/stand/libsa/zalloc_malloc.c index 17c1648e05e3..98e28b8ef7f5 100644 --- a/stand/libsa/zalloc_malloc.c +++ b/stand/libsa/zalloc_malloc.c @@ -73,9 +73,6 @@ Malloc_align(size_t bytes, size_t alignment) { Guard *res; - if (bytes == 0) - return (NULL); - #ifdef USEENDGUARD bytes += MALLOCALIGN + 1; #else From 100369071d10827df9225ed0a3b3b20b8e286d45 Mon Sep 17 00:00:00 2001 From: Kirk McKusick Date: Tue, 17 Sep 2019 17:44:50 +0000 Subject: [PATCH 13/46] The VFS-level clustering code collects together sequential blocks by issuing delayed-writes (bdwrite()) until a non-sequential block is written or the maximum cluster size is reached. At that point it collects the delayed buffers together (using bread()) to write them in a single operation. The assumption was that since we just looked at them they will still be in memory so there is no need to check for a read error from bread(). Very occasionally (apparently every 10 hours or so when being pounded by Peter Holm's tests) this assumption is wrong.
The fix is to check for errors from bread() and fail the cluster write thus falling back to the default individual flushing of any still dirty buffers. Reported by: Peter Holm and Chuck Silvers Reviewed by: kib MFC after: 3 days --- sys/kern/vfs_cluster.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 6ff0b1c3b327..21efe900eea0 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -718,6 +718,14 @@ cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount, struct cluster_save *buflist; buflist = cluster_collectbufs(vp, bp, gbflags); + if (buflist == NULL) { + /* + * Cluster build failed so just write + * it now. + */ + bawrite(bp); + return; + } endbp = &buflist->bs_children [buflist->bs_nchildren - 1]; if (VOP_REALLOCBLKS(vp, buflist)) { @@ -1056,7 +1064,7 @@ cluster_collectbufs(struct vnode *vp, struct buf *last_bp, int gbflags) struct cluster_save *buflist; struct buf *bp; daddr_t lbn; - int i, len; + int i, j, len, error; len = vp->v_lastw - vp->v_cstart + 1; buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), @@ -1064,8 +1072,18 @@ cluster_collectbufs(struct vnode *vp, struct buf *last_bp, int gbflags) buflist->bs_nchildren = 0; buflist->bs_children = (struct buf **) (buflist + 1); for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) { - (void)bread_gb(vp, lbn, last_bp->b_bcount, NOCRED, + error = bread_gb(vp, lbn, last_bp->b_bcount, NOCRED, gbflags, &bp); + if (error != 0) { + /* + * If read fails, release collected buffers + * and return failure. 
+ */ + for (j = 0; j < i; j++) + brelse(buflist->bs_children[j]); + free(buflist, M_SEGMENT); + return (NULL); + } buflist->bs_children[i] = bp; if (bp->b_blkno == bp->b_lblkno) VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, From 68861a62f5363e6984ba96efe6463e882a9c4896 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Tue, 17 Sep 2019 18:05:33 +0000 Subject: [PATCH 14/46] loader: provide u> and xemit words if needed We have external interpreter (userboot.so) which may be lagging behind with updates and may be missing u> xemit words. --- stand/forth/loader.4th | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/stand/forth/loader.4th b/stand/forth/loader.4th index 6c2f7b378f6b..89d532fda317 100644 --- a/stand/forth/loader.4th +++ b/stand/forth/loader.4th @@ -27,6 +27,29 @@ only forth definitions +\ provide u> if needed +s" u>" sfind [if] drop [else] + drop +: u> + 2dup u< if 2drop 0 exit then + swap u< if -1 exit then + 0 +; +[then] + +\ provide xemit if needed +s" xemit" sfind [if] drop [else] + drop +: xemit + dup 0x80 u< if emit exit then + 0 swap 0x3F + begin 2dup u> while + 2/ >r dup 0x3F and 0x80 or swap 6 rshift r> + repeat 0x7F xor 2* or + begin dup 0x80 u< 0= while emit repeat drop +; +[then] + s" arch-i386" environment? [if] [if] s" loader_version" environment? [if] 11 < [if] From 55894117b126dddc10bf497a53afb6e269b9a542 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 17 Sep 2019 18:32:18 +0000 Subject: [PATCH 15/46] Return EISDIR when directory is opened with O_CREAT without O_DIRECTORY. 
Reviewed by: bcr (man page), emaste (previous version) PR: 240452 Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D21634 --- lib/libc/sys/open.2 | 7 ++++++- sys/kern/vfs_vnops.c | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2 index a876cccb84b5..fe50b292d1bd 100644 --- a/lib/libc/sys/open.2 +++ b/lib/libc/sys/open.2 @@ -28,7 +28,7 @@ .\" @(#)open.2 8.2 (Berkeley) 11/16/93 .\" $FreeBSD$ .\" -.Dd June 14, 2019 +.Dd September 17, 2019 .Dt OPEN 2 .Os .Sh NAME @@ -419,6 +419,11 @@ Too many symbolic links were encountered in translating the pathname. .It Bq Er EISDIR The named file is a directory, and the arguments specify it is to be modified. +.It Bq Er EISDIR +The named file is a directory, and the flags specified +.Dv O_CREAT +without +.Dv O_DIRECTORY . .It Bq Er EROFS The named file resides on a read-only file system, and the file is to be modified. diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 4116ee51120d..3f8bd4504808 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -264,6 +264,10 @@ vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags, error = EEXIST; goto bad; } + if (vp->v_type == VDIR) { + error = EISDIR; + goto bad; + } fmode &= ~O_CREAT; } } else { From 0edc114ac0b998b06235da32bec24d55c10206cd Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 17 Sep 2019 18:36:29 +0000 Subject: [PATCH 16/46] realloc(x, 0) should not return NULL. See http://www.open-std.org/jtc1/sc22/wg14/www/docs/summary.htm#dr_400. Upstream jemalloc issue is opened by emaste at https://github.com/jemalloc/jemalloc/issues/1629.
Reviewed by: emaste PR: 240456 Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D21632 --- contrib/jemalloc/src/jemalloc.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/contrib/jemalloc/src/jemalloc.c b/contrib/jemalloc/src/jemalloc.c index e0ad297be93a..28763a5f0504 100644 --- a/contrib/jemalloc/src/jemalloc.c +++ b/contrib/jemalloc/src/jemalloc.c @@ -2299,21 +2299,6 @@ je_realloc(void *ptr, size_t size) { LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (unlikely(size == 0)) { - if (ptr != NULL) { - /* realloc(ptr, 0) is equivalent to free(ptr). */ - UTRACE(ptr, 0, 0); - tcache_t *tcache; - tsd_t *tsd = tsd_fetch(); - if (tsd_reentrancy_level_get(tsd) == 0) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - ifree(tsd, ptr, tcache, true); - - LOG("core.realloc.exit", "result: %p", NULL); - return NULL; - } size = 1; } From 6fd583583bdded9f34da7a41e976d64ce32f1c1f Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 17 Sep 2019 18:41:39 +0000 Subject: [PATCH 17/46] Further refine r352393, only call vnode_pager_setsize() outside the node lock when shrinking. This is similar to r252528, applied to the above commit. Apparently there is a race which makes it necessary to at least keep the n_size and pager size consistent when extending. Current suspect is that iod threads perform vnode_pager_setsize() without taking the vnode lock, which corrupts the file content.
Reported and tested by: Masachika ISHIZUKA Discussed with: rmacklem (related issues) Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/fs/nfsclient/nfs_clport.c | 41 +++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index 63ea47367075..a23b4ba4efae 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -414,12 +414,12 @@ nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, struct nfsnode *np; struct nfsmount *nmp; struct timespec mtime_save; + vm_object_t object; u_quad_t nsize; - int setnsize, error, force_fid_err; + int error, force_fid_err; + bool setnsize; error = 0; - setnsize = 0; - nsize = 0; /* * If v_type == VNON it is a new node, so fill in the v_type, @@ -511,8 +511,7 @@ nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, * zero np->n_attrstamp to indicate that * the attributes are stale. */ - nsize = vap->va_size = np->n_size; - setnsize = 1; + vap->va_size = np->n_size; np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } else if (np->n_flag & NMODIFIED) { @@ -526,22 +525,9 @@ nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; } - nsize = np->n_size; - setnsize = 1; - } else if (vap->va_size < np->n_size) { - /* - * When shrinking the size, the call to - * vnode_pager_setsize() cannot be done - * with the mutex held, so delay it until - * after the mtx_unlock call. 
- */ - nsize = np->n_size = vap->va_size; - np->n_flag |= NSIZECHANGED; - setnsize = 1; } else { - nsize = np->n_size = vap->va_size; + np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; - setnsize = 1; } } else { np->n_size = vap->va_size; @@ -579,6 +565,23 @@ nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, if (np->n_attrstamp != 0) KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error); #endif + nsize = vap->va_size; + object = vp->v_object; + setnsize = false; + if (object != NULL) { + if (OFF_TO_IDX(nsize + PAGE_MASK) < object->size) { + /* + * When shrinking the size, the call to + * vnode_pager_setsize() cannot be done with + * the mutex held, because we might need to + * wait for a busy page. Delay it until after + * the node is unlocked. + */ + setnsize = true; + } else { + vnode_pager_setsize(vp, nsize); + } + } NFSUNLOCKNODE(np); if (setnsize) vnode_pager_setsize(vp, nsize); From 247cf5664e1ad88d96568eb3f2101d383a539c08 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 17 Sep 2019 18:49:13 +0000 Subject: [PATCH 18/46] Add SIOCGIFDOWNREASON. The ioctl(2) is intended to provide more details about the cause of the down for the link. Eventually we might define a comprehensive list of codes for the situations. But interface also allows the driver to provide free-form null-terminated ASCII string to provide arbitrary non-formalized information. Sample implementation exists for mlx5(4), where the string is fetched from firmware controlling the port. 
Reviewed by: hselasky, rrs Sponsored by: Mellanox Technologies MFC after: 1 week Differential revision: https://reviews.freebsd.org/D21527 --- sys/net/if.c | 1 + sys/net/if.h | 10 ++++++++++ sys/sys/sockio.h | 2 ++ 3 files changed, 13 insertions(+) diff --git a/sys/net/if.c b/sys/net/if.c index cfd0c2065888..8de83b2d7bc5 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -2891,6 +2891,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) case SIOCGIFGENERIC: case SIOCGIFRSSKEY: case SIOCGIFRSSHASH: + case SIOCGIFDOWNREASON: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); diff --git a/sys/net/if.h b/sys/net/if.h index add4df55a3d9..fbcc06863bd4 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -585,6 +585,16 @@ struct ifrsshash { #define IFNET_PCP_NONE 0xff /* PCP disabled */ +#define IFDR_MSG_SIZE 64 +#define IFDR_REASON_MSG 1 +#define IFDR_REASON_VENDOR 2 +struct ifdownreason { + char ifdr_name[IFNAMSIZ]; + uint32_t ifdr_reason; + uint32_t ifdr_vendor; + char ifdr_msg[IFDR_MSG_SIZE]; +}; + #endif /* __BSD_VISIBLE */ #ifdef _KERNEL diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h index 89901bd40b44..447e2c884028 100644 --- a/sys/sys/sockio.h +++ b/sys/sys/sockio.h @@ -143,4 +143,6 @@ #define SIOCGLANPCP _IOWR('i', 152, struct ifreq) /* Get (V)LAN PCP */ #define SIOCSLANPCP _IOW('i', 153, struct ifreq) /* Set (V)LAN PCP */ +#define SIOCGIFDOWNREASON _IOWR('i', 154, struct ifdownreason) + #endif /* !_SYS_SOCKIO_H_ */ From 093aa3e311c4fb8268158ee679c9b1f15455574d Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 17 Sep 2019 18:51:10 +0000 Subject: [PATCH 19/46] ifconfig: add report of the string from SIOCGIFDOWNREASON. 
Sample output: # ifconfig mce0 mce0: flags=8802 metric 0 mtu 1500 options=3ed07bb ether e4:1d:2d:e7:10:0a media: Ethernet autoselect status: no carrier (Negotiation failure) nd6 options=21 Reviewed by: hselasky, rrs Sponsored by: Mellanox Technologies MFC after: 1 week Differential revision: https://reviews.freebsd.org/D21527 --- sbin/ifconfig/ifmedia.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/sbin/ifconfig/ifmedia.c b/sbin/ifconfig/ifmedia.c index e80ef592dc00..fdf0d6fb62a0 100644 --- a/sbin/ifconfig/ifmedia.c +++ b/sbin/ifconfig/ifmedia.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -110,18 +111,20 @@ static void media_status(int s) { struct ifmediareq ifmr; + struct ifdownreason ifdr; int *media_list, i; - int xmedia = 1; + bool no_carrier, xmedia; (void) memset(&ifmr, 0, sizeof(ifmr)); (void) strlcpy(ifmr.ifm_name, name, sizeof(ifmr.ifm_name)); + xmedia = true; /* * Check if interface supports extended media types. */ if (ioctl(s, SIOCGIFXMEDIA, (caddr_t)&ifmr) < 0) - xmedia = 0; - if (xmedia == 0 && ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) < 0) { + xmedia = false; + if (!xmedia && ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) < 0) { /* * Interface doesn't support SIOC{G,S}IFMEDIA. 
*/ @@ -158,6 +161,7 @@ media_status(int s) putchar('\n'); if (ifmr.ifm_status & IFM_AVALID) { + no_carrier = false; printf("\tstatus: "); switch (IFM_TYPE(ifmr.ifm_active)) { case IFM_ETHER: @@ -165,7 +169,7 @@ media_status(int s) if (ifmr.ifm_status & IFM_ACTIVE) printf("active"); else - printf("no carrier"); + no_carrier = true; break; case IFM_IEEE80211: @@ -176,9 +180,27 @@ media_status(int s) else printf("running"); } else - printf("no carrier"); + no_carrier = true; break; } + if (no_carrier) { + printf("no carrier"); + memset(&ifdr, 0, sizeof(ifdr)); + strlcpy(ifdr.ifdr_name, name, sizeof(ifdr.ifdr_name)); + if (ioctl(s, SIOCGIFDOWNREASON, (caddr_t)&ifdr) == 0) { + switch (ifdr.ifdr_reason) { + case IFDR_REASON_MSG: + printf(" (%s)", ifdr.ifdr_msg); + break; + case IFDR_REASON_VENDOR: + printf(" (vendor code %d)", + ifdr.ifdr_vendor); + break; + default: + break; + } + } + } putchar('\n'); } From cd4b2a3c08889873258af9e62f4345bbda7fbc12 Mon Sep 17 00:00:00 2001 From: Steve Wills Date: Tue, 17 Sep 2019 20:03:20 +0000 Subject: [PATCH 20/46] log daemon.info to /var/log/daemon.log by default log daemon facility now that daemon(8) has syslog support which defaults to daemon facility, info priority Reviewed by: bapt Approved by: bapt Differential Revision: https://reviews.freebsd.org/D21561 --- usr.sbin/newsyslog/newsyslog.conf | 1 + usr.sbin/syslogd/syslog.conf | 1 + 2 files changed, 2 insertions(+) diff --git a/usr.sbin/newsyslog/newsyslog.conf b/usr.sbin/newsyslog/newsyslog.conf index 43528a9a2b16..8390b1eb0877 100644 --- a/usr.sbin/newsyslog/newsyslog.conf +++ b/usr.sbin/newsyslog/newsyslog.conf @@ -30,6 +30,7 @@ /var/log/security 600 10 1000 * JC /var/log/utx.log 644 3 * @01T05 B /var/log/weekly.log 640 5 * $W6D0 JN +/var/log/daemon 644 5 1000 @0101T JC /etc/newsyslog.conf.d/[!.]*.conf /usr/local/etc/newsyslog.conf.d/[!.]*.conf diff --git a/usr.sbin/syslogd/syslog.conf b/usr.sbin/syslogd/syslog.conf index e0df421b9021..a556f309d943 100644 --- 
a/usr.sbin/syslogd/syslog.conf +++ b/usr.sbin/syslogd/syslog.conf @@ -14,6 +14,7 @@ cron.* /var/log/cron !-devd *.=debug /var/log/debug.log *.emerg * +daemon.info /var/log/daemon.log # uncomment this to log all writes to /dev/console to /var/log/console.log # touch /var/log/console.log and chmod it to mode 600 before it will work #console.info /var/log/console.log From 6cc4a3c970f3d976394ea4ace5d7bf31f374d6cd Mon Sep 17 00:00:00 2001 From: Steve Wills Date: Tue, 17 Sep 2019 20:05:06 +0000 Subject: [PATCH 21/46] Use correct filename in newsyslog.conf Approved by: bapt (implicit) Differential Revision: https://reviews.freebsd.org/D21561 --- usr.sbin/newsyslog/newsyslog.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/newsyslog/newsyslog.conf b/usr.sbin/newsyslog/newsyslog.conf index 8390b1eb0877..80e8270935a1 100644 --- a/usr.sbin/newsyslog/newsyslog.conf +++ b/usr.sbin/newsyslog/newsyslog.conf @@ -30,7 +30,7 @@ /var/log/security 600 10 1000 * JC /var/log/utx.log 644 3 * @01T05 B /var/log/weekly.log 640 5 * $W6D0 JN -/var/log/daemon 644 5 1000 @0101T JC +/var/log/daemon.log 644 5 1000 @0101T JC /etc/newsyslog.conf.d/[!.]*.conf /usr/local/etc/newsyslog.conf.d/[!.]*.conf From ae92090ae090dbddc3827cc3607e193aa06ff6cc Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Tue, 17 Sep 2019 22:08:16 +0000 Subject: [PATCH 22/46] Temporarily skip sys.netpfil.common.forward.pf_v4 on i386 CI as it always fails PR: 240085 Sponsored by: The FreeBSD Foundation --- tests/sys/netpfil/common/forward.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/sys/netpfil/common/forward.sh b/tests/sys/netpfil/common/forward.sh index 9f73bb139598..0b2f7acfb785 100755 --- a/tests/sys/netpfil/common/forward.sh +++ b/tests/sys/netpfil/common/forward.sh @@ -40,6 +40,10 @@ v4_head() v4_body() { firewall=$1 + if [ "$(atf_config_get ci false)" = "true" ] && \ + [ "$(uname -p)" = "i386" ] && [ "${firewall}" = "pf" ]; then + atf_skip "https://bugs.freebsd.org/240085" 
+ fi firewall_init $firewall epair_send=$(vnet_mkepair) From ebcb81079e5c150e8da9f8a4cbcd8409134217cc Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Tue, 17 Sep 2019 22:09:14 +0000 Subject: [PATCH 23/46] Temporarily skip sys.netpfil.common.tos.pf_tos on i386 CI as it always fails PR: 240086 Sponsored by: The FreeBSD Foundation --- tests/sys/netpfil/common/tos.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/sys/netpfil/common/tos.sh b/tests/sys/netpfil/common/tos.sh index 452c6a002bbf..0b8bbb7dfc3e 100644 --- a/tests/sys/netpfil/common/tos.sh +++ b/tests/sys/netpfil/common/tos.sh @@ -40,6 +40,10 @@ tos_head() tos_body() { firewall=$1 + if [ "$(atf_config_get ci false)" = "true" ] && \ + [ "$(uname -p)" = "i386" ] && [ "${firewall}" = "pf" ]; then + atf_skip "https://bugs.freebsd.org/240086" + fi firewall_init $firewall epair_send=$(vnet_mkepair) From 8b2195605b84ac7df041f6212ec7130b7b9cfc78 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Wed, 18 Sep 2019 01:33:17 +0000 Subject: [PATCH 24/46] mips: ubldr: use truncated load address for mips32 BFD appears to silently truncate 0xffffffff80800000 when it processes the ldscript for 32-bit mips, but LLD chokes on it as the linker script tries to place elements above 32-bit range. It's unclear to me if silent truncation is kosher or not and whether this patch is really what we want to do, but it is one approach at least. Reviewed by: imp, mizhka Differential Revision: https://reviews.freebsd.org/D21487 --- stand/mips/uboot/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stand/mips/uboot/Makefile b/stand/mips/uboot/Makefile index 1751bcb80d54..1162b7fed3b0 100644 --- a/stand/mips/uboot/Makefile +++ b/stand/mips/uboot/Makefile @@ -19,7 +19,11 @@ INSTALLFLAGS= -b WARNS?= 1 # Address at which ubldr will be loaded. # This varies for different boards and SOCs. 
+.if ${MACHINE_ARCH:Mmips64*} UBLDR_LOADADDR?= 0xffffffff80800000 +.else +UBLDR_LOADADDR?= 0x80800000 +.endif # Architecture-specific loader code SRCS= start.S conf.c vers.c From 15b53426e8e297be5802b63401a56bb4a26d9b1a Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Wed, 18 Sep 2019 01:58:56 +0000 Subject: [PATCH 25/46] googletest: default-disable on all of MIPS for now Parts of the fusefs tests trigger a bug in current versions of llvm: IR representation of some routine for the MIPS targets is a function with a large number of arguments. This then leads the compiler on an hour+ long goose chase, which is OK if you build the current tree but less-so if you're trying external toolchain or doing a universe build involving mips when it eventually gets switched over to LLVM. Better, accurate details can be found in LLVM PR43263. --- share/mk/src.opts.mk | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/share/mk/src.opts.mk b/share/mk/src.opts.mk index 1ed38a2a076e..fac7f8d019db 100644 --- a/share/mk/src.opts.mk +++ b/share/mk/src.opts.mk @@ -108,7 +108,6 @@ __DEFAULT_YES_OPTIONS = \ GDB \ GNU_DIFF \ GNU_GREP \ - GOOGLETEST \ GPIO \ HAST \ HTML \ @@ -260,6 +259,15 @@ __TT=${TARGET} __TT=${MACHINE} .endif +# Default GOOGLETEST to off for MIPS while LLVM PR 43263 is active. Part +# of the fusefs tests trigger excessively long compile times. It does +# eventually succeed, but this shouldn't be forced on those building by default. +.if ${__TT} == "mips" +__DEFAULT_NO_OPTIONS+= GOOGLETEST +.else +__DEFAULT_YES_OPTIONS+= GOOGLETEST +.endif + # All supported backends for LLVM_TARGET_XXX __LLVM_TARGETS= \ aarch64 \ From 47590886659f09e9981e49cd1d036d787a0d0f0f Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Wed, 18 Sep 2019 02:03:39 +0000 Subject: [PATCH 26/46] Add description for WITH_GOOGLETEST This is the logical negation of WITHOUT_GOOGLETEST, and helpful to have as we now have different per-arch defaults for this option. 
--- tools/build/options/WITH_GOOGLETEST | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tools/build/options/WITH_GOOGLETEST diff --git a/tools/build/options/WITH_GOOGLETEST b/tools/build/options/WITH_GOOGLETEST new file mode 100644 index 000000000000..8ed29dd7a881 --- /dev/null +++ b/tools/build/options/WITH_GOOGLETEST @@ -0,0 +1,5 @@ +.\" $FreeBSD$ +Set to build and install +.Lb libgmock , +.Lb libgtest , +and dependent tests. From 05e08cbdaccc97cf11abac3919f7b0c6ea99637b Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Wed, 18 Sep 2019 02:04:41 +0000 Subject: [PATCH 27/46] src.conf(5): regenerate after r352465, r352466 These changed the defaults for the GOOGLETEST knob and added a description for WITH_GOOGLETEST. --- share/man/man5/src.conf.5 | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index 1199f74ddd3c..c2b19b2028bf 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,6 +1,6 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. .\" $FreeBSD$ -.Dd August 16, 2019 +.Dd September 17, 2019 .Dt SRC.CONF 5 .Os .Sh NAME @@ -831,6 +831,17 @@ Set to neither build nor install .Lb libgmock , .Lb libgtest , and dependent tests. +.Pp +This is a default setting on +mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf and mips/mips64hf. +.It Va WITH_GOOGLETEST +Set to build and install +.Lb libgmock , +.Lb libgtest , +and dependent tests. +.Pp +This is a default setting on +amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and sparc64/sparc64. 
.It Va WITHOUT_GPIO Set to not build .Xr gpioctl 8 From d567f909ef23d362b39eb2327b1c917d7c8bbfca Mon Sep 17 00:00:00 2001 From: Baptiste Daroussin Date: Wed, 18 Sep 2019 07:57:56 +0000 Subject: [PATCH 28/46] Add native support for zstd to libarchive Note that old pkg will fail to build after this. A recent ports tree (one providing pkg 1.12+) is required to build. Older, already-built pkg should continue working as expected. PR: 238797 Exp run by: antoine Reviewed by: cem Approved by: cem Differential Revision: https://reviews.freebsd.org/D20752 --- Makefile.inc1 | 1 + lib/Makefile | 2 +- lib/libarchive/Makefile | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Makefile.inc1 b/Makefile.inc1 index 52af7f143520..c5c3db8318e9 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -2770,6 +2770,7 @@ _prebuild_libs= ${_kerberos5_lib_libasn1} \ lib/libfigpar \ ${_lib_libgssapi} \ lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \ + lib/libzstd \ ${_lib_casper} \ lib/ncurses/ncurses lib/ncurses/ncursesw \ lib/libopie lib/libpam/libpam ${_lib_libthr} \ diff --git a/lib/Makefile b/lib/Makefile index 33f31f7f7ed6..d5ee01327e8c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -102,7 +102,7 @@ SUBDIR= ${SUBDIR_BOOTSTRAP} \ # libraries, those libraries should be listed as build order dependencies here. 
SUBDIR_DEPEND_geom= libufs -SUBDIR_DEPEND_libarchive= libz libbz2 libexpat liblzma libmd +SUBDIR_DEPEND_libarchive= libz libbz2 libexpat liblzma libmd libzstd SUBDIR_DEPEND_libauditdm= libbsm SUBDIR_DEPEND_libbsnmp= ${_libnetgraph} SUBDIR_DEPEND_libc++:= libcxxrt diff --git a/lib/libarchive/Makefile b/lib/libarchive/Makefile index 5c44669f6f85..474a91f7dda1 100644 --- a/lib/libarchive/Makefile +++ b/lib/libarchive/Makefile @@ -6,8 +6,8 @@ _LIBARCHIVEDIR= ${SRCTOP}/contrib/libarchive LIB= archive -LIBADD= z bz2 lzma bsdxml -CFLAGS+= -DHAVE_BZLIB_H=1 -DHAVE_LIBLZMA=1 -DHAVE_LZMA_H=1 +LIBADD= z bz2 lzma bsdxml zstd +CFLAGS+= -DHAVE_BZLIB_H=1 -DHAVE_LIBLZMA=1 -DHAVE_LZMA_H=1 -DHAVE_ZSTD_H=1 -DHAVE_LIBZSTD=1 # FreeBSD SHLIB_MAJOR value is managed as part of the FreeBSD system. # It has no real relation to the libarchive version number. @@ -15,6 +15,7 @@ SHLIB_MAJOR= 7 CFLAGS+= -DPLATFORM_CONFIG_H=\"${.CURDIR}/config_freebsd.h\" CFLAGS+= -I${.OBJDIR} +CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib .if ${MK_OPENSSL} != "no" CFLAGS+= -DWITH_OPENSSL From 85686f3425b9fd5a3dc9c74fc7813db1623b6ba1 Mon Sep 17 00:00:00 2001 From: Baptiste Daroussin Date: Wed, 18 Sep 2019 08:02:03 +0000 Subject: [PATCH 29/46] Add the missing bits for LIBADD to properly function now that libarchive is linked to libzstd Pointy hat: bapt Reported by: antoine --- share/mk/src.libnames.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk index 5710df3214c4..1f4294cb6a9a 100644 --- a/share/mk/src.libnames.mk +++ b/share/mk/src.libnames.mk @@ -230,7 +230,7 @@ LIBVERIEXEC?= ${LIBVERIEXECDIR}/libveriexec${PIE_SUFFIX}.a # Each library's LIBADD needs to be duplicated here for static linkage of # 2nd+ order consumers. Auto-generating this would be better. 
_DP_80211= sbuf bsdxml -_DP_archive= z bz2 lzma bsdxml +_DP_archive= z bz2 lzma bsdxml zstd _DP_zstd= pthread .if ${MK_BLACKLIST} != "no" _DP_blacklist+= pthread From 382e01c8dc7f328f46c61c82a29222f432f510f7 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 18 Sep 2019 16:13:10 +0000 Subject: [PATCH 30/46] sysctl: use names instead of magic numbers. Replace magic numbers with symbols for internal sysctl operations. Convert in-kernel and libc consumers. Submitted by: Pawel Biernacki MFC after: 1 week Differential revision: https://reviews.freebsd.org/D21693 --- lib/libc/gen/sysctlnametomib.c | 4 +-- sys/kern/kern_sysctl.c | 61 ++++++++++++++++++---------------- sys/sys/sysctl.h | 13 +++++++- 3 files changed, 47 insertions(+), 31 deletions(-) diff --git a/lib/libc/gen/sysctlnametomib.c b/lib/libc/gen/sysctlnametomib.c index f01be37a0d77..86e02e98f9e3 100644 --- a/lib/libc/gen/sysctlnametomib.c +++ b/lib/libc/gen/sysctlnametomib.c @@ -47,8 +47,8 @@ sysctlnametomib(const char *name, int *mibp, size_t *sizep) int oid[2]; int error; - oid[0] = 0; - oid[1] = 3; + oid[0] = CTL_SYSCTL; + oid[1] = CTL_SYSCTL_NAME2OID; *sizep *= sizeof(int); error = sysctl(oid, 2, mibp, sizep, name, strlen(name)); diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 2ccfc4324fab..e7f1319eaa14 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -935,13 +935,18 @@ SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL); * (be aware though, that the proper interface isn't as obvious as it * may seem, there are various conflicting requirements. * - * {0,0} printf the entire MIB-tree. - * {0,1,...} return the name of the "..." OID. - * {0,2,...} return the next OID. - * {0,3} return the OID of the name in "new" - * {0,4,...} return the kind & format info for the "..." OID. - * {0,5,...} return the description of the "..." OID. - * {0,6,...} return the aggregation label of the "..." OID. 
+ * {CTL_SYSCTL, CTL_SYSCTL_DEBUG} printf the entire MIB-tree. + * {CTL_SYSCTL, CTL_SYSCTL_NAME, ...} return the name of the "..." + * OID. + * {CTL_SYSCTL, CTL_SYSCTL_NEXT, ...} return the next OID. + * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID} return the OID of the name in + * "new" + * {CTL_SYSCTL, CTL_SYSCTL_OIDFMT, ...} return the kind & format info + * for the "..." OID. + * {CTL_SYSCTL, CTL_SYSCTL_OIDDESCR, ...} return the description of the + * "..." OID. + * {CTL_SYSCTL, CTL_SYSCTL_OIDLABEL, ...} return the aggregation label of + * the "..." OID. */ #ifdef SYSCTL_DEBUG @@ -1009,8 +1014,8 @@ sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) return (ENOENT); } -SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, - 0, 0, sysctl_sysctl_debug, "-", ""); +SYSCTL_PROC(_sysctl, CTL_SYSCTL_DEBUG, debug, CTLTYPE_STRING | CTLFLAG_RD | + CTLFLAG_MPSAFE, 0, 0, sysctl_sysctl_debug, "-", ""); #endif static int @@ -1075,8 +1080,8 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in * capability mode. */ -static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, - sysctl_sysctl_name, ""); +static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NAME, name, CTLFLAG_RD | + CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_name, ""); static int sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, @@ -1162,8 +1167,8 @@ sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in * capability mode. 
*/ -static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, - sysctl_sysctl_next, ""); +static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXT, next, CTLFLAG_RD | + CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, ""); static int name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) @@ -1249,9 +1254,9 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in * capability mode. */ -SYSCTL_PROC(_sysctl, 3, name2oid, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE - | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); +SYSCTL_PROC(_sysctl, CTL_SYSCTL_NAME2OID, name2oid, CTLTYPE_INT | CTLFLAG_RW | + CTLFLAG_ANYBODY | CTLFLAG_MPSAFE | CTLFLAG_CAPRW, 0, 0, + sysctl_sysctl_name2oid, "I", ""); static int sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) @@ -1279,8 +1284,8 @@ sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) } -static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, - sysctl_sysctl_oidfmt, ""); +static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDFMT, oidfmt, CTLFLAG_RD | + CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, ""); static int sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) @@ -1304,8 +1309,8 @@ sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) return (error); } -static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, - sysctl_sysctl_oiddescr, ""); +static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDDESCR, oiddescr, CTLFLAG_RD | + CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, ""); static int sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS) @@ -1329,8 +1334,8 @@ sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS) return (error); } -static SYSCTL_NODE(_sysctl, 6, oidlabel, - CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, ""); +static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDLABEL, oidlabel, CTLFLAG_RD | + CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, ""); /* * Default "handler" functions. 
@@ -1857,8 +1862,8 @@ kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, size_t oidlen, plen; int error; - oid[0] = 0; /* sysctl internal magic */ - oid[1] = 3; /* name2oid */ + oid[0] = CTL_SYSCTL; + oid[1] = CTL_SYSCTL_NAME2OID; oidlen = sizeof(oid); error = kernel_sysctl(td, oid, 2, oid, &oidlen, @@ -2165,8 +2170,8 @@ kern___sysctlbyname(struct thread *td, const char *oname, size_t namelen, if (error != 0) goto out; - oid[0] = 0; - oid[1] = 3; + oid[0] = CTL_SYSCTL; + oid[1] = CTL_SYSCTL_NAME2OID; oidlen = sizeof(oid); error = kernel_sysctl(td, oid, 2, oid, &oidlen, (void *)name, namelen, retval, flags); @@ -2667,8 +2672,8 @@ db_show_sysctl_all(int *oid, size_t len, int flags) int name1[CTL_MAXNAME + 2], name2[CTL_MAXNAME + 2]; size_t l1, l2; - name1[0] = 0; - name1[1] = 2; + name1[0] = CTL_SYSCTL; + name1[1] = CTL_SYSCTL_NEXT; l1 = 2; if (len) { memcpy(name1+2, oid, len * sizeof(int)); diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h index 74976be5eba7..e2facf8a1339 100644 --- a/sys/sys/sysctl.h +++ b/sys/sys/sysctl.h @@ -889,7 +889,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); /* * Top-level identifiers */ -#define CTL_UNSPEC 0 /* unused */ +#define CTL_SYSCTL 0 /* "magic" numbers */ #define CTL_KERN 1 /* "high kernel": proc, limits */ #define CTL_VM 2 /* virtual memory */ #define CTL_VFS 3 /* filesystem, mount type is next */ @@ -900,6 +900,17 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); #define CTL_USER 8 /* user-level */ #define CTL_P1003_1B 9 /* POSIX 1003.1B */ +/* + * CTL_SYSCTL identifiers + */ +#define CTL_SYSCTL_DEBUG 0 /* printf all nodes */ +#define CTL_SYSCTL_NAME 1 /* string name of OID */ +#define CTL_SYSCTL_NEXT 2 /* next OID */ +#define CTL_SYSCTL_NAME2OID 3 /* int array of name */ +#define CTL_SYSCTL_OIDFMT 4 /* OID's kind and format */ +#define CTL_SYSCTL_OIDDESCR 5 /* OID's description */ +#define CTL_SYSCTL_OIDLABEL 6 /* aggregation label */ + /* * CTL_KERN identifiers */ From 
230754ccca858fb50dd64cc6c013f679d9a60dfe Mon Sep 17 00:00:00 2001 From: Ruslan Bukin Date: Wed, 18 Sep 2019 16:13:50 +0000 Subject: [PATCH 31/46] Add support for BERI statcounters. BERI stands for Bluespec Extensible RISC Implementation, based on MIPS. BERI has not implemented standard MIPS performance monitoring counters; instead it provides statistical counters. BERI statcounters have several limitations: - They can't be written - They don't support start/stop operation - No hardware interrupt is provided on a counter overflow. So make it separate from the hwpmc_mips module and support process/system counting mode only. Sponsored by: DARPA, AFRL --- lib/libpmc/libpmc.c | 19 ++ sys/conf/files.mips | 4 +- sys/dev/hwpmc/hwpmc_beri.c | 540 +++++++++++++++++++++++++++++++++++++ sys/dev/hwpmc/hwpmc_beri.h | 107 ++++++++ sys/dev/hwpmc/pmc_events.h | 64 +++++ sys/sys/pmc.h | 4 +- 6 files changed, 736 insertions(+), 2 deletions(-) create mode 100644 sys/dev/hwpmc/hwpmc_beri.c create mode 100644 sys/dev/hwpmc/hwpmc_beri.h diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index 5286533bacdc..6e39373c1cb4 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -143,6 +143,7 @@ PMC_CLASSDEP_TABLE(k8, K8); PMC_CLASSDEP_TABLE(xscale, XSCALE); PMC_CLASSDEP_TABLE(armv7, ARMV7); PMC_CLASSDEP_TABLE(armv8, ARMV8); +PMC_CLASSDEP_TABLE(beri, BERI); PMC_CLASSDEP_TABLE(mips24k, MIPS24K); PMC_CLASSDEP_TABLE(mips74k, MIPS74K); PMC_CLASSDEP_TABLE(octeon, OCTEON); @@ -187,6 +188,7 @@ static const struct pmc_event_descr cortex_a57_event_table[] = PMC_MDEP_TABLE(k8, K8, PMC_CLASS_SOFT, PMC_CLASS_TSC); PMC_MDEP_TABLE(xscale, XSCALE, PMC_CLASS_SOFT, PMC_CLASS_XSCALE); +PMC_MDEP_TABLE(beri, BERI, PMC_CLASS_SOFT, PMC_CLASS_BERI); PMC_MDEP_TABLE(cortex_a8, ARMV7, PMC_CLASS_SOFT, PMC_CLASS_ARMV7); PMC_MDEP_TABLE(cortex_a9, ARMV7, PMC_CLASS_SOFT, PMC_CLASS_ARMV7); PMC_MDEP_TABLE(cortex_a53, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8); @@ -235,6 +237,7 @@ PMC_CLASS_TABLE_DESC(cortex_a53, 
ARMV8, cortex_a53, arm64); PMC_CLASS_TABLE_DESC(cortex_a57, ARMV8, cortex_a57, arm64); #endif #if defined(__mips__) +PMC_CLASS_TABLE_DESC(beri, BERI, beri, mips); PMC_CLASS_TABLE_DESC(mips24k, MIPS24K, mips24k, mips); PMC_CLASS_TABLE_DESC(mips74k, MIPS74K, mips74k, mips); PMC_CLASS_TABLE_DESC(octeon, OCTEON, octeon, mips); @@ -829,6 +832,11 @@ arm64_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, #if defined(__mips__) +static struct pmc_event_alias beri_aliases[] = { + EV_ALIAS("instructions", "INST"), + EV_ALIAS(NULL, NULL) +}; + static struct pmc_event_alias mips24k_aliases[] = { EV_ALIAS("instructions", "INSTR_EXECUTED"), EV_ALIAS("branches", "BRANCH_COMPLETED"), @@ -1267,6 +1275,10 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, break; } break; + case PMC_CLASS_BERI: + ev = beri_event_table; + count = PMC_EVENT_TABLE_SIZE(beri); + break; case PMC_CLASS_MIPS24K: ev = mips24k_event_table; count = PMC_EVENT_TABLE_SIZE(mips24k); @@ -1508,6 +1520,10 @@ pmc_init(void) break; #endif #if defined(__mips__) + case PMC_CPU_MIPS_BERI: + PMC_MDEP_INIT(beri); + pmc_class_table[n] = &beri_class_table_descr; + break; case PMC_CPU_MIPS_24K: PMC_MDEP_INIT(mips24k); pmc_class_table[n] = &mips24k_class_table_descr; @@ -1645,6 +1661,9 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) default: /* Unknown CPU type. 
*/ break; } + } else if (pe >= PMC_EV_BERI_FIRST && pe <= PMC_EV_BERI_LAST) { + ev = beri_event_table; + evfence = beri_event_table + PMC_EVENT_TABLE_SIZE(beri); } else if (pe >= PMC_EV_MIPS24K_FIRST && pe <= PMC_EV_MIPS24K_LAST) { ev = mips24k_event_table; evfence = mips24k_event_table + PMC_EVENT_TABLE_SIZE(mips24k); diff --git a/sys/conf/files.mips b/sys/conf/files.mips index b87022dd58a8..b19944e5e6a1 100644 --- a/sys/conf/files.mips +++ b/sys/conf/files.mips @@ -91,7 +91,9 @@ dev/nvram2env/nvram2env_mips.c optional nvram2env dev/nvram2env/nvram2env.c optional nvram2env # hwpmc support -dev/hwpmc/hwpmc_mips.c optional hwpmc +dev/hwpmc/hwpmc_beri.c optional hwpmc_beri +dev/hwpmc/hwpmc_mips.c optional hwpmc_mips24k | \ + hwpmc_mips74k dev/hwpmc/hwpmc_mips24k.c optional hwpmc_mips24k dev/hwpmc/hwpmc_mips74k.c optional hwpmc_mips74k diff --git a/sys/dev/hwpmc/hwpmc_beri.c b/sys/dev/hwpmc/hwpmc_beri.c new file mode 100644 index 000000000000..4fec1b950299 --- /dev/null +++ b/sys/dev/hwpmc/hwpmc_beri.c @@ -0,0 +1,540 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2019 Ruslan Bukin + * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory (Department of Computer Science and + * Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the + * DARPA SSITH research programme. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_hwpmc_hooks.h" + +#include +#include + +#include + +#define BERI_NCOUNTERS 56 +#define BERI_PMC_CAPS (PMC_CAP_USER | PMC_CAP_SYSTEM | \ + PMC_CAP_READ | PMC_CAP_WRITE ) + +struct beri_event_code_map { + uint32_t pe_ev; /* enum value */ + uint64_t (*get_func)(void); +}; + +const struct beri_event_code_map beri_event_codes[BERI_NCOUNTERS] = { + { PMC_EV_BERI_CYCLE, + statcounters_get_cycle_count }, + { PMC_EV_BERI_INST, + statcounters_get_inst_count }, + { PMC_EV_BERI_INST_USER, + statcounters_get_inst_user_count }, + { PMC_EV_BERI_INST_KERNEL, + statcounters_get_inst_kernel_count }, + { PMC_EV_BERI_IMPRECISE_SETBOUNDS, + statcounters_get_imprecise_setbounds_count }, + { PMC_EV_BERI_UNREPRESENTABLE_CAPS, + statcounters_get_unrepresentable_caps_count }, + { PMC_EV_BERI_ITLB_MISS, + statcounters_get_itlb_miss_count }, + { PMC_EV_BERI_DTLB_MISS, + statcounters_get_dtlb_miss_count }, + { PMC_EV_BERI_ICACHE_WRITE_HIT, + statcounters_get_icache_write_hit_count }, + { PMC_EV_BERI_ICACHE_WRITE_MISS, + statcounters_get_icache_write_miss_count }, + { PMC_EV_BERI_ICACHE_READ_HIT, + statcounters_get_icache_read_hit_count }, + { 
PMC_EV_BERI_ICACHE_READ_MISS, + statcounters_get_icache_read_miss_count }, + { PMC_EV_BERI_ICACHE_EVICT, + statcounters_get_icache_evict_count }, + { PMC_EV_BERI_DCACHE_WRITE_HIT, + statcounters_get_dcache_write_hit_count }, + { PMC_EV_BERI_DCACHE_WRITE_MISS, + statcounters_get_dcache_write_miss_count }, + { PMC_EV_BERI_DCACHE_READ_HIT, + statcounters_get_dcache_read_hit_count }, + { PMC_EV_BERI_DCACHE_READ_MISS, + statcounters_get_dcache_read_miss_count }, + { PMC_EV_BERI_DCACHE_EVICT, + statcounters_get_dcache_evict_count }, + { PMC_EV_BERI_DCACHE_SET_TAG_WRITE, + statcounters_get_dcache_set_tag_write_count }, + { PMC_EV_BERI_DCACHE_SET_TAG_READ, + statcounters_get_dcache_set_tag_read_count }, + { PMC_EV_BERI_L2CACHE_WRITE_HIT, + statcounters_get_l2cache_write_hit_count }, + { PMC_EV_BERI_L2CACHE_WRITE_MISS, + statcounters_get_l2cache_write_miss_count }, + { PMC_EV_BERI_L2CACHE_READ_HIT, + statcounters_get_l2cache_read_hit_count }, + { PMC_EV_BERI_L2CACHE_READ_MISS, + statcounters_get_l2cache_read_miss_count }, + { PMC_EV_BERI_L2CACHE_EVICT, + statcounters_get_l2cache_evict_count }, + { PMC_EV_BERI_L2CACHE_SET_TAG_WRITE, + statcounters_get_l2cache_set_tag_write_count }, + { PMC_EV_BERI_L2CACHE_SET_TAG_READ, + statcounters_get_l2cache_set_tag_read_count }, + { PMC_EV_BERI_MEM_BYTE_READ, + statcounters_get_mem_byte_read_count }, + { PMC_EV_BERI_MEM_BYTE_WRITE, + statcounters_get_mem_byte_write_count }, + { PMC_EV_BERI_MEM_HWORD_READ, + statcounters_get_mem_hword_read_count }, + { PMC_EV_BERI_MEM_HWORD_WRITE, + statcounters_get_mem_hword_write_count }, + { PMC_EV_BERI_MEM_WORD_READ, + statcounters_get_mem_word_read_count }, + { PMC_EV_BERI_MEM_WORD_WRITE, + statcounters_get_mem_word_write_count }, + { PMC_EV_BERI_MEM_DWORD_READ, + statcounters_get_mem_dword_read_count }, + { PMC_EV_BERI_MEM_DWORD_WRITE, + statcounters_get_mem_dword_write_count }, + { PMC_EV_BERI_MEM_CAP_READ, + statcounters_get_mem_cap_read_count }, + { PMC_EV_BERI_MEM_CAP_WRITE, + 
statcounters_get_mem_cap_write_count }, + { PMC_EV_BERI_MEM_CAP_READ_TAG_SET, + statcounters_get_mem_cap_read_tag_set_count }, + { PMC_EV_BERI_MEM_CAP_WRITE_TAG_SET, + statcounters_get_mem_cap_write_tag_set_count }, + { PMC_EV_BERI_TAGCACHE_WRITE_HIT, + statcounters_get_tagcache_write_hit_count }, + { PMC_EV_BERI_TAGCACHE_WRITE_MISS, + statcounters_get_tagcache_write_miss_count }, + { PMC_EV_BERI_TAGCACHE_READ_HIT, + statcounters_get_tagcache_read_hit_count }, + { PMC_EV_BERI_TAGCACHE_READ_MISS, + statcounters_get_tagcache_read_miss_count }, + { PMC_EV_BERI_TAGCACHE_EVICT, + statcounters_get_tagcache_evict_count }, + { PMC_EV_BERI_L2CACHEMASTER_READ_REQ, + statcounters_get_l2cachemaster_read_req_count }, + { PMC_EV_BERI_L2CACHEMASTER_WRITE_REQ, + statcounters_get_l2cachemaster_write_req_count }, + { PMC_EV_BERI_L2CACHEMASTER_WRITE_REQ_FLIT, + statcounters_get_l2cachemaster_write_req_flit_count }, + { PMC_EV_BERI_L2CACHEMASTER_READ_RSP, + statcounters_get_l2cachemaster_read_rsp_count }, + { PMC_EV_BERI_L2CACHEMASTER_READ_RSP_FLIT, + statcounters_get_l2cachemaster_read_rsp_flit_count }, + { PMC_EV_BERI_L2CACHEMASTER_WRITE_RSP, + statcounters_get_l2cachemaster_write_rsp_count }, + { PMC_EV_BERI_TAGCACHEMASTER_READ_REQ, + statcounters_get_tagcachemaster_read_req_count }, + { PMC_EV_BERI_TAGCACHEMASTER_WRITE_REQ, + statcounters_get_tagcachemaster_write_req_count }, + { PMC_EV_BERI_TAGCACHEMASTER_WRITE_REQ_FLIT, + statcounters_get_tagcachemaster_write_req_flit_count }, + { PMC_EV_BERI_TAGCACHEMASTER_READ_RSP, + statcounters_get_tagcachemaster_read_rsp_count }, + { PMC_EV_BERI_TAGCACHEMASTER_READ_RSP_FLIT, + statcounters_get_tagcachemaster_read_rsp_flit_count }, + { PMC_EV_BERI_TAGCACHEMASTER_WRITE_RSP, + statcounters_get_tagcachemaster_write_rsp_count }, +}; + +struct mips_pmc_spec beri_pmc_spec = { + .ps_cpuclass = PMC_CLASS_BERI, + .ps_cputype = PMC_CPU_MIPS_BERI, + .ps_capabilities = BERI_PMC_CAPS, + .ps_counter_width = 64 +}; + +/* + * Per-processor information. 
+ */ +struct beri_cpu { + struct pmc_hw *pc_beripmcs; + uint64_t start_values[BERI_NCOUNTERS]; + uint64_t stop_values[BERI_NCOUNTERS]; + uint64_t saved_values[BERI_NCOUNTERS]; +}; + +int beri_npmcs; +static struct beri_cpu **beri_pcpu; + +static int +beri_allocate_pmc(int cpu, int ri, struct pmc *pm, + const struct pmc_op_pmcallocate *a) +{ + uint32_t config; + int i; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] illegal row index %d", __LINE__, ri)); + + if (a->pm_class != beri_pmc_spec.ps_cpuclass) + return (EINVAL); + + for (i = 0; i < BERI_NCOUNTERS; i++) { + if (beri_event_codes[i].pe_ev == a->pm_ev) { + config = i; + break; + } + } + + if (i == BERI_NCOUNTERS) + return (EINVAL); + + pm->pm_md.pm_mips_evsel = config; + + PMCDBG2(MDP,ALL,2,"beri-allocate ri=%d -> config=0x%x", ri, config); + + return (0); +} + +static int +beri_read_pmc(int cpu, int ri, pmc_value_t *v) +{ + uint32_t config; + struct pmc *pm; + pmc_value_t new; + pmc_value_t start_val; + pmc_value_t stop_val; + pmc_value_t saved_val; + pmc_value_t result; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] illegal row index %d", __LINE__, ri)); + + pm = beri_pcpu[cpu]->pc_beripmcs[ri].phw_pmc; + config = pm->pm_md.pm_mips_evsel; + + start_val = beri_pcpu[cpu]->start_values[config]; + if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { + stop_val = beri_event_codes[config].get_func(); + } else + stop_val = beri_pcpu[cpu]->stop_values[config]; + + if (start_val <= stop_val) + result = stop_val - start_val; + else { + if (config == 0) /* CYCLE counter is 48 bit */ + result = 0x00ffffffffffffffUL; + else + result = 0xffffffffffffffffUL; + result -= start_val; + result += stop_val; + } + + saved_val = beri_pcpu[cpu]->saved_values[config]; + + *v = result + saved_val; + + return (0); +} + +static int 
+beri_write_pmc(int cpu, int ri, pmc_value_t v) +{ + struct pmc *pm; + uint32_t config; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] illegal row-index %d", __LINE__, ri)); + + pm = beri_pcpu[cpu]->pc_beripmcs[ri].phw_pmc; + config = pm->pm_md.pm_mips_evsel; + + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) + v = (1UL << (beri_pmc_spec.ps_counter_width - 1)) - v; + + PMCDBG3(MDP,WRI,1,"beri-write cpu=%d ri=%d v=%jx", cpu, ri, v); + + if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) + beri_pcpu[cpu]->saved_values[config] = 0; + else + beri_pcpu[cpu]->saved_values[config] = v; + + return (0); +} + +static int +beri_config_pmc(int cpu, int ri, struct pmc *pm) +{ + struct pmc_hw *phw; + + PMCDBG3(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm); + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] illegal row-index %d", __LINE__, ri)); + + phw = &beri_pcpu[cpu]->pc_beripmcs[ri]; + + KASSERT(pm == NULL || phw->phw_pmc == NULL, + ("[beri,%d] pm=%p phw->pm=%p hwpmc not unconfigured", + __LINE__, pm, phw->phw_pmc)); + + phw->phw_pmc = pm; + + return (0); +} + +static int +beri_start_pmc(int cpu, int ri) +{ + uint32_t config; + struct pmc *pm; + struct pmc_hw *phw; + pmc_value_t v; + + phw = &beri_pcpu[cpu]->pc_beripmcs[ri]; + pm = phw->phw_pmc; + config = pm->pm_md.pm_mips_evsel; + + v = beri_event_codes[config].get_func(); + beri_pcpu[cpu]->start_values[config] = v; + + return (0); +} + +static int +beri_stop_pmc(int cpu, int ri) +{ + uint32_t config; + struct pmc *pm; + struct pmc_hw *phw; + pmc_value_t v; + + phw = &beri_pcpu[cpu]->pc_beripmcs[ri]; + pm = phw->phw_pmc; + config = pm->pm_md.pm_mips_evsel; + + v = beri_event_codes[config].get_func(); + beri_pcpu[cpu]->stop_values[config] = v; + + return (0); +} + +static int +beri_release_pmc(int cpu, int ri, struct pmc *pmc) +{ + 
struct pmc_hw *phw; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] illegal row-index %d", __LINE__, ri)); + + phw = &beri_pcpu[cpu]->pc_beripmcs[ri]; + KASSERT(phw->phw_pmc == NULL, + ("[beri,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc)); + + return (0); +} + +static int +beri_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc) +{ + struct pmc_hw *phw; + char beri_name[PMC_NAME_MAX]; + int error; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d], illegal CPU %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < beri_npmcs, + ("[beri,%d] row-index %d out of range", __LINE__, ri)); + + phw = &beri_pcpu[cpu]->pc_beripmcs[ri]; + snprintf(beri_name, sizeof(beri_name), "MIPS-%d", ri); + if ((error = copystr(beri_name, pi->pm_name, PMC_NAME_MAX, + NULL)) != 0) + return error; + pi->pm_class = beri_pmc_spec.ps_cpuclass; + if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { + pi->pm_enabled = TRUE; + *ppmc = phw->phw_pmc; + } else { + pi->pm_enabled = FALSE; + *ppmc = NULL; + } + + return (0); +} + +static int +beri_get_config(int cpu, int ri, struct pmc **ppm) +{ + + *ppm = beri_pcpu[cpu]->pc_beripmcs[ri].phw_pmc; + + return (0); +} + +static int +beri_pmc_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) +{ + + return (0); +} + +static int +beri_pmc_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) +{ + + return (0); +} + +static int +beri_pcpu_init(struct pmc_mdep *md, int cpu) +{ + int first_ri, i; + struct pmc_cpu *pc; + struct beri_cpu *pac; + struct pmc_hw *phw; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[beri,%d] wrong cpu number %d", __LINE__, cpu)); + PMCDBG1(MDP,INI,1,"beri-init cpu=%d", cpu); + + beri_pcpu[cpu] = pac = malloc(sizeof(struct beri_cpu), M_PMC, + M_WAITOK|M_ZERO); + pac->pc_beripmcs = malloc(sizeof(struct pmc_hw) * beri_npmcs, + M_PMC, M_WAITOK|M_ZERO); + pc = pmc_pcpu[cpu]; + first_ri = 
md->pmd_classdep[PMC_MDEP_CLASS_INDEX_MIPS].pcd_ri; + KASSERT(pc != NULL, ("[beri,%d] NULL per-cpu pointer", __LINE__)); + + for (i = 0, phw = pac->pc_beripmcs; i < beri_npmcs; i++, phw++) { + phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | + PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(i); + phw->phw_pmc = NULL; + pc->pc_hwpmcs[i + first_ri] = phw; + } + + return (0); +} + +static int +beri_pcpu_fini(struct pmc_mdep *md, int cpu) +{ + + return (0); +} + +struct pmc_mdep * +pmc_beri_initialize() +{ + struct pmc_mdep *pmc_mdep; + struct pmc_classdep *pcd; + + snprintf(pmc_cpuid, sizeof(pmc_cpuid), "beri"); + + beri_npmcs = 2; + + PMCDBG1(MDP,INI,1,"beri-init npmcs=%d", beri_npmcs); + + /* + * Allocate space for pointers to PMC HW descriptors and for + * the MDEP structure used by MI code. + */ + beri_pcpu = malloc(sizeof(struct beri_cpu *) * pmc_cpu_max(), M_PMC, + M_WAITOK|M_ZERO); + + /* Just one class */ + pmc_mdep = pmc_mdep_alloc(1); + + pmc_mdep->pmd_cputype = beri_pmc_spec.ps_cputype; + + pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_MIPS]; + pcd->pcd_caps = beri_pmc_spec.ps_capabilities; + pcd->pcd_class = beri_pmc_spec.ps_cpuclass; + pcd->pcd_num = beri_npmcs; + pcd->pcd_ri = pmc_mdep->pmd_npmc; + pcd->pcd_width = beri_pmc_spec.ps_counter_width; + + pcd->pcd_allocate_pmc = beri_allocate_pmc; + pcd->pcd_config_pmc = beri_config_pmc; + pcd->pcd_pcpu_fini = beri_pcpu_fini; + pcd->pcd_pcpu_init = beri_pcpu_init; + pcd->pcd_describe = beri_describe; + pcd->pcd_get_config = beri_get_config; + pcd->pcd_read_pmc = beri_read_pmc; + pcd->pcd_release_pmc = beri_release_pmc; + pcd->pcd_start_pmc = beri_start_pmc; + pcd->pcd_stop_pmc = beri_stop_pmc; + pcd->pcd_write_pmc = beri_write_pmc; + + pmc_mdep->pmd_intr = NULL; + pmc_mdep->pmd_switch_in = beri_pmc_switch_in; + pmc_mdep->pmd_switch_out = beri_pmc_switch_out; + + pmc_mdep->pmd_npmc += beri_npmcs; + + return (pmc_mdep); +} + +void +pmc_beri_finalize(struct pmc_mdep *md) +{ + +} + +struct pmc_mdep * 
+pmc_md_initialize() +{ + + return (pmc_beri_initialize()); +} + +void +pmc_md_finalize(struct pmc_mdep *md) +{ + + return (pmc_beri_finalize(md)); +} + +int +pmc_save_kernel_callchain(uintptr_t *cc, int nframes, + struct trapframe *tf) +{ + + return (0); +} + +int +pmc_save_user_callchain(uintptr_t *cc, int nframes, + struct trapframe *tf) +{ + + return (0); +} diff --git a/sys/dev/hwpmc/hwpmc_beri.h b/sys/dev/hwpmc/hwpmc_beri.h new file mode 100644 index 000000000000..8f9aa48c7bae --- /dev/null +++ b/sys/dev/hwpmc/hwpmc_beri.h @@ -0,0 +1,107 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2019 Alex Richardson + * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory (Department of Computer Science and + * Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the + * DARPA SSITH research programme. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _DEV_HWPMC_HWPMC_BERI_H_ +#define _DEV_HWPMC_HWPMC_BERI_H_ + +#define STATCOUNTER_ITEM(name, X, Y) \ +static inline uint64_t statcounters_get_##name##_count(void) \ +{ \ + uint64_t ret; \ + __asm __volatile( \ + ".word (0x1f << 26) | (0x0 << 21) | \ + (12 << 16) | ("#X" << 11) | \ + ( "#Y" << 6) | 0x3b\n\t" \ + "move %0,$12" : "=r" (ret) :: "$12"); \ + return (ret); \ +} + +STATCOUNTER_ITEM(cycle,2,0) +STATCOUNTER_ITEM(inst,4,0) +STATCOUNTER_ITEM(inst_user,4,1) +STATCOUNTER_ITEM(inst_kernel,4,2) +STATCOUNTER_ITEM(imprecise_setbounds,4,3) +STATCOUNTER_ITEM(unrepresentable_caps,4,4) +STATCOUNTER_ITEM(itlb_miss,5,0) +STATCOUNTER_ITEM(dtlb_miss,6,0) +STATCOUNTER_ITEM(icache_write_hit,8,0) +STATCOUNTER_ITEM(icache_write_miss,8,1) +STATCOUNTER_ITEM(icache_read_hit,8,2) +STATCOUNTER_ITEM(icache_read_miss,8,3) +STATCOUNTER_ITEM(icache_evict,8,6) +STATCOUNTER_ITEM(dcache_write_hit,9,0) +STATCOUNTER_ITEM(dcache_write_miss,9,1) +STATCOUNTER_ITEM(dcache_read_hit,9,2) +STATCOUNTER_ITEM(dcache_read_miss,9,3) +STATCOUNTER_ITEM(dcache_evict,9,6) +STATCOUNTER_ITEM(dcache_set_tag_write,9,8) +STATCOUNTER_ITEM(dcache_set_tag_read,9,9) +STATCOUNTER_ITEM(l2cache_write_hit,10,0) +STATCOUNTER_ITEM(l2cache_write_miss,10,1) +STATCOUNTER_ITEM(l2cache_read_hit,10,2) +STATCOUNTER_ITEM(l2cache_read_miss,10,3) +STATCOUNTER_ITEM(l2cache_evict,10,6) +STATCOUNTER_ITEM(l2cache_set_tag_write,10,8) +STATCOUNTER_ITEM(l2cache_set_tag_read,10,9) 
+STATCOUNTER_ITEM(mem_byte_read,11,0) +STATCOUNTER_ITEM(mem_byte_write,11,1) +STATCOUNTER_ITEM(mem_hword_read,11,2) +STATCOUNTER_ITEM(mem_hword_write,11,3) +STATCOUNTER_ITEM(mem_word_read,11,4) +STATCOUNTER_ITEM(mem_word_write,11,5) +STATCOUNTER_ITEM(mem_dword_read,11,6) +STATCOUNTER_ITEM(mem_dword_write,11,7) +STATCOUNTER_ITEM(mem_cap_read,11,8) +STATCOUNTER_ITEM(mem_cap_write,11,9) +STATCOUNTER_ITEM(mem_cap_read_tag_set,11,10) +STATCOUNTER_ITEM(mem_cap_write_tag_set,11,11) +STATCOUNTER_ITEM(tagcache_write_hit,12,0) +STATCOUNTER_ITEM(tagcache_write_miss,12,1) +STATCOUNTER_ITEM(tagcache_read_hit,12,2) +STATCOUNTER_ITEM(tagcache_read_miss,12,3) +STATCOUNTER_ITEM(tagcache_evict,12,6) +STATCOUNTER_ITEM(l2cachemaster_read_req,13,0) +STATCOUNTER_ITEM(l2cachemaster_write_req,13,1) +STATCOUNTER_ITEM(l2cachemaster_write_req_flit,13,2) +STATCOUNTER_ITEM(l2cachemaster_read_rsp,13,3) +STATCOUNTER_ITEM(l2cachemaster_read_rsp_flit,13,4) +STATCOUNTER_ITEM(l2cachemaster_write_rsp,13,5) +STATCOUNTER_ITEM(tagcachemaster_read_req,14,0) +STATCOUNTER_ITEM(tagcachemaster_write_req,14,1) +STATCOUNTER_ITEM(tagcachemaster_write_req_flit,14,2) +STATCOUNTER_ITEM(tagcachemaster_read_rsp,14,3) +STATCOUNTER_ITEM(tagcachemaster_read_rsp_flit,14,4) +STATCOUNTER_ITEM(tagcachemaster_write_rsp,14,5) + +#endif /* !_DEV_HWPMC_HWPMC_BERI_H_ */ diff --git a/sys/dev/hwpmc/pmc_events.h b/sys/dev/hwpmc/pmc_events.h index f956b5f7c227..6f7282f8ea55 100644 --- a/sys/dev/hwpmc/pmc_events.h +++ b/sys/dev/hwpmc/pmc_events.h @@ -1246,6 +1246,67 @@ __PMC_EV_ALIAS("unhalted-core-cycles", IAP_ARCH_UNH_COR_CYC) #define PMC_EV_MIPS74K_FIRST PMC_EV_MIPS74K_CYCLES #define PMC_EV_MIPS74K_LAST PMC_EV_MIPS74K_WBB_25_50_FULL +#define __PMC_EV_BERI() \ + __PMC_EV(BERI, CYCLE) \ + __PMC_EV(BERI, INST) \ + __PMC_EV(BERI, INST_USER) \ + __PMC_EV(BERI, INST_KERNEL) \ + __PMC_EV(BERI, IMPRECISE_SETBOUNDS) \ + __PMC_EV(BERI, UNREPRESENTABLE_CAPS) \ + __PMC_EV(BERI, ITLB_MISS) \ + __PMC_EV(BERI, DTLB_MISS) \ + __PMC_EV(BERI, 
ICACHE_WRITE_HIT) \ + __PMC_EV(BERI, ICACHE_WRITE_MISS) \ + __PMC_EV(BERI, ICACHE_READ_HIT) \ + __PMC_EV(BERI, ICACHE_READ_MISS) \ + __PMC_EV(BERI, ICACHE_EVICT) \ + __PMC_EV(BERI, DCACHE_WRITE_HIT) \ + __PMC_EV(BERI, DCACHE_WRITE_MISS) \ + __PMC_EV(BERI, DCACHE_READ_HIT) \ + __PMC_EV(BERI, DCACHE_READ_MISS) \ + __PMC_EV(BERI, DCACHE_EVICT) \ + __PMC_EV(BERI, DCACHE_SET_TAG_WRITE) \ + __PMC_EV(BERI, DCACHE_SET_TAG_READ) \ + __PMC_EV(BERI, L2CACHE_WRITE_HIT) \ + __PMC_EV(BERI, L2CACHE_WRITE_MISS) \ + __PMC_EV(BERI, L2CACHE_READ_HIT) \ + __PMC_EV(BERI, L2CACHE_READ_MISS) \ + __PMC_EV(BERI, L2CACHE_EVICT) \ + __PMC_EV(BERI, L2CACHE_SET_TAG_WRITE) \ + __PMC_EV(BERI, L2CACHE_SET_TAG_READ) \ + __PMC_EV(BERI, MEM_BYTE_READ) \ + __PMC_EV(BERI, MEM_BYTE_WRITE) \ + __PMC_EV(BERI, MEM_HWORD_READ) \ + __PMC_EV(BERI, MEM_HWORD_WRITE) \ + __PMC_EV(BERI, MEM_WORD_READ) \ + __PMC_EV(BERI, MEM_WORD_WRITE) \ + __PMC_EV(BERI, MEM_DWORD_READ) \ + __PMC_EV(BERI, MEM_DWORD_WRITE) \ + __PMC_EV(BERI, MEM_CAP_READ) \ + __PMC_EV(BERI, MEM_CAP_WRITE) \ + __PMC_EV(BERI, MEM_CAP_READ_TAG_SET) \ + __PMC_EV(BERI, MEM_CAP_WRITE_TAG_SET) \ + __PMC_EV(BERI, TAGCACHE_WRITE_HIT) \ + __PMC_EV(BERI, TAGCACHE_WRITE_MISS) \ + __PMC_EV(BERI, TAGCACHE_READ_HIT) \ + __PMC_EV(BERI, TAGCACHE_READ_MISS) \ + __PMC_EV(BERI, TAGCACHE_EVICT) \ + __PMC_EV(BERI, L2CACHEMASTER_READ_REQ) \ + __PMC_EV(BERI, L2CACHEMASTER_WRITE_REQ) \ + __PMC_EV(BERI, L2CACHEMASTER_WRITE_REQ_FLIT) \ + __PMC_EV(BERI, L2CACHEMASTER_READ_RSP) \ + __PMC_EV(BERI, L2CACHEMASTER_READ_RSP_FLIT) \ + __PMC_EV(BERI, L2CACHEMASTER_WRITE_RSP) \ + __PMC_EV(BERI, TAGCACHEMASTER_READ_REQ) \ + __PMC_EV(BERI, TAGCACHEMASTER_WRITE_REQ) \ + __PMC_EV(BERI, TAGCACHEMASTER_WRITE_REQ_FLIT) \ + __PMC_EV(BERI, TAGCACHEMASTER_READ_RSP) \ + __PMC_EV(BERI, TAGCACHEMASTER_READ_RSP_FLIT) \ + __PMC_EV(BERI, TAGCACHEMASTER_WRITE_RSP) + +#define PMC_EV_BERI_FIRST PMC_EV_BERI_CYCLE +#define PMC_EV_BERI_LAST PMC_EV_BERI_TAGCACHEMASTER_WRITE_RSP + /* * Cavium Octeon 
counters. Obtained from cvmx-core.h */ @@ -1774,6 +1835,7 @@ __PMC_EV_ALIAS("unhalted-core-cycles", IAP_ARCH_UNH_COR_CYC) * 0x11300 0x00FF MIPS 24K events * 0x11400 0x00FF Octeon events * 0x11500 0x00FF MIPS 74K events + * 0x11600 0x00FF BERI statcounters * 0x13000 0x00FF MPC7450 events * 0x13100 0x00FF IBM PPC970 events * 0x13300 0x00FF Freescale e500 events @@ -1798,6 +1860,8 @@ __PMC_EV_ALIAS("unhalted-core-cycles", IAP_ARCH_UNH_COR_CYC) __PMC_EV_OCTEON() \ __PMC_EV_BLOCK(MIPS74K, 0x11500) \ __PMC_EV_MIPS74K() \ + __PMC_EV_BLOCK(BERI, 0x11600) \ + __PMC_EV_BERI() \ __PMC_EV_BLOCK(UCP, 0x12080) \ __PMC_EV_UCP() \ __PMC_EV_BLOCK(PPC7450, 0x13000) \ diff --git a/sys/sys/pmc.h b/sys/sys/pmc.h index be4c1cda03b0..f7831f0ec4b2 100644 --- a/sys/sys/pmc.h +++ b/sys/sys/pmc.h @@ -114,6 +114,7 @@ extern char pmc_cpuid[PMC_CPUID_LEN]; __PMC_CPU(MIPS_24K, 0x200, "MIPS 24K") \ __PMC_CPU(MIPS_OCTEON, 0x201, "Cavium Octeon") \ __PMC_CPU(MIPS_74K, 0x202, "MIPS 74K") \ + __PMC_CPU(MIPS_BERI, 0x203, "BERI") \ __PMC_CPU(PPC_7450, 0x300, "PowerPC MPC7450") \ __PMC_CPU(PPC_E500, 0x340, "PowerPC e500 Core") \ __PMC_CPU(PPC_970, 0x380, "IBM PowerPC 970") \ @@ -160,7 +161,8 @@ enum pmc_cputype { __PMC_CLASS(ARMV7, 0x10, "ARMv7") \ __PMC_CLASS(ARMV8, 0x11, "ARMv8") \ __PMC_CLASS(MIPS74K, 0x12, "MIPS 74K") \ - __PMC_CLASS(E500, 0x13, "Freescale e500 class") + __PMC_CLASS(E500, 0x13, "Freescale e500 class") \ + __PMC_CLASS(BERI, 0x14, "MIPS BERI") enum pmc_class { #undef __PMC_CLASS From 43ce0d902c5aa8ce5dbbbc8b0809e0690e422c1c Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 18 Sep 2019 16:15:05 +0000 Subject: [PATCH 32/46] truss: decode sysctl names. 
Submitted by: Pawel Biernacki MFC after: 1 week Differential revision: https://reviews.freebsd.org/D21688 --- usr.bin/truss/syscall.h | 1 + usr.bin/truss/syscalls.c | 72 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/usr.bin/truss/syscall.h b/usr.bin/truss/syscall.h index 3ac2c1b7f4ac..74fbcb7b2545 100644 --- a/usr.bin/truss/syscall.h +++ b/usr.bin/truss/syscall.h @@ -131,6 +131,7 @@ enum Argtype { Sockprotocol, Socktype, Sysarch, + Sysctl, Umtxop, Waitoptions, Whence, diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index 9b883ff558a9..e6a1fbd21bf8 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #define _WANT_FREEBSD11_STAT #include +#include #include #include #include @@ -506,6 +507,12 @@ static struct syscall decoded_syscalls[] = { .args = { { Name, 0 }, { Atfd, 1 }, { Name, 2 } } }, { .name = "sysarch", .ret_type = 1, .nargs = 2, .args = { { Sysarch, 0 }, { Ptr, 1 } } }, + { .name = "__sysctl", .ret_type = 1, .nargs = 6, + .args = { { Sysctl, 0 }, { Sizet, 1 }, { Ptr, 2 }, { Ptr, 3 }, + { Ptr, 4 }, { Sizet, 5 } } }, + { .name = "__sysctlbyname", .ret_type = 1, .nargs = 6, + .args = { { Name, 0 }, { Sizet, 1 }, { Ptr, 2 }, { Ptr, 3 }, + { Ptr, 4}, { Sizet, 5 } } }, { .name = "thr_kill", .ret_type = 1, .nargs = 2, .args = { { Long, 0 }, { Signal, 1 } } }, { .name = "thr_self", .ret_type = 1, .nargs = 1, @@ -1551,6 +1558,15 @@ print_cmsgs(FILE *fp, pid_t pid, bool receive, struct msghdr *msghdr) free(cmsgbuf); } +static void +print_sysctl_oid(FILE *fp, int *oid, int len) +{ + int i; + + for (i = 0; i < len; i++) + fprintf(fp, ".%d", oid[i]); +} + /* * Converts a syscall argument into a string. Said string is * allocated via malloc(), so needs to be free()'d. 
sc is @@ -2267,6 +2283,62 @@ print_arg(struct syscall_args *sc, unsigned long *args, register_t *retval, print_integer_arg(sysdecode_sysarch_number, fp, args[sc->offset]); break; + case Sysctl: { + char name[BUFSIZ]; + int oid[CTL_MAXNAME + 2], qoid[CTL_MAXNAME + 2]; + size_t i; + int len; + + memset(name, 0, sizeof(name)); + len = args[sc->offset + 1]; + if (get_struct(pid, (void *)args[sc->offset], oid, + len * sizeof(oid[0])) != -1) { + fprintf(fp, "\""); + if (oid[0] == CTL_SYSCTL) { + fprintf(fp, "sysctl."); + switch (oid[1]) { + case CTL_SYSCTL_DEBUG: + fprintf(fp, "debug"); + break; + case CTL_SYSCTL_NAME: + fprintf(fp, "name"); + print_sysctl_oid(fp, oid + 2, len - 2); + break; + case CTL_SYSCTL_NEXT: + fprintf(fp, "next"); + break; + case CTL_SYSCTL_NAME2OID: + fprintf(fp, "name2oid"); + break; + case CTL_SYSCTL_OIDFMT: + fprintf(fp, "oidfmt"); + print_sysctl_oid(fp, oid + 2, len - 2); + break; + case CTL_SYSCTL_OIDDESCR: + fprintf(fp, "oiddescr"); + print_sysctl_oid(fp, oid + 2, len - 2); + break; + case CTL_SYSCTL_OIDLABEL: + fprintf(fp, "oidlabel"); + print_sysctl_oid(fp, oid + 2, len - 2); + break; + default: + print_sysctl_oid(fp, oid + 1, len - 1); + } + } else { + qoid[0] = CTL_SYSCTL; + qoid[1] = CTL_SYSCTL_NAME; + memcpy(qoid + 2, oid, len * sizeof(int)); + i = sizeof(name); + if (sysctl(qoid, len + 2, name, &i, 0, 0) == -1) + print_sysctl_oid(fp, qoid + 2, len); + else + fprintf(fp, "%s", name); + } + fprintf(fp, "\""); + } + break; + } case PipeFds: /* * The pipe() system call in the kernel returns its From f4897c94dd1ff87fb13327df468eed7575affa67 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 18 Sep 2019 19:33:08 +0000 Subject: [PATCH 33/46] Fix typo, setting hidden flag instead of reparse. Submitted by: Ryan Moeller MFC after: 3 days Sponsored by: iXsystems, Inc. 
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index d79cd2a2c0ca..54f5482aa773 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -5202,7 +5202,7 @@ zfs_freebsd_setattr(ap) FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN, xvap.xva_xoptattrs.xoa_hidden); FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE, - xvap.xva_xoptattrs.xoa_hidden); + xvap.xva_xoptattrs.xoa_reparse); FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE, xvap.xva_xoptattrs.xoa_offline); FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE, From 416e2de337187ada6eb808d5135448e56e2a3ed0 Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Wed, 18 Sep 2019 21:00:32 +0000 Subject: [PATCH 34/46] Add some tests for page fault signals and codes It is useful to have some tests for page fault signals. More tests would be useful but creating the conditions (such as various kinds of running out of memory and I/O errors) is more complicated. The tests page_fault_signal__bus_objerr_1 and page_fault_signal__bus_objerr_2 depend on https://reviews.freebsd.org/D21566 before they can pass. 
PR: 211924 Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D21624 --- tests/sys/vm/Makefile | 3 +- tests/sys/vm/page_fault_signal.c | 184 +++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 tests/sys/vm/page_fault_signal.c diff --git a/tests/sys/vm/Makefile b/tests/sys/vm/Makefile index 2ba6875f4b2e..c2c95d25407f 100644 --- a/tests/sys/vm/Makefile +++ b/tests/sys/vm/Makefile @@ -5,6 +5,7 @@ PACKAGE= tests TESTSDIR= ${TESTSBASE}/sys/vm ATF_TESTS_C+= mlock_test \ - mmap_test + mmap_test \ + page_fault_signal .include diff --git a/tests/sys/vm/page_fault_signal.c b/tests/sys/vm/page_fault_signal.c new file mode 100644 index 000000000000..f5701d67c8ba --- /dev/null +++ b/tests/sys/vm/page_fault_signal.c @@ -0,0 +1,184 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2019 Jilles Tjoelker + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include + +#include +#include +#include +#include +#include + +static sigjmp_buf sig_env; +static volatile int last_sig, last_code; + +static void +sighandler(int sig, siginfo_t *info, void *context __unused) +{ + + last_sig = sig; + last_code = info->si_code; + siglongjmp(sig_env, 1); +} + +static void +setup_signals(void) +{ + struct sigaction sa; + int r; + + sa.sa_sigaction = sighandler; + sa.sa_flags = SA_RESTART | SA_RESETHAND | SA_SIGINFO; + r = sigfillset(&sa.sa_mask); + ATF_REQUIRE(r != -1); + r = sigaction(SIGILL, &sa, NULL); + ATF_REQUIRE(r != -1); + r = sigaction(SIGBUS, &sa, NULL); + ATF_REQUIRE(r != -1); + r = sigaction(SIGSEGV, &sa, NULL); + ATF_REQUIRE(r != -1); +} + +ATF_TC_WITHOUT_HEAD(page_fault_signal__segv_maperr_1); +ATF_TC_BODY(page_fault_signal__segv_maperr_1, tc) +{ + int *p; + int r; + int sz; + + sz = getpagesize(); + p = mmap(NULL, sz, PROT_READ, MAP_ANON, -1, 0); + ATF_REQUIRE(p != MAP_FAILED); + r = munmap(p, sz); + ATF_REQUIRE(r != -1); + if (sigsetjmp(sig_env, 1) == 0) { + setup_signals(); + *(volatile int *)p = 1; + } + ATF_CHECK_EQ(SIGSEGV, last_sig); + ATF_CHECK_EQ(SEGV_MAPERR, last_code); +} + +ATF_TC_WITHOUT_HEAD(page_fault_signal__segv_accerr_1); +ATF_TC_BODY(page_fault_signal__segv_accerr_1, tc) +{ + int *p; + int sz; + + sz = getpagesize(); + p = mmap(NULL, sz, PROT_READ, MAP_ANON, -1, 0); + ATF_REQUIRE(p != MAP_FAILED); + if (sigsetjmp(sig_env, 1) == 0) { + setup_signals(); + 
*(volatile int *)p = 1; + } + (void)munmap(p, sz); + ATF_CHECK_EQ(SIGSEGV, last_sig); + ATF_CHECK_EQ(SEGV_ACCERR, last_code); +} + +ATF_TC_WITHOUT_HEAD(page_fault_signal__segv_accerr_2); +ATF_TC_BODY(page_fault_signal__segv_accerr_2, tc) +{ + int *p; + volatile int dummy; + int sz; + + sz = getpagesize(); + p = mmap(NULL, sz, PROT_NONE, MAP_ANON, -1, 0); + ATF_REQUIRE(p != MAP_FAILED); + if (sigsetjmp(sig_env, 1) == 0) { + setup_signals(); + dummy = *p; + } + (void)munmap(p, sz); + ATF_CHECK_EQ(SIGSEGV, last_sig); + ATF_CHECK_EQ(SEGV_ACCERR, last_code); +} + +ATF_TC_WITHOUT_HEAD(page_fault_signal__bus_objerr_1); +ATF_TC_BODY(page_fault_signal__bus_objerr_1, tc) +{ + int *p; + int fd; + int sz; + + atf_tc_expect_fail("bug 211924"); + sz = getpagesize(); + fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, 0600); + ATF_REQUIRE(fd != -1); + p = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + ATF_REQUIRE(p != MAP_FAILED); + if (sigsetjmp(sig_env, 1) == 0) { + setup_signals(); + *(volatile int *)p = 1; + } + (void)munmap(p, sz); + (void)close(fd); + ATF_CHECK_EQ(SIGBUS, last_sig); + ATF_CHECK_EQ(BUS_OBJERR, last_code); +} + +ATF_TC_WITHOUT_HEAD(page_fault_signal__bus_objerr_2); +ATF_TC_BODY(page_fault_signal__bus_objerr_2, tc) +{ + int *p; + int fd; + int r; + int sz; + + atf_tc_expect_fail("bug 211924"); + sz = getpagesize(); + fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, 0600); + ATF_REQUIRE(fd != -1); + r = ftruncate(fd, sz); + ATF_REQUIRE(r != -1); + p = mmap(NULL, sz * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + ATF_REQUIRE(p != MAP_FAILED); + if (sigsetjmp(sig_env, 1) == 0) { + setup_signals(); + ((volatile int *)p)[sz / sizeof(int)] = 1; + } + (void)munmap(p, sz * 2); + (void)close(fd); + ATF_CHECK_EQ(SIGBUS, last_sig); + ATF_CHECK_EQ(BUS_OBJERR, last_code); +} + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, page_fault_signal__segv_maperr_1); + ATF_TP_ADD_TC(tp, page_fault_signal__segv_accerr_1); + ATF_TP_ADD_TC(tp, page_fault_signal__segv_accerr_2); + 
ATF_TP_ADD_TC(tp, page_fault_signal__bus_objerr_1); + ATF_TP_ADD_TC(tp, page_fault_signal__bus_objerr_2); + + return (atf_no_error()); +} From 4a9c211af5425f6a484aff67e446941b7f191199 Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Thu, 19 Sep 2019 07:28:24 +0000 Subject: [PATCH 35/46] sys/vm/vm_glue.c: Incorrect function name in panic string Use __func__ to avoid this issue in the future. Submitted by: Wuyang Chung Reviewed by: markj, emaste Obtained from: https://github.com/freebsd/freebsd/pull/410 --- sys/vm/vm_glue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index be4c85c20000..7f35f2d9c479 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -320,7 +320,7 @@ vm_thread_stack_create(struct domainset *ds, vm_object_t *ksobjp, int pages) ks = kva_alloc((pages + KSTACK_GUARD_PAGES) * PAGE_SIZE); #endif if (ks == 0) { - printf("vm_thread_new: kstack allocation failed\n"); + printf("%s: kstack allocation failed\n", __func__); vm_object_deallocate(ksobj); return (0); } @@ -362,7 +362,7 @@ vm_thread_stack_dispose(vm_object_t ksobj, vm_offset_t ks, int pages) for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) - panic("vm_thread_dispose: kstack already missing?"); + panic("%s: kstack already missing?", __func__); vm_page_unwire_noq(m); vm_page_free(m); } From 9a2ed10014863e84ee404f8b62dcbdbd7f01fb45 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Thu, 19 Sep 2019 09:22:45 +0000 Subject: [PATCH 36/46] vt: fix problems with trying to switch to a closed VT If there is an attempt to switch from a process-owned VT to a closed VT, then vt(4) first requests the process to release its VT and only then realizes that the target VT is closed and, so, the switch is not possible. So, the driver does not actually do any switch, but at the same time the owning process is not notified about that and it does not re-acquire the VT. 
This change adds an early check for the target VT state, so that the switch can be refused before the process coordination dance. On top of that, the code now checks for a failure of vt_window_switch() and calls vt_window_postswitch() for the current VT if it is in the process mode. Test Plan: - configure VT1 - VT8 (ttyv0 - ttyv7) to be text consoles (run getty) - configure VT9 (ttyv8) to run X server - make sure that the X server configuration allows VT switching - leave VT10 - VT12 unconfigured - while in the X server press Ctrl+Alt+F10 - without the patch, observe strange screen content and problems with keyboard input - with the patch, observe that nothing happens The problem has been observed and the fix has been tested with an nVidia graphics card and the proprietary nvidia driver. Not sure if that matters. Reviewed by: ray MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D21704 --- sys/dev/vt/vt_core.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c index 2f9413aaffd2..87bcc3a61a1e 100644 --- a/sys/dev/vt/vt_core.c +++ b/sys/dev/vt/vt_core.c @@ -335,7 +335,7 @@ static void vt_switch_timer(void *arg) { - vt_late_window_switch((struct vt_window *)arg); + (void)vt_late_window_switch((struct vt_window *)arg); } static int @@ -457,13 +457,22 @@ vt_window_postswitch(struct vt_window *vw) static int vt_late_window_switch(struct vt_window *vw) { + struct vt_window *curvw; int ret; callout_stop(&vw->vw_proc_dead_timer); ret = vt_window_switch(vw); - if (ret) + if (ret != 0) { + /* + * If the switch hasn't happened, then return the VT + * to the current owner, if any. + */ + curvw = vw->vw_device->vd_curwindow; + if (curvw->vw_smode.mode == VT_PROCESS) + (void)vt_window_postswitch(curvw); return (ret); + } /* Notify owner process about terminal availability. 
*/ if (vw->vw_smode.mode == VT_PROCESS) { @@ -509,6 +518,19 @@ vt_proc_window_switch(struct vt_window *vw) return (0); /* success */ } + /* + * Early check for an attempt to switch to a non-functional VT. + * The same check is done in vt_window_switch(), but it's better + * to fail as early as possible to avoid needless pre-switch + * actions. + */ + VT_LOCK(vd); + if ((vw->vw_flags & (VWF_OPENED|VWF_CONSOLE)) == 0) { + VT_UNLOCK(vd); + return (EINVAL); + } + VT_UNLOCK(vd); + /* Ask current process permission to switch away. */ if (curvw->vw_smode.mode == VT_PROCESS) { DPRINTF(30, "%s: VT_PROCESS ", __func__); @@ -1792,7 +1814,7 @@ finish_vt_rel(struct vt_window *vw, int release, int *s) vw->vw_flags &= ~VWF_SWWAIT_REL; if (release) { callout_drain(&vw->vw_proc_dead_timer); - vt_late_window_switch(vw->vw_switch_to); + (void)vt_late_window_switch(vw->vw_switch_to); } return (0); } From 6caa629e735a800bf034dc5821c2516bb35ce28b Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Thu, 19 Sep 2019 09:43:56 +0000 Subject: [PATCH 37/46] fix dsl_scan_ds_clone_swapped logic It was incorrect with respect to swapping dataset IDs both in the on-disk ZAP object and the in-memory queue. In both cases, if only ds1 was already present, then it would be first replaced with ds2 and then ds2 would be replaced back with ds1. Also, both cases did not properly handle a situation where both ds1 and ds2 are already queued. A duplicate insertion would be attempted and its failure would result in a panic. 
This change has also been submitted to ZoL as zfsonlinux/zfs@dd262c9 PR: 239566 Reported by: pascal.guitierrez@gmail.com MFC after: 4 days Sponsored by: CyberSecure --- .../opensolaris/uts/common/fs/zfs/dsl_scan.c | 102 ++++++++++++------ 1 file changed, 70 insertions(+), 32 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c index a32df14e7507..2966df66da85 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c @@ -2014,16 +2014,17 @@ ds_clone_swapped_bookmark(dsl_dataset_t *ds1, dsl_dataset_t *ds2, } /* - * Called when a parent dataset and its clone are swapped. If we were + * Called when an origin dataset and its clone are swapped. If we were * currently traversing the dataset, we need to switch to traversing the - * newly promoted parent. + * newly promoted clone. */ void dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) { dsl_pool_t *dp = ds1->ds_dir->dd_pool; dsl_scan_t *scn = dp->dp_scan; - uint64_t mintxg; + uint64_t mintxg1, mintxg2; + boolean_t ds1_queued, ds2_queued; if (!dsl_scan_is_running(scn)) return; @@ -2031,44 +2032,81 @@ dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys.scn_bookmark); ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys_cached.scn_bookmark); - if (scan_ds_queue_contains(scn, ds1->ds_object, &mintxg)) { - scan_ds_queue_remove(scn, ds1->ds_object); - scan_ds_queue_insert(scn, ds2->ds_object, mintxg); + /* + * Handle the in-memory scan queue. + */ + ds1_queued = scan_ds_queue_contains(scn, ds1->ds_object, &mintxg1); + ds2_queued = scan_ds_queue_contains(scn, ds2->ds_object, &mintxg2); + + /* Sanity checking. 
*/ + if (ds1_queued) { + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); } - if (scan_ds_queue_contains(scn, ds2->ds_object, &mintxg)) { - scan_ds_queue_remove(scn, ds2->ds_object); - scan_ds_queue_insert(scn, ds1->ds_object, mintxg); + if (ds2_queued) { + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); } - if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds1->ds_object, &mintxg) == 0) { - int err; - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + if (ds1_queued && ds2_queued) { + /* + * If both are queued, we don't need to do anything. + * The swapping code below would not handle this case correctly, + * since we can't insert ds2 if it is already there. That's + * because scan_ds_queue_insert() prohibits a duplicate insert + * and panics. + */ + } else if (ds1_queued) { + scan_ds_queue_remove(scn, ds1->ds_object); + scan_ds_queue_insert(scn, ds2->ds_object, mintxg1); + } else if (ds2_queued) { + scan_ds_queue_remove(scn, ds2->ds_object); + scan_ds_queue_insert(scn, ds1->ds_object, mintxg2); + } + + /* + * Handle the on-disk scan queue. + * The on-disk state is an out-of-date version of the in-memory state, + * so the in-memory and on-disk values for ds1_queued and ds2_queued may + * be different. Therefore we need to apply the swap logic to the + * on-disk state independently of the in-memory state. + */ + ds1_queued = zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, &mintxg1) == 0; + ds2_queued = zap_lookup_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg2) == 0; + + /* Sanity checking. 
*/ + if (ds1_queued) { + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + if (ds2_queued) { + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); + } + + if (ds1_queued && ds2_queued) { + /* + * If both are queued, we don't need to do anything. + * Alternatively, we could check for EEXIST from + * zap_add_int_key() and back out to the original state, but + * that would be more work than checking for this case upfront. + */ + } else if (ds1_queued) { + VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds1->ds_object, tx)); - err = zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg, tx); - VERIFY(err == 0 || err == EEXIST); - if (err == EEXIST) { - /* Both were there to begin with */ - VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, - ds1->ds_object, mintxg, tx)); - } + VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg1, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds1->ds_object, (u_longlong_t)ds2->ds_object); - } - if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds2->ds_object, &mintxg) == 0) { - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); - VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, + } else if (ds2_queued) { + VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds2->ds_object, tx)); - VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg, tx)); + VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset, + scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg2, tx)); zfs_dbgmsg("clone_swap ds %llu; in queue; " "replacing with 
%llu", (u_longlong_t)ds2->ds_object, From dfb2b9a361c92691848f6f4473781304d8061bfa Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Thu, 19 Sep 2019 09:48:01 +0000 Subject: [PATCH 38/46] update zfs send usage help with r352447 MFC after: 3 days --- cddl/contrib/opensolaris/cmd/zfs/zfs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c index 65dd40e33316..bb5a2a94ccc0 100644 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c +++ b/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c @@ -290,7 +290,7 @@ get_usage(zfs_help_t idx) case HELP_SEND: return (gettext("\tsend [-DnPpRvLec] [-[iI] snapshot] " "\n" - "\tsend [-Le] [-i snapshot|bookmark] " + "\tsend [-LPcenv] [-i snapshot|bookmark] " "\n" "\tsend [-nvPe] -t \n")); case HELP_SET: From e7a541b0b99622dcce21fb087da016a948fcbb60 Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Thu, 19 Sep 2019 10:22:29 +0000 Subject: [PATCH 39/46] When processing an incoming IPv6 packet over the loopback interface which contains Hop-by-Hop options, the mbuf chain is potentially changed in ip6_hopopts_input(), called by ip6_input_hbh(). This can happen, because of the use of IP6_EXTHDR_CHECK, which might call m_pullup(). So provide the updated pointer back to the caller of ip6_input_hbh() to avoid using a freed mbuf chain in `ip6_input()`. Reviewed by: markj@ MFC after: 3 days Sponsored by: Netflix, Inc. 
Differential Revision: https://reviews.freebsd.org/D21664 --- sys/netinet6/ip6_input.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 626e4f0b2018..60a6c501158f 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -403,20 +403,22 @@ VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL); #endif static int -ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off, +ip6_input_hbh(struct mbuf **mp, uint32_t *plen, uint32_t *rtalert, int *off, int *nxt, int *ours) { + struct mbuf *m; struct ip6_hdr *ip6; struct ip6_hbh *hbh; - if (ip6_hopopts_input(plen, rtalert, &m, off)) { + if (ip6_hopopts_input(plen, rtalert, mp, off)) { #if 0 /*touches NULL pointer*/ - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); + in6_ifstat_inc((*mp)->m_pkthdr.rcvif, ifs6_in_discard); #endif goto out; /* m have already been freed */ } /* adjust pointer */ + m = *mp; ip6 = mtod(m, struct ip6_hdr *); /* @@ -854,7 +856,7 @@ ip6_input(struct mbuf *m) */ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { - if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0) + if (ip6_input_hbh(&m, &plen, &rtalert, &off, &nxt, &ours) != 0) return; } else nxt = ip6->ip6_nxt; From dd3121a8957d299335c8cd05cef0e8c6e0d8baf5 Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Thu, 19 Sep 2019 10:27:47 +0000 Subject: [PATCH 40/46] When the RACK stack computes the space for user data in a TCP segment, it wasn't taking the IP level options into account. This patch fixes this. In addition, it also corrects a KASSERT and adds protection code to assure that the IP header chain and the TCP header fit in the first fragment as required by RFC 7112. Reviewed by: rrs@ MFC after: 3 days Sponsored by: Netflix, Inc. 
Differential Revision: https://reviews.freebsd.org/D21666 --- sys/netinet/tcp_stacks/rack.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index d70455c2f191..7ef1f3cc7832 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -7840,7 +7840,16 @@ rack_output(struct tcpcb *tp) hdrlen += sizeof(struct udphdr); } #endif - ipoptlen = 0; +#ifdef INET6 + if (isipv6) + ipoptlen = ip6_optlen(tp->t_inpcb); + else +#endif + if (tp->t_inpcb->inp_options) + ipoptlen = tp->t_inpcb->inp_options->m_len - + offsetof(struct ipoption, ipopt_list); + else + ipoptlen = 0; #if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; #endif @@ -7913,6 +7922,18 @@ rack_output(struct tcpcb *tp) sendalot = 1; } else { + if (optlen + ipoptlen > tp->t_maxseg) { + /* + * Since we don't have enough space to put + * the IP header chain and the TCP header in + * one packet as required by RFC 7112, don't + * send it. + */ + SOCKBUF_UNLOCK(&so->so_snd); + error = EMSGSIZE; + sack_rxmit = 0; + goto out; + } len = tp->t_maxseg - optlen - ipoptlen; sendalot = 1; } @@ -8414,15 +8435,9 @@ rack_output(struct tcpcb *tp) m->m_pkthdr.csum_flags |= CSUM_TSO; m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; } -#if defined(IPSEC) || defined(IPSEC_SUPPORT) - KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL), - ("%s: mbuf chain shorter than expected: %d + %u + %u - %u != %u", - __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL))); -#else - KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL), - ("%s: mbuf chain shorter than expected: %d + %u + %u != %u", - __func__, len, hdrlen, ipoptlen, m_length(m, NULL))); -#endif + KASSERT(len + hdrlen == m_length(m, NULL), + ("%s: mbuf chain different than expected: %d + %u != %u", + __func__, len, hdrlen, m_length(m, NULL))); #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. 
*/ From 18a52cf418eb84a71d1b58b09763f997ccea2506 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Thu, 19 Sep 2019 11:34:35 +0000 Subject: [PATCH 41/46] freebsd-update.8: appease igor igor follows American style guides in the belief that abbreviations i.e. and e.g. are always followed by a comma. Make that change now so that future updates to freebsd-update.8 do not complain about this. Submitted by: grembo Event: EuroBSDCon Norway FreeBSD DevSummit --- usr.sbin/freebsd-update/freebsd-update.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/freebsd-update/freebsd-update.8 b/usr.sbin/freebsd-update/freebsd-update.8 index ea9bd5b279f2..7a4350f5451a 100644 --- a/usr.sbin/freebsd-update/freebsd-update.8 +++ b/usr.sbin/freebsd-update/freebsd-update.8 @@ -95,7 +95,7 @@ Trust an RSA key with SHA256 of .Ar KEY . (default: read value from configuration file.) .It Fl r Ar newrelease -Specify the new release (e.g. 11.2-RELEASE) to which +Specify the new release (e.g., 11.2-RELEASE) to which .Nm should upgrade (upgrade command only). .It Fl s Ar server From 75cb6429f5ae7ead3c83a7a531561c9b36bb7dcc Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Thu, 19 Sep 2019 11:46:43 +0000 Subject: [PATCH 42/46] freebsd-update: make usage output consistent Drop trailing . which appeared only on description of IDS. Submitted by: grembo Event: EuroBSDCon Norway FreeBSD DevSummit --- usr.sbin/freebsd-update/freebsd-update.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/freebsd-update/freebsd-update.sh b/usr.sbin/freebsd-update/freebsd-update.sh index 51085d117e50..7c11d61f70ff 100644 --- a/usr.sbin/freebsd-update/freebsd-update.sh +++ b/usr.sbin/freebsd-update/freebsd-update.sh @@ -64,7 +64,7 @@ Commands: upgrade -- Fetch upgrades to FreeBSD version specified via -r option install -- Install downloaded updates or upgrades rollback -- Uninstall most recently installed updates - IDS -- Compare the system against an index of "known good" files. 
+ IDS -- Compare the system against an index of "known good" files EOF exit 0 } From e7512153416d3a9672aebd95dfe517cf490bc28e Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Thu, 19 Sep 2019 13:23:25 +0000 Subject: [PATCH 43/46] Temporarily add test_write_filter_zstd BROKEN_TESTS as it always fails in CI There is no trivial way to mark single libarchive test skip currently so just add it to BROKEN_TESTS for now. PR: 240683 Sponsored by: The FreeBSD Foundation --- lib/libarchive/tests/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/libarchive/tests/Makefile b/lib/libarchive/tests/Makefile index dd1a03e6276f..4371fc00710b 100644 --- a/lib/libarchive/tests/Makefile +++ b/lib/libarchive/tests/Makefile @@ -305,6 +305,9 @@ BROKEN_TESTS+= test_read_disk_directory_traversals # (Times out?) [and] crashes BROKEN_TESTS+= test_fuzz_rar +# https://bugs.freebsd.org/240683 +BROKEN_TESTS+= test_write_filter_zstd + # Build the test program. SRCS.libarchive_test= \ ${TESTS_SRCS} \ From 05a42b76216b79c108af2957bb7c55897673f03b Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Thu, 19 Sep 2019 13:25:19 +0000 Subject: [PATCH 44/46] Whitespace cleanup, no functional change Sponsored by: The FreeBSD Foundation --- lib/libarchive/tests/Makefile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/libarchive/tests/Makefile b/lib/libarchive/tests/Makefile index 4371fc00710b..265aa4541bf6 100644 --- a/lib/libarchive/tests/Makefile +++ b/lib/libarchive/tests/Makefile @@ -520,12 +520,12 @@ ${PACKAGE}FILES+= test_read_format_rar5_blake2.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_compressed.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_different_window_size.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_distance_overflow.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_extra_field_version.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_extra_field_version.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_fileattr.rar.uu -${PACKAGE}FILES+= 
test_read_format_rar5_hardlink.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_invalid_dict_reference.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_leftshift1.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_leftshift2.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_hardlink.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_invalid_dict_reference.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_leftshift1.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_leftshift2.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part01.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part02.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiarchive.part03.rar.uu @@ -541,13 +541,13 @@ ${PACKAGE}FILES+= test_read_format_rar5_multiarchive_solid.part04.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiple_files.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_multiple_files_solid.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_nonempty_dir_stream.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_owner.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_readtables_overflow.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_owner.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_readtables_overflow.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_solid.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_stored.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_stored_manyfiles.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_symlink.rar.uu -${PACKAGE}FILES+= test_read_format_rar5_truncated_huff.rar.uu +${PACKAGE}FILES+= test_read_format_rar5_truncated_huff.rar.uu ${PACKAGE}FILES+= test_read_format_rar5_win32.rar.uu ${PACKAGE}FILES+= test_read_format_raw.bufr.uu ${PACKAGE}FILES+= test_read_format_raw.data.Z.uu From 1e8687d261192910099cf9002c5db651e79b29d7 Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Thu, 19 Sep 2019 14:45:04 +0000 Subject: [PATCH 45/46] Reduce calls to close(2) at startup through the use of closefrom(2). 
Submitted by: pawel.biernacki@gmail.com Reviewed by: mjg, cy MFC after: 3 days Differential Revision: https://reviews.freebsd.org/D21715 --- usr.sbin/ntp/libntp/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/ntp/libntp/Makefile b/usr.sbin/ntp/libntp/Makefile index 42c89350e257..1a392532f16b 100644 --- a/usr.sbin/ntp/libntp/Makefile +++ b/usr.sbin/ntp/libntp/Makefile @@ -83,7 +83,7 @@ CFLAGS+= -I${SRCTOP}/contrib/ntp/include \ -I${.CURDIR:H} \ -I${.CURDIR}/ -CFLAGS+= -DHAVE_BSD_NICE -DHAVE_STDINT_H +CFLAGS+= -DHAVE_BSD_NICE -DHAVE_STDINT_H -DHAVE_CLOSEFROM CLEANFILES+= .version version.c From 5f6bb72e7fdcdd50d18d98cb8ad6bd00fc240a5f Mon Sep 17 00:00:00 2001 From: Glen Barber Date: Thu, 19 Sep 2019 16:43:12 +0000 Subject: [PATCH 46/46] Apply r346792 (cperciva) from stable/12 to head. The original commit message: On non-x86 systems, use "quarterly" packages. x86 architectures have "latest" package builds on stable/*, so keep using those (they'll get switched over to "quarterly" during releases). The original commit was a direct commit to stable/12, as at the time it was presumed it would not be necessary for head. However, when it is time to create a releng branch or switch from PRERELEASE/STABLE to BETA/RC, the pkg(7) Makefile needs further adjusting. This commit includes those further adjustments, evaluating the BRANCH variable from release/Makefile to determine the pkg(7) repository to use. MFC after: immediate (if possible) Sponsored by: Rubicon Communications, LLC (Netgate) --- usr.sbin/pkg/Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/usr.sbin/pkg/Makefile b/usr.sbin/pkg/Makefile index 2d5f9c9fbb82..98b6cb8afdf9 100644 --- a/usr.sbin/pkg/Makefile +++ b/usr.sbin/pkg/Makefile @@ -1,6 +1,16 @@ # $FreeBSD$ +.if ${MACHINE} != "amd64" && ${MACHINE} != "i386" +PKGCONFBRANCH?= quarterly +.else +_BRANCH!= ${MAKE} -C ${SRCTOP}/release -V BRANCH +BRANCH?= ${_BRANCH} +. 
if ${BRANCH:MBETA*} || ${BRANCH:MRC*} || ${BRANCH:MRELEASE*} +PKGCONFBRANCH?= quarterly +. else PKGCONFBRANCH?= latest +. endif +.endif CONFS= FreeBSD.conf.${PKGCONFBRANCH} CONFSNAME= FreeBSD.conf CONFSDIR= /etc/pkg