From fe8e65ae3d326b51e6c570553857abae1c5dea71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Wed, 13 Jan 2016 20:35:02 +0000 Subject: [PATCH 01/88] drm: Revert the part of r288653 about M_WAITOK vs M_NOWAIT Using M_NOWAIT could lead to transient failures with ioctls. Suggested by: kib --- sys/dev/drm2/drm_crtc.c | 20 ++++++++++---------- sys/dev/drm2/drm_pci.c | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sys/dev/drm2/drm_crtc.c b/sys/dev/drm2/drm_crtc.c index 79f66aafc549..318a764805cb 100644 --- a/sys/dev/drm2/drm_crtc.c +++ b/sys/dev/drm2/drm_crtc.c @@ -663,7 +663,7 @@ int drm_plane_init(struct drm_device *dev, struct drm_plane *plane, plane->dev = dev; plane->funcs = funcs; plane->format_types = malloc(sizeof(uint32_t) * format_count, - DRM_MEM_KMS, M_NOWAIT); + DRM_MEM_KMS, M_WAITOK); if (!plane->format_types) { DRM_DEBUG_KMS("out of memory when allocating plane\n"); drm_mode_object_put(dev, &plane->base); @@ -1010,7 +1010,7 @@ int drm_mode_group_init(struct drm_device *dev, struct drm_mode_group *group) total_objects += dev->mode_config.num_encoder; group->id_list = malloc(total_objects * sizeof(uint32_t), - DRM_MEM_KMS, M_NOWAIT | M_ZERO); + DRM_MEM_KMS, M_WAITOK | M_ZERO); if (!group->id_list) return -ENOMEM; @@ -1998,7 +1998,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, connector_set = malloc(crtc_req->count_connectors * sizeof(struct drm_connector *), - DRM_MEM_KMS, M_NOWAIT); + DRM_MEM_KMS, M_WAITOK); if (!connector_set) { ret = -ENOMEM; goto out; @@ -2523,7 +2523,7 @@ int drm_mode_dirtyfb_ioctl(struct drm_device *dev, goto out_err1; } clips = malloc(num_clips * sizeof(*clips), DRM_MEM_KMS, - M_NOWAIT | M_ZERO); + M_WAITOK | M_ZERO); if (!clips) { ret = -ENOMEM; goto out_err1; @@ -2774,13 +2774,13 @@ struct drm_property *drm_property_create(struct drm_device *dev, int flags, int ret; property = malloc(sizeof(struct drm_property), DRM_MEM_KMS, - M_NOWAIT | M_ZERO); + M_WAITOK | 
M_ZERO); if (!property) return NULL; if (num_values) { property->values = malloc(sizeof(uint64_t)*num_values, DRM_MEM_KMS, - M_NOWAIT | M_ZERO); + M_WAITOK | M_ZERO); if (!property->values) goto fail; } @@ -2908,7 +2908,7 @@ int drm_property_add_enum(struct drm_property *property, int index, } prop_enum = malloc(sizeof(struct drm_property_enum), DRM_MEM_KMS, - M_NOWAIT | M_ZERO); + M_WAITOK | M_ZERO); if (!prop_enum) return -ENOMEM; @@ -3104,7 +3104,7 @@ static struct drm_property_blob *drm_property_create_blob(struct drm_device *dev return NULL; blob = malloc(sizeof(struct drm_property_blob)+length, DRM_MEM_KMS, - M_NOWAIT | M_ZERO); + M_WAITOK | M_ZERO); if (!blob) return NULL; @@ -3434,7 +3434,7 @@ int drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc, crtc->gamma_size = gamma_size; crtc->gamma_store = malloc(gamma_size * sizeof(uint16_t) * 3, - DRM_MEM_KMS, M_NOWAIT | M_ZERO); + DRM_MEM_KMS, M_WAITOK | M_ZERO); if (!crtc->gamma_store) { crtc->gamma_size = 0; return -ENOMEM; @@ -3632,7 +3632,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, file_priv->event_space -= sizeof e->event; mtx_unlock(&dev->event_lock); - e = malloc(sizeof *e, DRM_MEM_KMS, M_NOWAIT | M_ZERO); + e = malloc(sizeof *e, DRM_MEM_KMS, M_WAITOK | M_ZERO); if (e == NULL) { mtx_lock(&dev->event_lock); file_priv->event_space += sizeof e->event; diff --git a/sys/dev/drm2/drm_pci.c b/sys/dev/drm2/drm_pci.c index ecbfcb7c7df3..f2c5bbd2dc5e 100644 --- a/sys/dev/drm2/drm_pci.c +++ b/sys/dev/drm2/drm_pci.c @@ -225,7 +225,7 @@ int drm_pci_set_unique(struct drm_device *dev, master->unique_len = u->unique_len; master->unique_size = u->unique_len + 1; - master->unique = malloc(master->unique_size, DRM_MEM_DRIVER, M_NOWAIT); + master->unique = malloc(master->unique_size, DRM_MEM_DRIVER, M_WAITOK); if (!master->unique) { ret = -ENOMEM; goto err; From f5e45e53f90d0f97e1ffa8384025e16df7bfb4d3 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Wed, 13 Jan 2016 21:34:15 +0000 Subject: [PATCH 02/88] Add 
support for relocating AArch64 modules to kldxref. This fixes an error message where it fails to read the module as the unrelocated addresses are zero. Sponsored by: ABT Systems Ltd --- usr.sbin/kldxref/ef_aarch64.c | 77 +++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 usr.sbin/kldxref/ef_aarch64.c diff --git a/usr.sbin/kldxref/ef_aarch64.c b/usr.sbin/kldxref/ef_aarch64.c new file mode 100644 index 000000000000..cc55ed103be3 --- /dev/null +++ b/usr.sbin/kldxref/ef_aarch64.c @@ -0,0 +1,77 @@ +/*- + * Copyright (c) 2005 Peter Grehan. + * Copyright 1996-1998 John D. Polstra. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +#include + +#include +#include +#include + +#include "ef.h" + +#include + +/* + * Apply relocations to the values obtained from the file. `relbase' is the + * target relocation address of the section, and `dataoff/len' is the region + * that is to be relocated, and has been copied to *dest + */ +int +ef_reloc(struct elf_file *ef, const void *reldata, int reltype, Elf_Off relbase, + Elf_Off dataoff, size_t len, void *dest) +{ + Elf_Addr *where, addend; + Elf_Size rtype, symidx; + const Elf_Rela *rela; + + if (reltype != EF_RELOC_RELA) + return (EINVAL); + + rela = (const Elf_Rela *)reldata; + where = (Elf_Addr *) ((Elf_Off)dest - dataoff + rela->r_offset); + addend = rela->r_addend; + rtype = ELF_R_TYPE(rela->r_info); + symidx = ELF_R_SYM(rela->r_info); + + if ((char *)where < (char *)dest || (char *)where >= (char *)dest + len) + return (0); + + switch(rtype) { + case R_AARCH64_RELATIVE: + *where = relbase + addend; + break; + case R_AARCH64_ABS64: + break; + default: + warnx("unhandled relocation type %lu", rtype); + break; + } + return (0); +} From e12a9f251e52ebf9c7b48467103760e68bbf362b Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Wed, 13 Jan 2016 21:47:27 +0000 Subject: [PATCH 03/88] Given that em(4), lem(4) and igb(4) hardware doesn't require the alignment guarantees provided by m_defrag(9), use m_collapse(9) instead for performance reasons. While at it, sanitize the statistics softc members, i. e. retire unused ones and add SYSCTL nodes missing for actually used ones. 
Differential Revision: https://reviews.freebsd.org/D4717 --- sys/dev/e1000/if_em.c | 19 ++++++++----------- sys/dev/e1000/if_em.h | 7 +++---- sys/dev/e1000/if_igb.c | 12 ++++++++---- sys/dev/e1000/if_igb.h | 19 +++++++++---------- sys/dev/e1000/if_lem.c | 10 +++++----- sys/dev/e1000/if_lem.h | 8 ++++---- 6 files changed, 37 insertions(+), 38 deletions(-) diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index f586d399822f..c1a9ce8717df 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -2035,9 +2035,9 @@ em_xmit(struct tx_ring *txr, struct mbuf **m_headp) if (error == EFBIG && remap) { struct mbuf *m; - m = m_defrag(*m_headp, M_NOWAIT); + m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); if (m == NULL) { - adapter->mbuf_alloc_failed++; + adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); @@ -5493,18 +5493,15 @@ em_add_hw_stats(struct adapter *adapter) char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", - CTLFLAG_RD, &adapter->link_irq, - "Link MSIX IRQ Handled"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", - CTLFLAG_RD, &adapter->mbuf_alloc_failed, - "Std mbuf failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", - CTLFLAG_RD, &adapter->mbuf_cluster_failed, - "Std mbuf cluster failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", + CTLFLAG_RD, &adapter->link_irq, + "Link MSIX IRQ Handled"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", + CTLFLAG_RD, &adapter->mbuf_defrag_failed, + "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h index be9fdc96e6d3..362df49442a7 100644 --- a/sys/dev/e1000/if_em.h +++ 
b/sys/dev/e1000/if_em.h @@ -473,13 +473,12 @@ struct adapter { /* Misc stats maintained by the driver */ unsigned long dropped_pkts; - unsigned long mbuf_alloc_failed; - unsigned long mbuf_cluster_failed; + unsigned long link_irq; + unsigned long mbuf_defrag_failed; + unsigned long no_tx_dma_setup; unsigned long no_tx_map_avail; - unsigned long no_tx_dma_setup; unsigned long rx_overruns; unsigned long watchdog_events; - unsigned long link_irq; struct e1000_hw_stats stats; }; diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index 498817838f3a..5b172dfe017f 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -1835,7 +1835,8 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp) /* Try it again? - one try */ if (remap == TRUE) { remap = FALSE; - m = m_defrag(*m_headp, M_NOWAIT); + m = m_collapse(*m_headp, M_NOWAIT, + IGB_MAX_SCATTER); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); @@ -5826,12 +5827,15 @@ igb_add_hw_stats(struct adapter *adapter) char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", - CTLFLAG_RD, &adapter->link_irq, - "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", + CTLFLAG_RD, &adapter->link_irq, + "Link MSIX IRQ Handled"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", + CTLFLAG_RD, &adapter->mbuf_defrag_failed, + "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); diff --git a/sys/dev/e1000/if_igb.h b/sys/dev/e1000/if_igb.h index c14778891fe8..af42b56ec873 100644 --- a/sys/dev/e1000/if_igb.h +++ b/sys/dev/e1000/if_igb.h @@ -512,20 +512,19 @@ struct adapter { u8 *mta; /* Misc stats maintained by the driver */ - unsigned long dropped_pkts; - unsigned long mbuf_defrag_failed; - unsigned 
long mbuf_header_failed; - unsigned long mbuf_packet_failed; - unsigned long no_tx_dma_setup; - unsigned long watchdog_events; - unsigned long link_irq; - unsigned long rx_overruns; unsigned long device_control; - unsigned long rx_control; - unsigned long int_mask; + unsigned long dropped_pkts; unsigned long eint_mask; + unsigned long int_mask; + unsigned long link_irq; + unsigned long mbuf_defrag_failed; + unsigned long no_tx_dma_setup; unsigned long packet_buf_alloc_rx; unsigned long packet_buf_alloc_tx; + unsigned long rx_control; + unsigned long rx_overruns; + unsigned long watchdog_events; + /* Used in pf and vf */ void *stats; diff --git a/sys/dev/e1000/if_lem.c b/sys/dev/e1000/if_lem.c index 7476be50be7d..674fe5be1984 100644 --- a/sys/dev/e1000/if_lem.c +++ b/sys/dev/e1000/if_lem.c @@ -1675,9 +1675,9 @@ lem_xmit(struct adapter *adapter, struct mbuf **m_headp) if (error == EFBIG) { struct mbuf *m; - m = m_defrag(*m_headp, M_NOWAIT); + m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); if (m == NULL) { - adapter->mbuf_alloc_failed++; + adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); @@ -4526,12 +4526,12 @@ lem_add_hw_stats(struct adapter *adapter) struct sysctl_oid_list *stat_list; /* Driver Statistics */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", - CTLFLAG_RD, &adapter->mbuf_alloc_failed, - "Std mbuf failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", CTLFLAG_RD, &adapter->mbuf_cluster_failed, "Std mbuf cluster failed"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", + CTLFLAG_RD, &adapter->mbuf_defrag_failed, + "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); diff --git a/sys/dev/e1000/if_lem.h b/sys/dev/e1000/if_lem.h index 4c43bddb5636..0dde26ecb4ee 100644 --- a/sys/dev/e1000/if_lem.h +++ b/sys/dev/e1000/if_lem.h @@ -417,17 +417,17 @@ struct adapter { /* Misc stats 
maintained by the driver */ unsigned long dropped_pkts; - unsigned long mbuf_alloc_failed; + unsigned long link_irq; unsigned long mbuf_cluster_failed; + unsigned long mbuf_defrag_failed; unsigned long no_tx_desc_avail1; unsigned long no_tx_desc_avail2; + unsigned long no_tx_dma_setup; unsigned long no_tx_map_avail; - unsigned long no_tx_dma_setup; unsigned long watchdog_events; - unsigned long rx_overruns; unsigned long rx_irq; + unsigned long rx_overruns; unsigned long tx_irq; - unsigned long link_irq; /* 82547 workaround */ uint32_t tx_fifo_size; From cd3dbc2573cd943a968260679a48ffca39080ceb Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Wed, 13 Jan 2016 21:49:01 +0000 Subject: [PATCH 04/88] Avoid reading past the end of the source buffer when it is not NUL terminated. If this buffer is adjacent to an unmapped page or a version of C with bounds checked is used this may result in a crash. PR: 206177 Submitted by: Alexander Cherepanov MFC after: 1 week --- lib/libc/string/wcsncat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libc/string/wcsncat.c b/lib/libc/string/wcsncat.c index 44f1ff98980c..5a243477db5b 100644 --- a/lib/libc/string/wcsncat.c +++ b/lib/libc/string/wcsncat.c @@ -48,7 +48,7 @@ wcsncat(wchar_t * __restrict s1, const wchar_t * __restrict s2, size_t n) p++; q = p; r = s2; - while (*r && n) { + while (n && *r) { *q++ = *r++; n--; } From 216818a1bba6638d2d8e14592c6313fad7f7e930 Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Wed, 13 Jan 2016 21:50:08 +0000 Subject: [PATCH 05/88] Avoid reading past the end of the source buffer when it is not NUL terminated. If this buffer is adjacent to an unmapped page or a version of C with bounds checked is used this may result in a crash.
PR: 206178 Submitted by: Alexander Cherepanov MFC after: 1 week --- lib/libc/string/wcslcat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libc/string/wcslcat.c b/lib/libc/string/wcslcat.c index f5f1e1ee7559..2df94777d51f 100644 --- a/lib/libc/string/wcslcat.c +++ b/lib/libc/string/wcslcat.c @@ -54,7 +54,7 @@ wcslcat(wchar_t *dst, const wchar_t *src, size_t siz) size_t dlen; /* Find the end of dst and adjust bytes left but don't go past end */ - while (*d != '\0' && n-- != 0) + while (n-- != 0 && *d != '\0') d++; dlen = d - dst; n = siz - dlen; From 8e402f34b9312295173e182ce4a2c1356ae14774 Mon Sep 17 00:00:00 2001 From: Eric van Gyzen Date: Wed, 13 Jan 2016 22:34:55 +0000 Subject: [PATCH 06/88] libthr: const-ify two variables Make the default umutex and urwlock initializers const, because they can be, and as a microoptimization. MFC after: 5 days Sponsored by: Dell Inc. --- lib/libthr/thread/thr_umtx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/libthr/thread/thr_umtx.c b/lib/libthr/thread/thr_umtx.c index a61dab023f52..37e5df183eb8 100644 --- a/lib/libthr/thread/thr_umtx.c +++ b/lib/libthr/thread/thr_umtx.c @@ -42,7 +42,7 @@ int _umtx_op_err(void *obj, int op, u_long val, void *uaddr, void *uaddr2) void _thr_umutex_init(struct umutex *mtx) { - static struct umutex default_mtx = DEFAULT_UMUTEX; + static const struct umutex default_mtx = DEFAULT_UMUTEX; *mtx = default_mtx; } @@ -50,7 +50,8 @@ _thr_umutex_init(struct umutex *mtx) void _thr_urwlock_init(struct urwlock *rwl) { - static struct urwlock default_rwl = DEFAULT_URWLOCK; + static const struct urwlock default_rwl = DEFAULT_URWLOCK; + *rwl = default_rwl; } From 50d901977abb2faf5f929ef839c07891415bc56c Mon Sep 17 00:00:00 2001 From: Eric van Gyzen Date: Thu, 14 Jan 2016 00:31:00 +0000 Subject: [PATCH 07/88] bsdinstall: Suggest the GPT+Active workaround on Dell T5810 The Dell Precision Tower 5810 fails to boot from GPT in Legacy/BIOS mode without the Active 
flag in the Protective MBR. Suggest the workaround during installation. Since an increasing number of Dell systems exhibit this behavior, I imagine all Dells past a certain date will do so. I would like to suggest the workaround for all Dells with a BIOS date of, say, 2014 or later, but I would need to test a variety of systems before committing such a change. Reviewed by: allanjude, dteske MFC after: 5 days Relnotes: We should probably suggest using GPT+Active on "recent" Dells. Sponsored by: Dell Inc. Differential Revision: https://reviews.freebsd.org/D4075 --- usr.sbin/bsdinstall/scripts/auto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/bsdinstall/scripts/auto b/usr.sbin/bsdinstall/scripts/auto index c681a12fd2a3..db0fea925221 100755 --- a/usr.sbin/bsdinstall/scripts/auto +++ b/usr.sbin/bsdinstall/scripts/auto @@ -195,7 +195,7 @@ if f_interactive; then ;; "Dell Inc.") case "$sys_model" in - "Latitude E7440"|"Latitude E7240") + "Latitude E7440"|"Latitude E7240"|"Precision Tower 5810") dialog_workaround "$msg_gpt_active_fix" retval=$? f_dprintf "gpt_active_fix_prompt=[%s]" "$retval" From 807b6a646a0a0dbc258bf239468b5d9f901d1f92 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Thu, 14 Jan 2016 00:34:37 +0000 Subject: [PATCH 08/88] Remove a dead local variable, missed in r274565. 
MFC after: 3 days Sponsored by: EMC / Isilon Storage Division --- cddl/contrib/opensolaris/tools/ctf/cvt/merge.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/cddl/contrib/opensolaris/tools/ctf/cvt/merge.c b/cddl/contrib/opensolaris/tools/ctf/cvt/merge.c index d366f3182731..5b22a31140d7 100644 --- a/cddl/contrib/opensolaris/tools/ctf/cvt/merge.c +++ b/cddl/contrib/opensolaris/tools/ctf/cvt/merge.c @@ -283,7 +283,6 @@ static int equiv_su(tdesc_t *stdp, tdesc_t *ttdp, equiv_data_t *ed) { mlist_t *ml1 = stdp->t_members, *ml2 = ttdp->t_members; - mlist_t *olm1 = NULL; while (ml1 && ml2) { if (ml1->ml_offset != ml2->ml_offset || @@ -292,7 +291,6 @@ equiv_su(tdesc_t *stdp, tdesc_t *ttdp, equiv_data_t *ed) !equiv_node(ml1->ml_type, ml2->ml_type, ed)) return (0); - olm1 = ml1; ml1 = ml1->ml_next; ml2 = ml2->ml_next; } From 6e9a914bb2baaad03c56b39ff5f54752c27aae45 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Thu, 14 Jan 2016 01:32:17 +0000 Subject: [PATCH 09/88] Suggest setting gateway_enable="YES" instead of sysctl net.inet.ip.forwarding=1 to enable packet forwarding. Reviewed by: glebius --- share/examples/pf/pf.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/share/examples/pf/pf.conf b/share/examples/pf/pf.conf index d97b4ede16e3..950b47c7ba44 100644 --- a/share/examples/pf/pf.conf +++ b/share/examples/pf/pf.conf @@ -2,8 +2,8 @@ # $OpenBSD: pf.conf,v 1.34 2007/02/24 19:30:59 millert Exp $ # # See pf.conf(5) and /usr/share/examples/pf for syntax and examples. -# Remember to set net.inet.ip.forwarding=1 and/or net.inet6.ip6.forwarding=1 -# in /etc/sysctl.conf if packets are to be forwarded between interfaces. +# Remember to set gateway_enable="YES" and/or ipv6_gateway_enable="YES" +# in /etc/rc.conf if packets are to be forwarded between interfaces. 
#ext_if="ext0" #int_if="int0" From c911734adbb2126d22d53cba1fab0bb6ed281f2d Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Thu, 14 Jan 2016 01:33:16 +0000 Subject: [PATCH 10/88] - Add the 'restrict' type qualifier to match function prototype. - Remove sys/types.h. --- lib/libc/sys/stat.2 | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/libc/sys/stat.2 b/lib/libc/sys/stat.2 index 5e49b3c0b992..3405d6e3511c 100644 --- a/lib/libc/sys/stat.2 +++ b/lib/libc/sys/stat.2 @@ -28,7 +28,7 @@ .\" @(#)stat.2 8.4 (Berkeley) 5/1/95 .\" $FreeBSD$ .\" -.Dd June 2, 2012 +.Dd January 14, 2016 .Dt STAT 2 .Os .Sh NAME @@ -40,12 +40,11 @@ .Sh LIBRARY .Lb libc .Sh SYNOPSIS -.In sys/types.h .In sys/stat.h .Ft int -.Fn stat "const char *path" "struct stat *sb" +.Fn stat "const char * restrict path" "struct stat * restrict sb" .Ft int -.Fn lstat "const char *path" "struct stat *sb" +.Fn lstat "const char * restrict path" "struct stat * restrict sb" .Ft int .Fn fstat "int fd" "struct stat *sb" .Ft int From 414dbbaf56c7dd3fc6a90ff94730bcd672e00c6e Mon Sep 17 00:00:00 2001 From: Eric van Gyzen Date: Thu, 14 Jan 2016 01:34:41 +0000 Subject: [PATCH 11/88] numactl: fix CPU affinity when modifying an existing process or thread numactl was only modifying its own CPU affinity, which is fine when creating a new process, but not very helpful when modifying an existing process. Reviewed by: adrian Sponsored by: Dell Inc.
Differential Revision: https://reviews.freebsd.org/D4927 --- usr.bin/numactl/numactl.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/usr.bin/numactl/numactl.c b/usr.bin/numactl/numactl.c index ce1dfaecabc7..8ad0d0076c72 100644 --- a/usr.bin/numactl/numactl.c +++ b/usr.bin/numactl/numactl.c @@ -133,7 +133,7 @@ usage(void) } static int -set_numa_domain_cpuaffinity(int cpu_domain) +set_numa_domain_cpuaffinity(int cpu_domain, cpuwhich_t which, id_t id) { cpuset_t set; int error; @@ -142,8 +142,8 @@ set_numa_domain_cpuaffinity(int cpu_domain) cpu_domain, sizeof(set), &set); if (error != 0) err(1, "cpuset_getaffinity"); - error = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, - sizeof(set), &set); + error = cpuset_setaffinity(CPU_LEVEL_WHICH, which, id, sizeof(set), + &set); if (error != 0) err(1, "cpuset_setaffinity"); @@ -228,7 +228,8 @@ main(int argc, char *argv[]) /* If a CPU domain policy was given, include that too */ if (cpu_domain != -1) - (void) set_numa_domain_cpuaffinity(cpu_domain); + (void) set_numa_domain_cpuaffinity(cpu_domain, + CPU_WHICH_PID, -1); errno = 0; execvp(*argv, argv); @@ -278,7 +279,7 @@ main(int argc, char *argv[]) /* If a CPU domain policy was given, include that too */ if (cpu_domain != -1) - (void) set_numa_domain_cpuaffinity(cpu_domain); + (void) set_numa_domain_cpuaffinity(cpu_domain, which, id); exit(0); } From 9e35cfced589bcba4de1f0b9e92042099083a36a Mon Sep 17 00:00:00 2001 From: Devin Teske Date: Thu, 14 Jan 2016 01:59:20 +0000 Subject: [PATCH 12/88] Default to en_US.ISO8859-1 if no locale MFC after: 3 days X-MFC-to: stable/10 --- lib/libdpv/dpv.c | 6 ++++++ lib/libdpv/dpv.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/lib/libdpv/dpv.c b/lib/libdpv/dpv.c index 6a03922ff9fd..d3506ca9d82d 100644 --- a/lib/libdpv/dpv.c +++ b/lib/libdpv/dpv.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -482,6 +483,11 @@ dpv(struct dpv_config 
*config, struct dpv_file_node *file_list) /* Reads: label_size pbar_size pprompt aprompt dpv_nfiles */ /* Inits: dheight and dwidth */ + /* Default localeconv(3) settings for dialog(3) status */ + setlocale(LC_NUMERIC, + getenv("LC_ALL") == NULL && getenv("LC_NUMERIC") == NULL ? + LC_NUMERIC_DEFAULT : ""); + if (!debug) { /* Internally create the initial `--gauge' prompt text */ dprompt_recreate(file_list, (struct dpv_file_node *)NULL, 0); diff --git a/lib/libdpv/dpv.h b/lib/libdpv/dpv.h index dbcd59bd67c0..03768a7ced40 100644 --- a/lib/libdpv/dpv.h +++ b/lib/libdpv/dpv.h @@ -38,6 +38,9 @@ #define FALSE 0 #endif +/* localeconv(3) */ +#define LC_NUMERIC_DEFAULT "en_US.ISO8859-1" + /* Data to process */ extern long long dpv_overall_read; From 5de888779eea298bb4c489f1fad307c250b13473 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 14 Jan 2016 02:32:50 +0000 Subject: [PATCH 13/88] hyperv/hn: Unbreak LINT-NOIP Reported by: bz Approved by: adrian (mentor) Sponsored by: Microsoft OSTC --- sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c index 68f822a004bd..fd23db90e9d7 100644 --- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -391,12 +391,14 @@ netvsc_attach(device_t dev) sc->hn_carrier = 1; } +#if defined(INET) || defined(INET6) tcp_lro_init(&sc->hn_lro); /* Driver private LRO settings */ sc->hn_lro.ifp = ifp; #ifdef HN_LRO_HIWAT sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; #endif +#endif /* INET || INET6 */ ether_ifattach(ifp, device_info.mac_addr); @@ -475,7 +477,9 @@ netvsc_detach(device_t dev) hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL); ifmedia_removeall(&sc->hn_media); +#if defined(INET) || defined(INET6) tcp_lro_free(&sc->hn_lro); +#endif return (0); } @@ -1083,6 +1087,7 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { +#if defined(INET) || defined(INET6) struct lro_ctrl *lro = &sc->hn_lro; if (lro->lro_cnt) { @@ -1092,6 +1097,7 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, return 0; } } +#endif } /* We're not holding the lock here, so don't release it */ @@ -1103,6 +1109,7 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, void netvsc_recv_rollup(struct hv_device *device_ctx) { +#if defined(INET) || defined(INET6) hn_softc_t *sc = device_get_softc(device_ctx->device); struct lro_ctrl *lro = &sc->hn_lro; struct lro_entry *queued; @@ -1111,6 +1118,7 @@ netvsc_recv_rollup(struct hv_device *device_ctx) SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } +#endif } /* From 69a53a7a3a43d76894958eb76ff015a7f009d047 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 14 Jan 2016 02:50:13 +0000 Subject: [PATCH 14/88] hyperv: use x86 generic code to do the hypervisor detection This is first step to move the generic part of HV code into kernel instead of module, so that it is possible to use hypercall to implement some other paravirtualization code in the kernel. 
Submitted by: Howard Su Reviewed by: royger, delphij, adrian Approved by: adrian (mentor) Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D3072 --- sys/dev/hyperv/vmbus/hv_connection.c | 2 +- sys/dev/hyperv/vmbus/hv_hv.c | 53 ++++----------------- sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c | 30 +++++------- sys/x86/x86/identcpu.c | 2 + 4 files changed, 22 insertions(+), 65 deletions(-) diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c index 0300828961ba..93bd5b6cd690 100644 --- a/sys/dev/hyperv/vmbus/hv_connection.c +++ b/sys/dev/hyperv/vmbus/hv_connection.c @@ -251,7 +251,7 @@ hv_vmbus_connect(void) { hv_vmbus_protocal_version = version; if (bootverbose) - printf("VMBUS: Portocal Version: %d.%d\n", + printf("VMBUS: Protocol Version: %d.%d\n", version >> 16, version & 0xFFFF); sema_destroy(&msg_info->wait_sema); diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c index 84e2a5e46fcf..5d629dde5710 100644 --- a/sys/dev/hyperv/vmbus/hv_hv.c +++ b/sys/dev/hyperv/vmbus/hv_hv.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -55,12 +56,6 @@ __FBSDID("$FreeBSD$"); static u_int hv_get_timecount(struct timecounter *tc); -static inline void do_cpuid_inline(unsigned int op, unsigned int *eax, - unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - __asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), - "=d" (*edx) : "0" (op), "c" (ecx)); -} - /** * Globals */ @@ -86,27 +81,10 @@ hv_get_timecount(struct timecounter *tc) int hv_vmbus_query_hypervisor_presence(void) { - u_int regs[4]; - int hyper_v_detected = 0; - - /* - * When Xen is detected and native Xen PV support is enabled, - * ignore Xen's HyperV emulation. 
- */ - if (vm_guest == VM_GUEST_XEN) + if (vm_guest != VM_GUEST_HV) return (0); - do_cpuid(1, regs); - if (regs[2] & 0x80000000) { /* if(a hypervisor is detected) */ - /* make sure this really is Hyper-V */ - /* we look at the CPUID info */ - do_cpuid(HV_X64_MSR_GUEST_OS_ID, regs); - hyper_v_detected = - regs[0] >= HV_X64_CPUID_MIN && - regs[0] <= HV_X64_CPUID_MAX && - !memcmp("Microsoft Hv", &regs[1], 12); - } - return (hyper_v_detected); + return (hv_high >= HV_X64_CPUID_MIN && hv_high <= HV_X64_CPUID_MAX); } /** @@ -115,10 +93,7 @@ hv_vmbus_query_hypervisor_presence(void) static int hv_vmbus_get_hypervisor_version(void) { - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; + u_int regs[4]; unsigned int maxLeaf; unsigned int op; @@ -127,28 +102,16 @@ hv_vmbus_get_hypervisor_version(void) * Viridian is present * Query id and revision. */ - eax = 0; - ebx = 0; - ecx = 0; - edx = 0; op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION; - do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); + do_cpuid(op, regs); - maxLeaf = eax; - eax = 0; - ebx = 0; - ecx = 0; - edx = 0; + maxLeaf = regs[0]; op = HV_CPU_ID_FUNCTION_HV_INTERFACE; - do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); + do_cpuid(op, regs); if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) { - eax = 0; - ebx = 0; - ecx = 0; - edx = 0; op = HV_CPU_ID_FUNCTION_MS_HV_VERSION; - do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); + do_cpuid(op, regs); } return (maxLeaf); } diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c index 201f1c93e65b..3a008c0af1e3 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c +++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c @@ -60,13 +60,15 @@ __FBSDID("$FreeBSD$"); #include "hv_vmbus_priv.h" - -#define VMBUS_IRQ 0x5 +#include +#include "acpi_if.h" static device_t vmbus_devp; static int vmbus_inited; static hv_setup_args setup_args; /* only CPU 0 supported at this time */ +static char *vmbus_ids[] = { "VMBUS", NULL }; +
/** * @brief Software interrupt thread routine to handle channel messages from * the hypervisor. @@ -350,25 +352,15 @@ hv_vmbus_child_device_unregister(struct hv_device *child_dev) return(ret); } -static void -vmbus_identify(driver_t *driver, device_t parent) -{ - if (!hv_vmbus_query_hypervisor_presence()) - return; - - vm_guest = VM_GUEST_HV; - - BUS_ADD_CHILD(parent, 0, "vmbus", 0); -} - static int vmbus_probe(device_t dev) { - if(bootverbose) - device_printf(dev, "VMBUS: probe\n"); + if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL || + device_get_unit(dev) != 0) + return (ENXIO); device_set_desc(dev, "Vmbus Devices"); - return (BUS_PROBE_NOWILDCARD); + return (BUS_PROBE_DEFAULT); } #ifdef HYPERV @@ -723,7 +715,6 @@ vmbus_modevent(module_t mod, int what, void *arg) static device_method_t vmbus_methods[] = { /** Device interface */ - DEVMETHOD(device_identify, vmbus_identify), DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), @@ -745,8 +736,9 @@ static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; devclass_t vmbus_devclass; -DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); -MODULE_VERSION(vmbus,1); +DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); +MODULE_DEPEND(vmbus, acpi, 1, 1, 1); +MODULE_VERSION(vmbus, 1); /* We want to be started after SMP is initialized */ SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL); diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index 38c1c6617c12..bc232a261ce3 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -1294,6 +1294,8 @@ identify_hypervisor(void) hv_vendor[12] = '\0'; if (strcmp(hv_vendor, "VMwareVMware") == 0) vm_guest = VM_GUEST_VMWARE; + else if (strcmp(hv_vendor, "Microsoft Hv") == 0) + vm_guest = VM_GUEST_HV; } return; } From 358d08b83bd6206aee580dc3bcea7d9eddc010dc Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: 
Thu, 14 Jan 2016 02:55:28 +0000 Subject: [PATCH 15/88] hyperv: remove unused vmbus definitions We don't need them at all. Submitted by: Dexuan Cui Sponsored by: Microsoft OSTC Reviewed by: royger, adrian, delphij Approved by: adrian (mentor) Differential Revision: https://reviews.freebsd.org/D4595 --- sys/dev/hyperv/include/hyperv.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sys/dev/hyperv/include/hyperv.h b/sys/dev/hyperv/include/hyperv.h index b5600baf0149..1a45b7ba8b3b 100644 --- a/sys/dev/hyperv/include/hyperv.h +++ b/sys/dev/hyperv/include/hyperv.h @@ -335,11 +335,6 @@ typedef enum { HV_CHANNEL_MESSAGE_INITIATED_CONTACT = 14, HV_CHANNEL_MESSAGE_VERSION_RESPONSE = 15, HV_CHANNEL_MESSAGE_UNLOAD = 16, - -#ifdef HV_VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD - HV_CHANNEL_MESSAGE_VIEW_RANGE_ADD = 17, - HV_CHANNEL_MESSAGE_VIEW_RANGE_REMOVE = 18, -#endif HV_CHANNEL_MESSAGE_COUNT } hv_vmbus_channel_msg_type; From 82104eda6a5b4e245b6d3cf240e6bb6efdd186bb Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 14 Jan 2016 02:58:28 +0000 Subject: [PATCH 16/88] [gpiobus] handle the case of there being a single GPIO pin available. 
PR: kern/206035 Submitted by: Stanislav Galabov --- sys/dev/gpio/gpiobus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c index 67106eaaca82..4a7994ead3a1 100644 --- a/sys/dev/gpio/gpiobus.c +++ b/sys/dev/gpio/gpiobus.c @@ -184,7 +184,7 @@ gpiobus_init_softc(device_t dev) if (GPIO_PIN_MAX(sc->sc_dev, &sc->sc_npins) != 0) return (ENXIO); - KASSERT(sc->sc_npins != 0, ("GPIO device with no pins")); + KASSERT(sc->sc_npins >= 0, ("GPIO device with no pins")); /* Pins = GPIO_PIN_MAX() + 1 */ sc->sc_npins++; From 99781cb3536aa8b07839c1b9aa14d10cf9b6681d Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 14 Jan 2016 03:05:10 +0000 Subject: [PATCH 17/88] hyperv: implement an event timer Submitted by: Howard Su Reviewed by: delphij, royger, adrian Approved by: adrian (mentor) Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D4676 --- sys/conf/files.amd64 | 1 + sys/conf/files.i386 | 1 + sys/dev/hyperv/vmbus/hv_et.c | 131 ++++++++++++++++++++ sys/dev/hyperv/vmbus/hv_hv.c | 8 +- sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c | 31 ++++- sys/dev/hyperv/vmbus/hv_vmbus_priv.h | 57 +++++++-- sys/modules/hyperv/vmbus/Makefile | 1 + 7 files changed, 211 insertions(+), 19 deletions(-) create mode 100644 sys/dev/hyperv/vmbus/hv_et.c diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 0788b4669efa..11e9e12c2754 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -273,6 +273,7 @@ dev/hyperv/vmbus/hv_channel.c optional hyperv dev/hyperv/vmbus/hv_channel_mgmt.c optional hyperv dev/hyperv/vmbus/hv_connection.c optional hyperv dev/hyperv/vmbus/hv_hv.c optional hyperv +dev/hyperv/vmbus/hv_et.c optional hyperv dev/hyperv/vmbus/hv_ring_buffer.c optional hyperv dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c optional hyperv dev/nfe/if_nfe.c optional nfe pci diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index ffe272e382d7..334e85784955 100644 --- a/sys/conf/files.i386 +++ 
b/sys/conf/files.i386 @@ -247,6 +247,7 @@ dev/hyperv/vmbus/hv_channel.c optional hyperv dev/hyperv/vmbus/hv_channel_mgmt.c optional hyperv dev/hyperv/vmbus/hv_connection.c optional hyperv dev/hyperv/vmbus/hv_hv.c optional hyperv +dev/hyperv/vmbus/hv_et.c optional hyperv dev/hyperv/vmbus/hv_ring_buffer.c optional hyperv dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c optional hyperv dev/ichwd/ichwd.c optional ichwd diff --git a/sys/dev/hyperv/vmbus/hv_et.c b/sys/dev/hyperv/vmbus/hv_et.c new file mode 100644 index 000000000000..d96148673b6f --- /dev/null +++ b/sys/dev/hyperv/vmbus/hv_et.c @@ -0,0 +1,131 @@ +/*- + * Copyright (c) 2015 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include "hv_vmbus_priv.h" + +#define HV_TIMER_FREQUENCY (10 * 1000 * 1000LL) /* 100ns period */ +#define HV_MAX_DELTA_TICKS 0xffffffffLL +#define HV_MIN_DELTA_TICKS 1LL + +static struct eventtimer et; +static uint64_t periodticks[MAXCPU]; + +static inline uint64_t +sbintime2tick(sbintime_t time) +{ + struct timespec val; + + val = sbttots(time); + return val.tv_sec * HV_TIMER_FREQUENCY + val.tv_nsec / 100; +} + +static int +hv_et_start(struct eventtimer *et, sbintime_t firsttime, sbintime_t periodtime) +{ + union hv_timer_config timer_cfg; + uint64_t current; + + timer_cfg.as_uint64 = 0; + timer_cfg.auto_enable = 1; + timer_cfg.sintx = HV_VMBUS_MESSAGE_SINT; + + periodticks[curcpu] = sbintime2tick(periodtime); + if (firsttime == 0) + firsttime = periodtime; + + current = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + current += sbintime2tick(firsttime); + + wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); + wrmsr(HV_X64_MSR_STIMER0_COUNT, current); + + return (0); +} + +static int +hv_et_stop(struct eventtimer *et) +{ + wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0); + wrmsr(HV_X64_MSR_STIMER0_COUNT, 0); + + return (0); +} + +void +hv_et_intr(struct trapframe *frame) +{ + union hv_timer_config timer_cfg; + struct trapframe *oldframe; + struct thread *td; + + if (periodticks[curcpu] != 0) { + uint64_t tick = sbintime2tick(periodticks[curcpu]); + timer_cfg.as_uint64 = rdmsr(HV_X64_MSR_STIMER0_CONFIG); + timer_cfg.enable = 0; + timer_cfg.auto_enable = 1; + timer_cfg.periodic = 1; + periodticks[curcpu] = 0; + + wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); + wrmsr(HV_X64_MSR_STIMER0_COUNT, tick); + } + + if (et.et_active) { + td = curthread; + td->td_intr_nesting_level++; + oldframe = td->td_intr_frame; + td->td_intr_frame = frame; + et.et_event_cb(&et, et.et_arg); + td->td_intr_frame = oldframe; + td->td_intr_nesting_level--; + } +} + +void +hv_et_init(void) +{ + 
et.et_name = "HyperV"; + et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU | ET_FLAGS_PERIODIC; + et.et_quality = 1000; + et.et_frequency = HV_TIMER_FREQUENCY; + et.et_min_period = (1LL << 32) / HV_TIMER_FREQUENCY; + et.et_max_period = HV_MAX_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY); + et.et_start = hv_et_start; + et.et_stop = hv_et_stop; + et.et_priv = &et; + et_register(&et); +} + diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c index 5d629dde5710..ca5641f620bf 100644 --- a/sys/dev/hyperv/vmbus/hv_hv.c +++ b/sys/dev/hyperv/vmbus/hv_hv.c @@ -45,12 +45,6 @@ __FBSDID("$FreeBSD$"); #include "hv_vmbus_priv.h" -#define HV_X64_MSR_GUEST_OS_ID 0x40000000 - -#define HV_X64_CPUID_MIN 0x40000005 -#define HV_X64_CPUID_MAX 0x4000ffff -#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 - #define HV_NANOSECONDS_PER_SEC 1000000000L @@ -218,6 +212,8 @@ hv_vmbus_init(void) hv_vmbus_g_context.hypercall_page = virt_addr; tc_init(&hv_timecounter); /* register virtual timecount */ + + hv_et_init(); return (0); diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c index 3a008c0af1e3..8ff12e728360 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c +++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -153,7 +154,7 @@ vmbus_msg_swintr(void *arg) * message to process - an event or a channel message. 
*/ static inline int -hv_vmbus_isr(void *unused) +hv_vmbus_isr(struct trapframe *frame) { int cpu; hv_vmbus_message* msg; @@ -193,11 +194,35 @@ hv_vmbus_isr(void *unused) page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; + /* let the event timer process the message */ + if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) { + msg->header.message_type = HV_MESSAGE_TYPE_NONE; + + /* + * Make sure the write to message_type (i.e. set to + * HV_MESSAGE_TYPE_NONE) happens before we read the + * message_pending and EOMing. Otherwise, the EOMing will + * not deliver any more messages + * since there is no empty slot + */ + wmb(); + + if (msg->header.message_flags.u.message_pending) { + /* + * This will cause message queue rescan to possibly + * deliver another msg from the hypervisor + */ + wrmsr(HV_X64_MSR_EOM, 0); + } + hv_et_intr(frame); + return (FILTER_HANDLED); + } + if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0); } - return FILTER_HANDLED; + return (FILTER_HANDLED); } #ifdef HV_DEBUG_INTR @@ -227,7 +252,7 @@ hv_vector_handler(struct trapframe *trap_frame) hv_intr_count++; #endif - hv_vmbus_isr(NULL); + hv_vmbus_isr(trap_frame); /* * Enable preemption.
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h index 0503d06aeb57..74fe8240bfd4 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h +++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h @@ -359,11 +359,6 @@ typedef struct { struct sema control_sema; } hv_vmbus_connection; -/* - * Declare the MSR used to identify the guest OS - */ -#define HV_X64_MSR_GUEST_OS_ID 0x40000000 - typedef union { uint64_t as_uint64_t; struct { @@ -380,10 +375,6 @@ typedef union { } u; } hv_vmbus_x64_msr_guest_os_id_contents; -/* - * Declare the MSR used to setup pages used to communicate with the hypervisor - */ -#define HV_X64_MSR_HYPERCALL 0x40000001 typedef union { uint64_t as_uint64_t; @@ -512,6 +503,22 @@ typedef union { } u; } hv_vmbus_synic_sint; +/* + * Timer configuration register. + */ +union hv_timer_config { + uint64_t as_uint64; + struct { + uint64_t enable:1; + uint64_t periodic:1; + uint64_t lazy:1; + uint64_t auto_enable:1; + uint64_t reserved_z0:12; + uint64_t sintx:4; + uint64_t reserved_z1:44; + }; +}; + /* * Define syn_ic control register */ @@ -542,8 +549,21 @@ typedef union { uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT]; } hv_vmbus_synic_event_flags; +#define HV_X64_CPUID_MIN (0x40000005) +#define HV_X64_CPUID_MAX (0x4000ffff) + +/* + * Declare the MSR used to identify the guest OS + */ +#define HV_X64_MSR_GUEST_OS_ID (0x40000000) +/* + * Declare the MSR used to setup pages used to communicate with the hypervisor + */ +#define HV_X64_MSR_HYPERCALL (0x40000001) /* MSR used to provide vcpu index */ -#define HV_X64_MSR_VP_INDEX (0x40000002) +#define HV_X64_MSR_VP_INDEX (0x40000002) + +#define HV_X64_MSR_TIME_REF_COUNT (0x40000020) /* * Define synthetic interrupt controller model specific registers @@ -571,6 +591,18 @@ typedef union { #define HV_X64_MSR_SINT14 (0x4000009E) #define HV_X64_MSR_SINT15 (0x4000009F) +/* + * Synthetic Timer MSRs. Four timers per vcpu. 
+ */ +#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 +#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 +#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 +#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 +#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 +#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 +#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 +#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 + /* * Declare the various hypercall operations */ @@ -678,6 +710,11 @@ int hv_vmbus_post_message(void *buffer, size_t buf_size); int hv_vmbus_set_event(hv_vmbus_channel *channel); void hv_vmbus_on_events(void *); +/** + * Event Timer interfaces + */ +void hv_et_init(void); +void hv_et_intr(struct trapframe*); /* * The guest OS needs to register the guest ID with the hypervisor. diff --git a/sys/modules/hyperv/vmbus/Makefile b/sys/modules/hyperv/vmbus/Makefile index 11228cd22c73..7d623c371ed6 100644 --- a/sys/modules/hyperv/vmbus/Makefile +++ b/sys/modules/hyperv/vmbus/Makefile @@ -7,6 +7,7 @@ KMOD= hv_vmbus SRCS= hv_channel.c \ hv_channel_mgmt.c \ hv_connection.c \ + hv_et.c \ hv_hv.c \ hv_ring_buffer.c \ hv_vmbus_drv_freebsd.c \ From 19529243337d02e56e366d55b3e7935fdbc995f2 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 14 Jan 2016 03:11:35 +0000 Subject: [PATCH 18/88] hyperv: add interrupt counters Submitted by: Howard Su Reviewed by: royger, Dexuan Cui , adrian Approved by: adrian (mentor) Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D4693 --- sys/dev/hyperv/vmbus/hv_connection.c | 17 ----------------- sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c | 17 ++++++----------- 2 files changed, 6 insertions(+), 28 deletions(-) diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c index 93bd5b6cd690..691d0694e1b6 100644 --- a/sys/dev/hyperv/vmbus/hv_connection.c +++ b/sys/dev/hyperv/vmbus/hv_connection.c @@ -423,12 +423,6 @@ VmbusProcessChannelEvent(uint32_t relid) // mtx_unlock(&channel->inbound_lock); } -#ifdef HV_DEBUG_INTR 
-extern uint32_t hv_intr_count; -extern uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; -extern uint32_t hv_vmbus_intr_cpu[MAXCPU]; -#endif - /** * Handler for events */ @@ -449,17 +443,6 @@ hv_vmbus_on_events(void *arg) KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: " "cpu out of range!")); -#ifdef HV_DEBUG_INTR - int i; - hv_vmbus_swintr_event_cpu[cpu]++; - if (hv_intr_count % 10000 == 0) { - printf("VMBUS: Total interrupt %d\n", hv_intr_count); - for (i = 0; i < mp_ncpus; i++) - printf("VMBUS: hw cpu[%d]: %d, event sw intr cpu[%d]: %d\n", - i, hv_vmbus_intr_cpu[i], i, hv_vmbus_swintr_event_cpu[i]); - } -#endif - if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5; diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c index 8ff12e728360..6df466468170 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c +++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c @@ -225,32 +225,24 @@ hv_vmbus_isr(struct trapframe *frame) return (FILTER_HANDLED); } -#ifdef HV_DEBUG_INTR -uint32_t hv_intr_count = 0; -#endif uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; -uint32_t hv_vmbus_intr_cpu[MAXCPU]; +u_long *hv_vmbus_intr_cpu[MAXCPU]; void hv_vector_handler(struct trapframe *trap_frame) { -#ifdef HV_DEBUG_INTR int cpu; -#endif /* * Disable preemption. */ critical_enter(); -#ifdef HV_DEBUG_INTR /* * Do a little interrupt counting. 
*/ cpu = PCPU_GET(cpuid); - hv_vmbus_intr_cpu[cpu]++; - hv_intr_count++; -#endif + (*hv_vmbus_intr_cpu[cpu])++; hv_vmbus_isr(trap_frame); @@ -479,6 +471,7 @@ static int vmbus_bus_init(void) { int i, j, n, ret; + char buf[MAXCOMLEN + 1]; if (vmbus_inited) return (0); @@ -515,13 +508,15 @@ vmbus_bus_init(void) setup_args.vector = hv_vmbus_g_context.hv_cb_vector; CPU_FOREACH(j) { - hv_vmbus_intr_cpu[j] = 0; hv_vmbus_swintr_event_cpu[j] = 0; hv_vmbus_g_context.hv_event_intr_event[j] = NULL; hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; hv_vmbus_g_context.event_swintr[j] = NULL; hv_vmbus_g_context.msg_swintr[j] = NULL; + snprintf(buf, sizeof(buf), "cpu%d:hyperv", j); + intrcnt_add(buf, &hv_vmbus_intr_cpu[j]); + for (i = 0; i < 2; i++) setup_args.page_buffers[2 * j + i] = NULL; } From dd7a7dd6afff0c6b48bfa9cec871126ce4f27a85 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 14 Jan 2016 03:16:29 +0000 Subject: [PATCH 19/88] hyperv: set receive buffer size according to NVSP protocol version If the NVSP protocol version is not greater than NVSP_PROTOCOL_VERSION_2, then the recv buffer size is 15MB, otherwise the buffer size is 16MB. 
Submitted by: Hongjiang Zhang Reviewed by: royger, Dexuan Cui , adrian Approved by: adrian (mentor) Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D4814 --- sys/dev/hyperv/netvsc/hv_net_vsc.c | 9 ++++++--- sys/dev/hyperv/netvsc/hv_net_vsc.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index 99e500c57479..65913b5a76f2 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -642,6 +642,12 @@ hv_nv_connect_to_vsp(struct hv_device *device) /* sema_wait(&NetVscChannel->channel_init_sema); */ /* Post the big receive buffer to NetVSP */ + if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) + net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; + else + net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; + net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE; + ret = hv_nv_init_rx_buffer_with_net_vsp(device); if (ret == 0) ret = hv_nv_init_send_buffer_with_net_vsp(device); @@ -676,9 +682,6 @@ hv_nv_on_device_add(struct hv_device *device, void *additional_info) goto cleanup; /* Initialize the NetVSC channel extension */ - net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; - - net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE; sema_init(&net_dev->channel_init_sema, 0, "netdev_sema"); diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.h b/sys/dev/hyperv/netvsc/hv_net_vsc.h index b1d1e37cedca..9157f918dbb1 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.h +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.h @@ -857,7 +857,7 @@ typedef struct nvsp_msg_ { #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15M */ #define NETVSC_SEND_BUFFER_ID 0xface - +#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe From 8d5bab80eaf23a54ec5b78dec1cf07f5e8d1d569 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Thu, 14 Jan 2016 05:02:33 
+0000 Subject: [PATCH 20/88] Unbreak `make depend` with sys/modules/hyperv/vmbus after r293870 Pointyhat to: sephe --- sys/modules/hyperv/vmbus/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/modules/hyperv/vmbus/Makefile b/sys/modules/hyperv/vmbus/Makefile index 7d623c371ed6..637157b3ec49 100644 --- a/sys/modules/hyperv/vmbus/Makefile +++ b/sys/modules/hyperv/vmbus/Makefile @@ -12,7 +12,7 @@ SRCS= hv_channel.c \ hv_ring_buffer.c \ hv_vmbus_drv_freebsd.c \ hv_vmbus_priv.h -SRCS+= bus_if.h device_if.h +SRCS+= acpi_if.h bus_if.h device_if.h opt_acpi.h CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/include \ -I${.CURDIR}/../../../dev/hyperv/vmbus \ From 4c29cf96d4b52c7ddd97f4bfe80999e45c0f98ef Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Thu, 14 Jan 2016 07:27:14 +0000 Subject: [PATCH 21/88] Remove unnecessary kldload logic added to geom_subr.sh in r293028 MFC after: 2 weeks Sponsored by: EMC / Isilon Storage Division --- tests/sys/geom/class/gate/conf.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/sys/geom/class/gate/conf.sh b/tests/sys/geom/class/gate/conf.sh index 4eede088ad29..7e22ce46af66 100755 --- a/tests/sys/geom/class/gate/conf.sh +++ b/tests/sys/geom/class/gate/conf.sh @@ -5,6 +5,4 @@ name="$(mktemp -u gate.XXXXXX)" class="gate" base=`basename $0` -kldstat -q -m g_${class} || kldload geom_${class} || exit 1 - . `dirname $0`/../geom_subr.sh From 31969237964b5e529aae30002608713c428965cb Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Thu, 14 Jan 2016 07:27:42 +0000 Subject: [PATCH 22/88] Remove an unneeded assignment of the return value. tdelete() is supposed to return the address of the parent node that has been deleted. We already keep track of this node in the loop between lines 94-107. The GO_LEFT()/GO_RIGHT() macros are used later on as well, so we must make sure not to change it to something else. 
--- lib/libc/stdlib/tdelete.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/libc/stdlib/tdelete.c b/lib/libc/stdlib/tdelete.c index 7799f35cc1e7..ff63576a1bf5 100644 --- a/lib/libc/stdlib/tdelete.c +++ b/lib/libc/stdlib/tdelete.c @@ -62,7 +62,6 @@ __FBSDID("$FreeBSD$"); base = leaf; \ path_init(&path); \ } \ - result = &(*leaf)->key; \ path_taking_right(&path); \ leaf = &(*leaf)->rlink; \ } while (0) From 0713024269f486d5e219169ba10fe114c5c62a2b Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Thu, 14 Jan 2016 07:39:05 +0000 Subject: [PATCH 23/88] PID file support hasn't been committed for ggated(8) yet. Unbreak running the testcase more than once by restoring the "killall ggated" MFC after: 15 days Sponsored by: EMC / Isilon Storage Division --- tests/sys/geom/class/gate/1_test.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/sys/geom/class/gate/1_test.sh b/tests/sys/geom/class/gate/1_test.sh index 83f609602ed3..3e277349631f 100644 --- a/tests/sys/geom/class/gate/1_test.sh +++ b/tests/sys/geom/class/gate/1_test.sh @@ -11,7 +11,6 @@ while [ -c /dev/ggate${us} ]; do : $(( us += 1 )) done conf=`mktemp $base.XXXXXX` || exit 1 -pidfile=/var/run/ggated.pid port=33080 work=$(attach_md -t malloc -s 1M) @@ -20,7 +19,7 @@ src=$(attach_md -t malloc -s 1M) test_cleanup() { ggatec destroy -f -u $us - pkill -F $pidfile + killall ggated geom_test_cleanup } trap test_cleanup ABRT EXIT INT TERM From 10e0e23528cbc37b0421ee5a24d387b94933ba5e Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Thu, 14 Jan 2016 08:54:44 +0000 Subject: [PATCH 24/88] Remove now-unused wrappers for various routing functions. 
--- sys/net/route.c | 60 ----------------------------------------- sys/net/route.h | 12 --------- sys/netinet/in_rmx.c | 12 --------- sys/netinet/in_var.h | 2 -- sys/netinet/ip_mroute.c | 2 +- 5 files changed, 1 insertion(+), 87 deletions(-) diff --git a/sys/net/route.c b/sys/net/route.c index bcc56d9d6fb6..a93f2ee972d1 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -342,35 +342,6 @@ sys_setfib(struct thread *td, struct setfib_args *uap) /* * Packet routing routines. */ -void -rtalloc(struct route *ro) -{ - - rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); -} - -void -rtalloc_fib(struct route *ro, u_int fibnum) -{ - rtalloc_ign_fib(ro, 0UL, fibnum); -} - -void -rtalloc_ign(struct route *ro, u_long ignore) -{ - struct rtentry *rt; - - if ((rt = ro->ro_rt) != NULL) { - if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) - return; - RTFREE(rt); - ro->ro_rt = NULL; - } - ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); - if (ro->ro_rt) - RT_UNLOCK(ro->ro_rt); -} - void rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) { @@ -538,17 +509,6 @@ rtfree(struct rtentry *rt) * Normally called as a result of a routing redirect * message from the network layer. */ -void -rtredirect(struct sockaddr *dst, - struct sockaddr *gateway, - struct sockaddr *netmask, - int flags, - struct sockaddr *src) -{ - - rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); -} - void rtredirect_fib(struct sockaddr *dst, struct sockaddr *gateway, @@ -673,13 +633,6 @@ rtredirect_fib(struct sockaddr *dst, ifa_free(ifa); } -int -rtioctl(u_long req, caddr_t data) -{ - - return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); -} - /* * Routing table ioctl interface. 
*/ @@ -775,19 +728,6 @@ ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway, * Do appropriate manipulations of a routing tree given * all the bits of info needed */ -int -rtrequest(int req, - struct sockaddr *dst, - struct sockaddr *gateway, - struct sockaddr *netmask, - int flags, - struct rtentry **ret_nrt) -{ - - return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, - RT_DEFAULT_FIB)); -} - int rtrequest_fib(int req, struct sockaddr *dst, diff --git a/sys/net/route.h b/sys/net/route.h index 9ce286523db0..ed21a299b7e9 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -436,8 +436,6 @@ int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); /* * Note the following locking behavior: * - * rtalloc_ign() and rtalloc() return ro->ro_rt unlocked - * * rtalloc1() returns a locked rtentry * * rtfree() and RTFREE_LOCKED() require a locked rtentry @@ -445,9 +443,7 @@ int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); * RTFREE() uses an unlocked entry. 
*/ -int rt_expunge(struct radix_node_head *, struct rtentry *); void rtfree(struct rtentry *); -int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *); void rt_updatemtu(struct ifnet *); typedef int rt_walktree_f_t(struct rtentry *, void *); @@ -458,15 +454,8 @@ void rt_flushifroutes(struct ifnet *ifp); /* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */ /* Thes are used by old code not yet converted to use multiple FIBS */ -void rtalloc_ign(struct route *ro, u_long ignflags); -void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */ struct rtentry *rtalloc1(struct sockaddr *, int, u_long); int rtinit(struct ifaddr *, int, int); -int rtioctl(u_long, caddr_t); -void rtredirect(struct sockaddr *, struct sockaddr *, - struct sockaddr *, int, struct sockaddr *); -int rtrequest(int, struct sockaddr *, - struct sockaddr *, struct sockaddr *, int, struct rtentry **); /* XXX MRT NEW VERSIONS THAT USE FIBs * For now the protocol indepedent versions are the same as the AF_INET ones @@ -474,7 +463,6 @@ int rtrequest(int, struct sockaddr *, */ int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum); void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum); -void rtalloc_fib(struct route *ro, u_int fibnum); struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int); int rtioctl_fib(u_long, caddr_t, u_int); void rtredirect_fib(struct sockaddr *, struct sockaddr *, diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c index 283c45087c15..ced53816ada6 100644 --- a/sys/netinet/in_rmx.c +++ b/sys/netinet/in_rmx.c @@ -191,12 +191,6 @@ in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum) rtalloc_ign_fib(ro, ignflags, fibnum); } -struct rtentry * -in_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) -{ - return (rtalloc1_fib(dst, report, ignflags, fibnum)); -} - void in_rtredirect(struct sockaddr *dst, struct sockaddr *gateway, @@ -208,9 +202,3 @@ in_rtredirect(struct sockaddr *dst, 
rtredirect_fib(dst, gateway, netmask, flags, src, fibnum); } -void -in_rtalloc(struct route *ro, u_int fibnum) -{ - rtalloc_ign_fib(ro, 0UL, fibnum); -} - diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index f21ddf43b4a6..121c6da19436 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -387,8 +387,6 @@ void in_domifdetach(struct ifnet *, void *); /* XXX */ void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum); -void in_rtalloc(struct route *ro, u_int fibnum); -struct rtentry *in_rtalloc1(struct sockaddr *, int, u_long, u_int); void in_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); #endif /* _KERNEL */ diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index 9b762d6a631f..69e12c3cd5b5 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -538,7 +538,7 @@ X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused) int error = 0; /* - * Currently the only function calling this ioctl routine is rtioctl(). + * Currently the only function calling this ioctl routine is rtioctl_fib(). * Typically, only root can create the raw socket in order to execute * this ioctl method, however the request might be coming from a prison */ From cfa023eb90a47bb15886191459fb985521d40cae Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 08:59:38 +0000 Subject: [PATCH 25/88] sfxge: add Medford NIC methods Submitted by: Mark Spender Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. 
MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4908 --- sys/dev/sfxge/common/ef10_impl.h | 41 ++++++ sys/dev/sfxge/common/efx_impl.h | 1 + sys/dev/sfxge/common/efx_nic.c | 42 ++++++ sys/dev/sfxge/common/hunt_impl.h | 11 ++ sys/dev/sfxge/common/hunt_nic.c | 55 +++++--- sys/dev/sfxge/common/medford_impl.h | 22 ++++ sys/dev/sfxge/common/medford_nic.c | 198 ++++++++++++++++++++++++++++ 7 files changed, 350 insertions(+), 20 deletions(-) diff --git a/sys/dev/sfxge/common/ef10_impl.h b/sys/dev/sfxge/common/ef10_impl.h index 9b9f0aaf4b55..d6fab74edfdf 100644 --- a/sys/dev/sfxge/common/ef10_impl.h +++ b/sys/dev/sfxge/common/ef10_impl.h @@ -45,6 +45,47 @@ extern "C" { #define EF10_MAX_PIOBUF_NBUFS MEDFORD_PIOBUF_NBUFS #endif +extern __checkReturn efx_rc_t +efx_mcdi_get_port_assignment( + __in efx_nic_t *enp, + __out uint32_t *portp); + +extern __checkReturn efx_rc_t +efx_mcdi_get_port_modes( + __in efx_nic_t *enp, + __out uint32_t *modesp); + +extern __checkReturn efx_rc_t +efx_mcdi_get_mac_address_pf( + __in efx_nic_t *enp, + __out_ecount_opt(6) uint8_t mac_addrp[6]); + +extern __checkReturn efx_rc_t +efx_mcdi_get_mac_address_vf( + __in efx_nic_t *enp, + __out_ecount_opt(6) uint8_t mac_addrp[6]); + +extern __checkReturn efx_rc_t +efx_mcdi_get_clock( + __in efx_nic_t *enp, + __out uint32_t *sys_freqp); + +extern __checkReturn efx_rc_t +efx_mcdi_get_vector_cfg( + __in efx_nic_t *enp, + __out_opt uint32_t *vec_basep, + __out_opt uint32_t *pf_nvecp, + __out_opt uint32_t *vf_nvecp); + +extern __checkReturn efx_rc_t +ef10_get_datapath_caps( + __in efx_nic_t *enp); + +extern __checkReturn efx_rc_t +ef10_external_port_mapping( + __in efx_nic_t *enp, + __in uint32_t port, + __out uint8_t *external_portp); #ifdef __cplusplus diff --git a/sys/dev/sfxge/common/efx_impl.h b/sys/dev/sfxge/common/efx_impl.h index 5397ffebbbb7..b57b599601e6 100644 --- a/sys/dev/sfxge/common/efx_impl.h +++ b/sys/dev/sfxge/common/efx_impl.h @@ -358,6 +358,7 @@ typedef struct 
efx_intr_s { typedef struct efx_nic_ops_s { efx_rc_t (*eno_probe)(efx_nic_t *); + efx_rc_t (*eno_board_cfg)(efx_nic_t *); efx_rc_t (*eno_set_drv_limits)(efx_nic_t *, efx_drv_limits_t*); efx_rc_t (*eno_reset)(efx_nic_t *); efx_rc_t (*eno_init)(efx_nic_t *); diff --git a/sys/dev/sfxge/common/efx_nic.c b/sys/dev/sfxge/common/efx_nic.c index 07acc56d779c..f88f11862197 100644 --- a/sys/dev/sfxge/common/efx_nic.c +++ b/sys/dev/sfxge/common/efx_nic.c @@ -244,6 +244,7 @@ efx_nic_biu_test( static efx_nic_ops_t __efx_nic_falcon_ops = { falcon_nic_probe, /* eno_probe */ + NULL, /* eno_board_cfg */ NULL, /* eno_set_drv_limits */ falcon_nic_reset, /* eno_reset */ falcon_nic_init, /* eno_init */ @@ -263,6 +264,7 @@ static efx_nic_ops_t __efx_nic_falcon_ops = { static efx_nic_ops_t __efx_nic_siena_ops = { siena_nic_probe, /* eno_probe */ + NULL, /* eno_board_cfg */ NULL, /* eno_set_drv_limits */ siena_nic_reset, /* eno_reset */ siena_nic_init, /* eno_init */ @@ -282,6 +284,7 @@ static efx_nic_ops_t __efx_nic_siena_ops = { static efx_nic_ops_t __efx_nic_hunt_ops = { ef10_nic_probe, /* eno_probe */ + hunt_board_cfg, /* eno_board_cfg */ ef10_nic_set_drv_limits, /* eno_set_drv_limits */ ef10_nic_reset, /* eno_reset */ ef10_nic_init, /* eno_init */ @@ -297,6 +300,27 @@ static efx_nic_ops_t __efx_nic_hunt_ops = { #endif /* EFSYS_OPT_HUNTINGTON */ +#if EFSYS_OPT_MEDFORD + +static efx_nic_ops_t __efx_nic_medford_ops = { + ef10_nic_probe, /* eno_probe */ + medford_board_cfg, /* eno_board_cfg */ + ef10_nic_set_drv_limits, /* eno_set_drv_limits */ + ef10_nic_reset, /* eno_reset */ + ef10_nic_init, /* eno_init */ + ef10_nic_get_vi_pool, /* eno_get_vi_pool */ + ef10_nic_get_bar_region, /* eno_get_bar_region */ +#if EFSYS_OPT_DIAG + ef10_sram_test, /* eno_sram_test */ + ef10_nic_register_test, /* eno_register_test */ +#endif /* EFSYS_OPT_DIAG */ + ef10_nic_fini, /* eno_fini */ + ef10_nic_unprobe, /* eno_unprobe */ +}; + +#endif /* EFSYS_OPT_MEDFORD */ + + __checkReturn efx_rc_t efx_nic_create( 
__in efx_family_t family, @@ -361,6 +385,24 @@ efx_nic_create( break; #endif /* EFSYS_OPT_HUNTINGTON */ +#if EFSYS_OPT_MEDFORD + case EFX_FAMILY_MEDFORD: + enp->en_enop = (efx_nic_ops_t *)&__efx_nic_medford_ops; + /* + * FW_ASSISTED_TSO omitted as Medford only supports firmware + * assisted TSO version 2, not the v1 scheme used on Huntington. + */ + enp->en_features = + EFX_FEATURE_IPV6 | + EFX_FEATURE_LINK_EVENTS | + EFX_FEATURE_PERIODIC_MAC_STATS | + EFX_FEATURE_MCDI | + EFX_FEATURE_MAC_HEADER_FILTERS | + EFX_FEATURE_MCDI_DMA | + EFX_FEATURE_PIO_BUFFERS; + break; +#endif /* EFSYS_OPT_MEDFORD */ + default: rc = ENOTSUP; goto fail2; diff --git a/sys/dev/sfxge/common/hunt_impl.h b/sys/dev/sfxge/common/hunt_impl.h index f8c3b5e67678..557839d4c99c 100644 --- a/sys/dev/sfxge/common/hunt_impl.h +++ b/sys/dev/sfxge/common/hunt_impl.h @@ -54,6 +54,13 @@ extern "C" { */ #define EF10_RX_WPTR_ALIGN 8 +/* + * Max byte offset into the packet the TCP header must start for the hardware + * to be able to parse the packet correctly. + * FIXME: Move to ef10_impl.h when it is included in all driver builds. 
+ */ +#define EF10_TCP_HEADER_OFFSET_LIMIT 208 + /* Invalid RSS context handle */ #define EF10_RSS_CONTEXT_INVALID (0xffffffff) @@ -164,6 +171,10 @@ extern __checkReturn efx_rc_t ef10_nic_probe( __in efx_nic_t *enp); +extern __checkReturn efx_rc_t +hunt_board_cfg( + __in efx_nic_t *enp); + extern __checkReturn efx_rc_t ef10_nic_set_drv_limits( __inout efx_nic_t *enp, diff --git a/sys/dev/sfxge/common/hunt_nic.c b/sys/dev/sfxge/common/hunt_nic.c index d87f3cd0694f..21411afbb211 100644 --- a/sys/dev/sfxge/common/hunt_nic.c +++ b/sys/dev/sfxge/common/hunt_nic.c @@ -41,7 +41,7 @@ __FBSDID("$FreeBSD$"); #include "ef10_tlv_layout.h" -static __checkReturn efx_rc_t + __checkReturn efx_rc_t efx_mcdi_get_port_assignment( __in efx_nic_t *enp, __out uint32_t *portp) @@ -85,7 +85,7 @@ efx_mcdi_get_port_assignment( return (rc); } -static __checkReturn efx_rc_t + __checkReturn efx_rc_t efx_mcdi_get_port_modes( __in efx_nic_t *enp, __out uint32_t *modesp) @@ -205,7 +205,7 @@ efx_mcdi_vadaptor_free( return (rc); } -static __checkReturn efx_rc_t + __checkReturn efx_rc_t efx_mcdi_get_mac_address_pf( __in efx_nic_t *enp, __out_ecount_opt(6) uint8_t mac_addrp[6]) @@ -263,7 +263,7 @@ efx_mcdi_get_mac_address_pf( return (rc); } -static __checkReturn efx_rc_t + __checkReturn efx_rc_t efx_mcdi_get_mac_address_vf( __in efx_nic_t *enp, __out_ecount_opt(6) uint8_t mac_addrp[6]) @@ -326,7 +326,7 @@ efx_mcdi_get_mac_address_vf( return (rc); } -static __checkReturn efx_rc_t + __checkReturn efx_rc_t efx_mcdi_get_clock( __in efx_nic_t *enp, __out uint32_t *sys_freqp) @@ -376,7 +376,7 @@ efx_mcdi_get_clock( return (rc); } -static __checkReturn efx_rc_t + __checkReturn efx_rc_t efx_mcdi_get_vector_cfg( __in efx_nic_t *enp, __out_opt uint32_t *vec_basep, @@ -889,7 +889,7 @@ ef10_nic_pio_unlink( return (efx_mcdi_unlink_piobuf(enp, vi_index)); } -static __checkReturn efx_rc_t + __checkReturn efx_rc_t ef10_get_datapath_caps( __in efx_nic_t *enp) { @@ -992,6 +992,13 @@ static struct { (1 << 
TLV_PORT_MODE_10G_10G_10G_10G), 1 }, + { + EFX_FAMILY_MEDFORD, + (1 << TLV_PORT_MODE_10G) | + (1 << TLV_PORT_MODE_10G_10G) | + (1 << TLV_PORT_MODE_10G_10G_10G_10G), + 1 + }, /* Supported modes requiring 2 outputs per port */ { EFX_FAMILY_HUNTINGTON, @@ -1000,18 +1007,25 @@ static struct { (1 << TLV_PORT_MODE_40G_10G_10G) | (1 << TLV_PORT_MODE_10G_10G_40G), 2 - } - /* - * NOTE: Medford modes will require 4 outputs per port: - * TLV_PORT_MODE_10G_10G_10G_10G_Q - * TLV_PORT_MODE_10G_10G_10G_10G_Q2 - * The Q2 mode routes outputs to external port 2. Support for this - * will require a new field specifying the number to add after - * scaling by stride. This is fixed at 1 currently. - */ + }, + { + EFX_FAMILY_MEDFORD, + (1 << TLV_PORT_MODE_40G) | + (1 << TLV_PORT_MODE_40G_40G) | + (1 << TLV_PORT_MODE_40G_10G_10G) | + (1 << TLV_PORT_MODE_10G_10G_40G), + 2 + }, + /* Supported modes requiring 4 outputs per port */ + { + EFX_FAMILY_MEDFORD, + (1 << TLV_PORT_MODE_10G_10G_10G_10G_Q) | + (1 << TLV_PORT_MODE_10G_10G_10G_10G_Q2), + 4 + }, }; -static __checkReturn efx_rc_t + __checkReturn efx_rc_t ef10_external_port_mapping( __in efx_nic_t *enp, __in uint32_t port, @@ -1064,7 +1078,7 @@ ef10_external_port_mapping( return (rc); } -static __checkReturn efx_rc_t + __checkReturn efx_rc_t hunt_board_cfg( __in efx_nic_t *enp) { @@ -1320,7 +1334,7 @@ hunt_board_cfg( * Maximum number of bytes into the frame the TCP header can start for * firmware assisted TSO to work. 
*/ - encp->enc_tx_tso_tcp_header_offset_limit = 208; + encp->enc_tx_tso_tcp_header_offset_limit = EF10_TCP_HEADER_OFFSET_LIMIT; return (0); @@ -1361,6 +1375,7 @@ hunt_board_cfg( ef10_nic_probe( __in efx_nic_t *enp) { + efx_nic_ops_t *enop = enp->en_enop; efx_nic_cfg_t *encp = &(enp->en_nic_cfg); efx_drv_cfg_t *edcp = &(enp->en_drv_cfg); efx_rc_t rc; @@ -1380,7 +1395,7 @@ ef10_nic_probe( if ((rc = efx_mcdi_drv_attach(enp, B_TRUE)) != 0) goto fail3; - if ((rc = hunt_board_cfg(enp)) != 0) + if ((rc = enop->eno_board_cfg(enp)) != 0) if (rc != EACCES) goto fail4; diff --git a/sys/dev/sfxge/common/medford_impl.h b/sys/dev/sfxge/common/medford_impl.h index 11084dc00f68..59ea35fb613d 100644 --- a/sys/dev/sfxge/common/medford_impl.h +++ b/sys/dev/sfxge/common/medford_impl.h @@ -37,7 +37,29 @@ extern "C" { #endif +/* Alignment requirement for value written to RX WPTR: + * the WPTR must be aligned to an 8 descriptor boundary + * + * FIXME: Is this the same on Medford as Huntington? + */ +#define MEDFORD_RX_WPTR_ALIGN 8 + + + +#ifndef ER_EZ_TX_PIOBUF_SIZE +#define ER_EZ_TX_PIOBUF_SIZE 4096 +#endif + + #define MEDFORD_PIOBUF_NBUFS (16) +#define MEDFORD_PIOBUF_SIZE (ER_EZ_TX_PIOBUF_SIZE) + +#define MEDFORD_MIN_PIO_ALLOC_SIZE (MEDFORD_PIOBUF_SIZE / 32) + + +extern __checkReturn efx_rc_t +medford_board_cfg( + __in efx_nic_t *enp); #ifdef __cplusplus diff --git a/sys/dev/sfxge/common/medford_nic.c b/sys/dev/sfxge/common/medford_nic.c index 68c8184f5b18..51a8ac0fe75e 100644 --- a/sys/dev/sfxge/common/medford_nic.c +++ b/sys/dev/sfxge/common/medford_nic.c @@ -39,7 +39,205 @@ __FBSDID("$FreeBSD$"); #include "ef10_tlv_layout.h" + __checkReturn efx_rc_t +medford_board_cfg( + __in efx_nic_t *enp) +{ + efx_mcdi_iface_t *emip = &(enp->en_mcdi.em_emip); + efx_nic_cfg_t *encp = &(enp->en_nic_cfg); + uint8_t mac_addr[6] = { 0 }; + uint32_t board_type = 0; + hunt_link_state_t hls; + efx_port_t *epp = &(enp->en_port); + uint32_t port; + uint32_t pf; + uint32_t vf; + uint32_t mask; + uint32_t 
flags; + uint32_t sysclk; + uint32_t base, nvec; + efx_rc_t rc; + /* + * FIXME: Likely to be incomplete and incorrect. + * Parts of this should be shared with Huntington. + */ + if ((rc = efx_mcdi_get_port_assignment(enp, &port)) != 0) + goto fail1; + + /* + * NOTE: The MCDI protocol numbers ports from zero. + * The common code MCDI interface numbers ports from one. + */ + emip->emi_port = port + 1; + + if ((rc = ef10_external_port_mapping(enp, port, + &encp->enc_external_port)) != 0) + goto fail2; + + /* + * Get PCIe function number from firmware (used for + * per-function privilege and dynamic config info). + * - PCIe PF: pf = PF number, vf = 0xffff. + * - PCIe VF: pf = parent PF, vf = VF number. + */ + if ((rc = efx_mcdi_get_function_info(enp, &pf, &vf)) != 0) + goto fail3; + + encp->enc_pf = pf; + encp->enc_vf = vf; + + /* MAC address for this function */ + if (EFX_PCI_FUNCTION_IS_PF(encp)) { + rc = efx_mcdi_get_mac_address_pf(enp, mac_addr); + if ((rc == 0) && (mac_addr[0] & 0x02)) { + /* + * If the static config does not include a global MAC + * address pool then the board may return a locally + * administered MAC address (this should only happen on + * incorrectly programmed boards). 
+ */ + rc = EINVAL; + } + } else { + rc = efx_mcdi_get_mac_address_vf(enp, mac_addr); + } + if (rc != 0) + goto fail4; + + EFX_MAC_ADDR_COPY(encp->enc_mac_addr, mac_addr); + + /* Board configuration */ + rc = efx_mcdi_get_board_cfg(enp, &board_type, NULL, NULL); + if (rc != 0) { + /* Unprivileged functions may not be able to read board cfg */ + if (rc == EACCES) + board_type = 0; + else + goto fail5; + } + + encp->enc_board_type = board_type; + encp->enc_clk_mult = 1; /* not used for Medford */ + + /* Fill out fields in enp->en_port and enp->en_nic_cfg from MCDI */ + if ((rc = efx_mcdi_get_phy_cfg(enp)) != 0) + goto fail6; + + /* Obtain the default PHY advertised capabilities */ + if ((rc = hunt_phy_get_link(enp, &hls)) != 0) + goto fail7; + epp->ep_default_adv_cap_mask = hls.hls_adv_cap_mask; + epp->ep_adv_cap_mask = hls.hls_adv_cap_mask; + + if (EFX_PCI_FUNCTION_IS_VF(encp)) { + /* + * Interrupt testing does not work for VFs. See bug50084. + * FIXME: Does this still apply to Medford? + */ + encp->enc_bug41750_workaround = B_TRUE; + } + + /* Chained multicast is always enabled on Medford */ + encp->enc_bug26807_workaround = B_TRUE; + + /* Get sysclk frequency (in MHz). */ + if ((rc = efx_mcdi_get_clock(enp, &sysclk)) != 0) + goto fail8; + + /* + * The timer quantum is 1536 sysclk cycles, documented for the + * EV_TMR_VAL field of EV_TIMER_TBL. Scale for MHz and ns units. 
+ */ + encp->enc_evq_timer_quantum_ns = 1536000UL / sysclk; /* 1536 cycles */ + encp->enc_evq_timer_max_us = (encp->enc_evq_timer_quantum_ns << + FRF_CZ_TC_TIMER_VAL_WIDTH) / 1000; + + /* Check capabilities of running datapath firmware */ + if ((rc = ef10_get_datapath_caps(enp)) != 0) + goto fail9; + + /* Alignment for receive packet DMA buffers */ + encp->enc_rx_buf_align_start = 1; + + /* FIXME: RX DMA end padding is configurable on Medford */ + encp->enc_rx_buf_align_end = 64; + + /* Alignment for WPTR updates */ + encp->enc_rx_push_align = EF10_RX_WPTR_ALIGN; + + /* + * Set resource limits for MC_CMD_ALLOC_VIS. Note that we cannot use + * MC_CMD_GET_RESOURCE_LIMITS here as that reports the available + * resources (allocated to this PCIe function), which is zero until + * after we have allocated VIs. + */ + encp->enc_evq_limit = 1024; + encp->enc_rxq_limit = EFX_RXQ_LIMIT_TARGET; + encp->enc_txq_limit = EFX_TXQ_LIMIT_TARGET; + + encp->enc_buftbl_limit = 0xFFFFFFFF; + + encp->enc_piobuf_limit = MEDFORD_PIOBUF_NBUFS; + encp->enc_piobuf_size = MEDFORD_PIOBUF_SIZE; + encp->enc_piobuf_min_alloc_size = MEDFORD_MIN_PIO_ALLOC_SIZE; + + /* + * Get the current privilege mask. Note that this may be modified + * dynamically, so this value is informational only. DO NOT use + * the privilege mask to check for sufficient privileges, as that + * can result in time-of-check/time-of-use bugs. + */ + if ((rc = efx_mcdi_privilege_mask(enp, pf, vf, &mask)) != 0) + goto fail10; + + encp->enc_privilege_mask = mask; + + /* Get interrupt vector limits */ + if ((rc = efx_mcdi_get_vector_cfg(enp, &base, &nvec, NULL)) != 0) { + if (EFX_PCI_FUNCTION_IS_PF(encp)) + goto fail11; + + /* Ignore error (cannot query vector limits from a VF). */ + base = 0; + nvec = 1024; + } + encp->enc_intr_vec_base = base; + encp->enc_intr_limit = nvec; + + /* + * Maximum number of bytes into the frame the TCP header can start for + * firmware assisted TSO to work. 
+ */ + encp->enc_tx_tso_tcp_header_offset_limit = EF10_TCP_HEADER_OFFSET_LIMIT; + + return (0); + +fail11: + EFSYS_PROBE(fail11); +fail10: + EFSYS_PROBE(fail10); +fail9: + EFSYS_PROBE(fail9); +fail8: + EFSYS_PROBE(fail8); +fail7: + EFSYS_PROBE(fail7); +fail6: + EFSYS_PROBE(fail6); +fail5: + EFSYS_PROBE(fail5); +fail4: + EFSYS_PROBE(fail4); +fail3: + EFSYS_PROBE(fail3); +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} #endif /* EFSYS_OPT_MEDFORD */ From fd7501bf79194b95dc57e8201644ff3ea7f99510 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 09:00:35 +0000 Subject: [PATCH 26/88] sfxge: rework MCDI start request Submitted by: Andy Moreton Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4909 --- sys/dev/sfxge/common/efx_impl.h | 4 +- sys/dev/sfxge/common/efx_mcdi.c | 96 +++++++++++++++++++++++++--- sys/dev/sfxge/common/hunt_impl.h | 10 +-- sys/dev/sfxge/common/hunt_mcdi.c | 101 +----------------------------- sys/dev/sfxge/common/siena_impl.h | 10 +-- sys/dev/sfxge/common/siena_mcdi.c | 46 +------------- 6 files changed, 101 insertions(+), 166 deletions(-) diff --git a/sys/dev/sfxge/common/efx_impl.h b/sys/dev/sfxge/common/efx_impl.h index b57b599601e6..434ac68c6234 100644 --- a/sys/dev/sfxge/common/efx_impl.h +++ b/sys/dev/sfxge/common/efx_impl.h @@ -457,8 +457,8 @@ falconsiena_filter_tbl_clear( typedef struct efx_mcdi_ops_s { efx_rc_t (*emco_init)(efx_nic_t *, const efx_mcdi_transport_t *); - void (*emco_request_copyin)(efx_nic_t *, efx_mcdi_req_t *, - unsigned int, boolean_t, boolean_t); + void (*emco_send_request)(efx_nic_t *, void *, size_t, + void *, size_t); void (*emco_request_copyout)(efx_nic_t *, efx_mcdi_req_t *); efx_rc_t (*emco_poll_reboot)(efx_nic_t *); boolean_t (*emco_poll_response)(efx_nic_t *); diff --git a/sys/dev/sfxge/common/efx_mcdi.c b/sys/dev/sfxge/common/efx_mcdi.c index 
07224c71d8b1..7224c3686c17 100644 --- a/sys/dev/sfxge/common/efx_mcdi.c +++ b/sys/dev/sfxge/common/efx_mcdi.c @@ -36,12 +36,32 @@ __FBSDID("$FreeBSD$"); #if EFSYS_OPT_MCDI +/* + * There are three versions of the MCDI interface: + * - MCDIv0: Siena BootROM. Transport uses MCDIv1 headers. + * - MCDIv1: Siena firmware and Huntington BootROM. + * - MCDIv2: EF10 firmware (Huntington/Medford) and Medford BootROM. + * Transport uses MCDIv2 headers. + * + * MCDIv2 Header NOT_EPOCH flag + * ---------------------------- + * A new epoch begins at initial startup or after an MC reboot, and defines when + * the MC should reject stale MCDI requests. + * + * The first MCDI request sent by the host should contain NOT_EPOCH=0, and all + * subsequent requests (until the next MC reboot) should contain NOT_EPOCH=1. + * + * After rebooting the MC will fail all requests with NOT_EPOCH=1 by writing a + * response with ERROR=1 and DATALEN=0 until a request is seen with NOT_EPOCH=0. + */ + + #if EFSYS_OPT_SIENA static efx_mcdi_ops_t __efx_mcdi_siena_ops = { siena_mcdi_init, /* emco_init */ - siena_mcdi_request_copyin, /* emco_request_copyin */ + siena_mcdi_send_request, /* emco_send_request */ siena_mcdi_request_copyout, /* emco_request_copyout */ siena_mcdi_poll_reboot, /* emco_poll_reboot */ siena_mcdi_poll_response, /* emco_poll_response */ @@ -56,7 +76,7 @@ static efx_mcdi_ops_t __efx_mcdi_siena_ops = { static efx_mcdi_ops_t __efx_mcdi_ef10_ops = { ef10_mcdi_init, /* emco_init */ - ef10_mcdi_request_copyin, /* emco_request_copyin */ + ef10_mcdi_send_request, /* emco_send_request */ ef10_mcdi_request_copyout, /* emco_request_copyout */ ef10_mcdi_poll_reboot, /* emco_poll_reboot */ ef10_mcdi_poll_response, /* emco_poll_response */ @@ -179,16 +199,16 @@ efx_mcdi_new_epoch( } static void -efx_mcdi_request_copyin( +efx_mcdi_send_request( __in efx_nic_t *enp, - __in efx_mcdi_req_t *emrp, - __in unsigned int seq, - __in boolean_t ev_cpl, - __in boolean_t new_epoch) + __in void *hdrp, + __in 
size_t hdr_len, + __in void *sdup, + __in size_t sdu_len) { efx_mcdi_ops_t *emcop = enp->en_mcdi.em_emcop; - emcop->emco_request_copyin(enp, emrp, seq, ev_cpl, new_epoch); + emcop->emco_send_request(enp, hdrp, hdr_len, sdup, sdu_len); } static void @@ -241,8 +261,15 @@ efx_mcdi_request_start( __in efx_mcdi_req_t *emrp, __in boolean_t ev_cpl) { +#if EFSYS_OPT_MCDI_LOGGING + const efx_mcdi_transport_t *emtp = enp->en_mcdi.em_emtp; +#endif efx_mcdi_iface_t *emip = &(enp->en_mcdi.em_emip); + efx_dword_t hdr[2]; + size_t hdr_len; + unsigned int max_version; unsigned int seq; + unsigned int xflags; boolean_t new_epoch; int state; @@ -269,9 +296,60 @@ efx_mcdi_request_start( emip->emi_poll_cnt = 0; seq = emip->emi_seq++ & EFX_MASK32(MCDI_HEADER_SEQ); new_epoch = emip->emi_new_epoch; + max_version = emip->emi_max_version; EFSYS_UNLOCK(enp->en_eslp, state); - efx_mcdi_request_copyin(enp, emrp, seq, ev_cpl, new_epoch); + xflags = 0; + if (ev_cpl) + xflags |= MCDI_HEADER_XFLAGS_EVREQ; + + /* + * Huntington firmware supports MCDIv2, but the Huntington BootROM only + * supports MCDIv1. Use MCDIv1 headers for MCDIv1 commands where + * possible to support this. + */ + if ((max_version >= 2) && + ((emrp->emr_cmd > MC_CMD_CMD_SPACE_ESCAPE_7) || + (emrp->emr_in_length > MCDI_CTL_SDU_LEN_MAX_V1))) { + /* Construct MCDI v2 header */ + hdr_len = sizeof (hdr); + EFX_POPULATE_DWORD_8(hdr[0], + MCDI_HEADER_CODE, MC_CMD_V2_EXTN, + MCDI_HEADER_RESYNC, 1, + MCDI_HEADER_DATALEN, 0, + MCDI_HEADER_SEQ, seq, + MCDI_HEADER_NOT_EPOCH, new_epoch ? 
0 : 1, + MCDI_HEADER_ERROR, 0, + MCDI_HEADER_RESPONSE, 0, + MCDI_HEADER_XFLAGS, xflags); + + EFX_POPULATE_DWORD_2(hdr[1], + MC_CMD_V2_EXTN_IN_EXTENDED_CMD, emrp->emr_cmd, + MC_CMD_V2_EXTN_IN_ACTUAL_LEN, emrp->emr_in_length); + } else { + /* Construct MCDI v1 header */ + hdr_len = sizeof (hdr[0]); + EFX_POPULATE_DWORD_8(hdr[0], + MCDI_HEADER_CODE, emrp->emr_cmd, + MCDI_HEADER_RESYNC, 1, + MCDI_HEADER_DATALEN, emrp->emr_in_length, + MCDI_HEADER_SEQ, seq, + MCDI_HEADER_NOT_EPOCH, new_epoch ? 0 : 1, + MCDI_HEADER_ERROR, 0, + MCDI_HEADER_RESPONSE, 0, + MCDI_HEADER_XFLAGS, xflags); + } + +#if EFSYS_OPT_MCDI_LOGGING + if (emtp->emt_logger != NULL) { + emtp->emt_logger(emtp->emt_context, EFX_LOG_MCDI_REQUEST, + &hdr, hdr_len, + emrp->emr_in_buf, emrp->emr_in_length); + } +#endif /* EFSYS_OPT_MCDI_LOGGING */ + + efx_mcdi_send_request(enp, &hdr[0], hdr_len, + emrp->emr_in_buf, emrp->emr_in_length); } diff --git a/sys/dev/sfxge/common/hunt_impl.h b/sys/dev/sfxge/common/hunt_impl.h index 557839d4c99c..94d75109f0fb 100644 --- a/sys/dev/sfxge/common/hunt_impl.h +++ b/sys/dev/sfxge/common/hunt_impl.h @@ -287,12 +287,12 @@ ef10_mcdi_fini( __in efx_nic_t *enp); extern void -ef10_mcdi_request_copyin( +ef10_mcdi_send_request( __in efx_nic_t *enp, - __in efx_mcdi_req_t *emrp, - __in unsigned int seq, - __in boolean_t ev_cpl, - __in boolean_t new_epoch); + __in void *hdrp, + __in size_t hdr_len, + __in void *sdup, + __in size_t sdu_len); extern __checkReturn boolean_t ef10_mcdi_poll_response( diff --git a/sys/dev/sfxge/common/hunt_mcdi.c b/sys/dev/sfxge/common/hunt_mcdi.c index 1cccb23afce3..cef049bbc01a 100644 --- a/sys/dev/sfxge/common/hunt_mcdi.c +++ b/sys/dev/sfxge/common/hunt_mcdi.c @@ -43,37 +43,6 @@ __FBSDID("$FreeBSD$"); #error "WITH_MCDI_V2 required for EF10 MCDIv2 commands." 
#endif -typedef enum efx_mcdi_header_type_e { - EFX_MCDI_HEADER_TYPE_V1, /* MCDIv0 (BootROM), MCDIv1 commands */ - EFX_MCDI_HEADER_TYPE_V2, /* MCDIv2 commands */ -} efx_mcdi_header_type_t; - -/* - * Return the header format to use for sending an MCDI request. - * - * An MCDIv1 (Siena compatible) command should use MCDIv2 encapsulation if the - * request input buffer or response output buffer are too large for the MCDIv1 - * format. An MCDIv2 command must always be sent using MCDIv2 encapsulation. - */ -#define EFX_MCDI_HEADER_TYPE(_cmd, _length) \ - ((((_cmd) & ~EFX_MASK32(MCDI_HEADER_CODE)) || \ - ((_length) & ~EFX_MASK32(MCDI_HEADER_DATALEN))) ? \ - EFX_MCDI_HEADER_TYPE_V2 : EFX_MCDI_HEADER_TYPE_V1) - - -/* - * MCDI Header NOT_EPOCH flag - * ========================== - * A new epoch begins at initial startup or after an MC reboot, and defines when - * the MC should reject stale MCDI requests. - * - * The first MCDI request sent by the host should contain NOT_EPOCH=0, and all - * subsequent requests (until the next MC reboot) should contain NOT_EPOCH=1. - * - * After rebooting the MC will fail all requests with NOT_EPOCH=1 by writing a - * response with ERROR=1 and DATALEN=0 until a request is seen with NOT_EPOCH=0. 
- */ - __checkReturn efx_rc_t ef10_mcdi_init( @@ -139,7 +108,7 @@ ef10_mcdi_fini( emip->emi_new_epoch = B_FALSE; } -static void + void ef10_mcdi_send_request( __in efx_nic_t *enp, __in void *hdrp, @@ -181,74 +150,6 @@ ef10_mcdi_send_request( EFX_BAR_WRITED(enp, ER_DZ_MC_DB_HWRD_REG, &dword, B_FALSE); } - void -ef10_mcdi_request_copyin( - __in efx_nic_t *enp, - __in efx_mcdi_req_t *emrp, - __in unsigned int seq, - __in boolean_t ev_cpl, - __in boolean_t new_epoch) -{ -#if EFSYS_OPT_MCDI_LOGGING - const efx_mcdi_transport_t *emtp = enp->en_mcdi.em_emtp; -#endif /* EFSYS_OPT_MCDI_LOGGING */ - efx_mcdi_header_type_t hdr_type; - efx_dword_t hdr[2]; - size_t hdr_len; - unsigned int xflags; - - EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON || - enp->en_family == EFX_FAMILY_MEDFORD); - - xflags = 0; - if (ev_cpl) - xflags |= MCDI_HEADER_XFLAGS_EVREQ; - - hdr_type = EFX_MCDI_HEADER_TYPE(emrp->emr_cmd, - MAX(emrp->emr_in_length, emrp->emr_out_length)); - - if (hdr_type == EFX_MCDI_HEADER_TYPE_V2) { - /* Construct MCDI v2 header */ - hdr_len = sizeof (hdr); - EFX_POPULATE_DWORD_8(hdr[0], - MCDI_HEADER_CODE, MC_CMD_V2_EXTN, - MCDI_HEADER_RESYNC, 1, - MCDI_HEADER_DATALEN, 0, - MCDI_HEADER_SEQ, seq, - MCDI_HEADER_NOT_EPOCH, new_epoch ? 0 : 1, - MCDI_HEADER_ERROR, 0, - MCDI_HEADER_RESPONSE, 0, - MCDI_HEADER_XFLAGS, xflags); - - EFX_POPULATE_DWORD_2(hdr[1], - MC_CMD_V2_EXTN_IN_EXTENDED_CMD, emrp->emr_cmd, - MC_CMD_V2_EXTN_IN_ACTUAL_LEN, emrp->emr_in_length); - } else { - /* Construct MCDI v1 header */ - hdr_len = sizeof (hdr[0]); - EFX_POPULATE_DWORD_8(hdr[0], - MCDI_HEADER_CODE, emrp->emr_cmd, - MCDI_HEADER_RESYNC, 1, - MCDI_HEADER_DATALEN, emrp->emr_in_length, - MCDI_HEADER_SEQ, seq, - MCDI_HEADER_NOT_EPOCH, new_epoch ? 
0 : 1, - MCDI_HEADER_ERROR, 0, - MCDI_HEADER_RESPONSE, 0, - MCDI_HEADER_XFLAGS, xflags); - } - -#if EFSYS_OPT_MCDI_LOGGING - if (emtp->emt_logger != NULL) { - emtp->emt_logger(emtp->emt_context, EFX_LOG_MCDI_REQUEST, - &hdr, hdr_len, - emrp->emr_in_buf, emrp->emr_in_length); - } -#endif /* EFSYS_OPT_MCDI_LOGGING */ - - ef10_mcdi_send_request(enp, &hdr[0], hdr_len, - emrp->emr_in_buf, emrp->emr_in_length); -} - void ef10_mcdi_request_copyout( __in efx_nic_t *enp, diff --git a/sys/dev/sfxge/common/siena_impl.h b/sys/dev/sfxge/common/siena_impl.h index 639ac6b43fb7..9f076c2e0f6a 100644 --- a/sys/dev/sfxge/common/siena_impl.h +++ b/sys/dev/sfxge/common/siena_impl.h @@ -114,12 +114,12 @@ siena_mcdi_init( __in const efx_mcdi_transport_t *mtp); extern void -siena_mcdi_request_copyin( +siena_mcdi_send_request( __in efx_nic_t *enp, - __in efx_mcdi_req_t *emrp, - __in unsigned int seq, - __in boolean_t ev_cpl, - __in boolean_t new_epoch); + __in void *hdrp, + __in size_t hdr_len, + __in void *sdup, + __in size_t sdu_len); extern __checkReturn boolean_t siena_mcdi_poll_response( diff --git a/sys/dev/sfxge/common/siena_mcdi.c b/sys/dev/sfxge/common/siena_mcdi.c index f3af2bf6f479..14ca6cfeac88 100644 --- a/sys/dev/sfxge/common/siena_mcdi.c +++ b/sys/dev/sfxge/common/siena_mcdi.c @@ -52,7 +52,7 @@ __FBSDID("$FreeBSD$"); : MC_SMEM_P1_STATUS_OFST >> 2) -static void + void siena_mcdi_send_request( __in efx_nic_t *enp, __in void *hdrp, @@ -89,50 +89,6 @@ siena_mcdi_send_request( EFX_BAR_TBL_WRITED(enp, FR_CZ_MC_TREG_SMEM, dbr, &dword, B_FALSE); } - void -siena_mcdi_request_copyin( - __in efx_nic_t *enp, - __in efx_mcdi_req_t *emrp, - __in unsigned int seq, - __in boolean_t ev_cpl, - __in boolean_t new_epoch) -{ -#if EFSYS_OPT_MCDI_LOGGING - const efx_mcdi_transport_t *emtp = enp->en_mcdi.em_emtp; -#endif - efx_dword_t hdr; - size_t hdr_len; - unsigned int xflags; - - EFSYS_ASSERT(enp->en_family == EFX_FAMILY_SIENA); - _NOTE(ARGUNUSED(new_epoch)) - - xflags = 0; - if (ev_cpl) - 
xflags |= MCDI_HEADER_XFLAGS_EVREQ; - - /* Construct the header */ - hdr_len = sizeof (hdr); - EFX_POPULATE_DWORD_6(hdr, - MCDI_HEADER_CODE, emrp->emr_cmd, - MCDI_HEADER_RESYNC, 1, - MCDI_HEADER_DATALEN, emrp->emr_in_length, - MCDI_HEADER_SEQ, seq, - MCDI_HEADER_RESPONSE, 0, - MCDI_HEADER_XFLAGS, xflags); - -#if EFSYS_OPT_MCDI_LOGGING - if (emtp->emt_logger != NULL) { - emtp->emt_logger(emtp->emt_context, EFX_LOG_MCDI_REQUEST, - &hdr, sizeof (hdr), - emrp->emr_in_buf, emrp->emr_in_length); - } -#endif /* EFSYS_OPT_MCDI_LOGGING */ - - siena_mcdi_send_request(enp, &hdr, hdr_len, - emrp->emr_in_buf, emrp->emr_in_length); -} - void siena_mcdi_request_copyout( __in efx_nic_t *enp, From 56bd83b0c51f8a1b71e5b74e0bfef5a8e95287c9 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 09:01:53 +0000 Subject: [PATCH 27/88] sfxge: convert nvram size method to use partition id Submitted by: Andy Moreton Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4910 --- sys/dev/sfxge/common/efx_impl.h | 2 +- sys/dev/sfxge/common/efx_nvram.c | 15 +++++++++++---- sys/dev/sfxge/common/hunt_impl.h | 18 ++++++------------ sys/dev/sfxge/common/hunt_nvram.c | 27 --------------------------- sys/dev/sfxge/common/siena_impl.h | 18 ++++++------------ sys/dev/sfxge/common/siena_nvram.c | 26 -------------------------- 6 files changed, 24 insertions(+), 82 deletions(-) diff --git a/sys/dev/sfxge/common/efx_impl.h b/sys/dev/sfxge/common/efx_impl.h index 434ac68c6234..ab4bf2fcd4b4 100644 --- a/sys/dev/sfxge/common/efx_impl.h +++ b/sys/dev/sfxge/common/efx_impl.h @@ -480,7 +480,6 @@ typedef struct efx_nvram_ops_s { #if EFSYS_OPT_DIAG efx_rc_t (*envo_test)(efx_nic_t *); #endif /* EFSYS_OPT_DIAG */ - efx_rc_t (*envo_size)(efx_nic_t *, efx_nvram_type_t, size_t *); efx_rc_t (*envo_get_version)(efx_nic_t *, efx_nvram_type_t, uint32_t *, uint16_t *); efx_rc_t (*envo_rw_start)(efx_nic_t *, efx_nvram_type_t, 
size_t *); @@ -495,6 +494,7 @@ typedef struct efx_nvram_ops_s { efx_rc_t (*envo_type_to_partn)(efx_nic_t *, efx_nvram_type_t, uint32_t *); + efx_rc_t (*envo_partn_size)(efx_nic_t *, uint32_t, size_t *); } efx_nvram_ops_t; #endif /* EFSYS_OPT_NVRAM */ diff --git a/sys/dev/sfxge/common/efx_nvram.c b/sys/dev/sfxge/common/efx_nvram.c index 759763623db8..a30548b8848c 100644 --- a/sys/dev/sfxge/common/efx_nvram.c +++ b/sys/dev/sfxge/common/efx_nvram.c @@ -42,7 +42,6 @@ static efx_nvram_ops_t __efx_nvram_falcon_ops = { #if EFSYS_OPT_DIAG falcon_nvram_test, /* envo_test */ #endif /* EFSYS_OPT_DIAG */ - falcon_nvram_size, /* envo_size */ falcon_nvram_get_version, /* envo_get_version */ falcon_nvram_rw_start, /* envo_rw_start */ falcon_nvram_read_chunk, /* envo_read_chunk */ @@ -51,6 +50,7 @@ static efx_nvram_ops_t __efx_nvram_falcon_ops = { falcon_nvram_rw_finish, /* envo_rw_finish */ falcon_nvram_set_version, /* envo_set_version */ falcon_nvram_type_to_partn, /* envo_type_to_partn */ + falcon_nvram_partn_size, /* envo_partn_size */ }; #endif /* EFSYS_OPT_FALCON */ @@ -61,7 +61,6 @@ static efx_nvram_ops_t __efx_nvram_siena_ops = { #if EFSYS_OPT_DIAG siena_nvram_test, /* envo_test */ #endif /* EFSYS_OPT_DIAG */ - siena_nvram_size, /* envo_size */ siena_nvram_get_version, /* envo_get_version */ siena_nvram_rw_start, /* envo_rw_start */ siena_nvram_read_chunk, /* envo_read_chunk */ @@ -70,6 +69,7 @@ static efx_nvram_ops_t __efx_nvram_siena_ops = { siena_nvram_rw_finish, /* envo_rw_finish */ siena_nvram_set_version, /* envo_set_version */ siena_nvram_type_to_partn, /* envo_type_to_partn */ + siena_nvram_partn_size, /* envo_partn_size */ }; #endif /* EFSYS_OPT_SIENA */ @@ -80,7 +80,6 @@ static efx_nvram_ops_t __efx_nvram_ef10_ops = { #if EFSYS_OPT_DIAG ef10_nvram_test, /* envo_test */ #endif /* EFSYS_OPT_DIAG */ - ef10_nvram_size, /* envo_size */ ef10_nvram_get_version, /* envo_get_version */ ef10_nvram_rw_start, /* envo_rw_start */ ef10_nvram_read_chunk, /* envo_read_chunk */ 
@@ -89,6 +88,7 @@ static efx_nvram_ops_t __efx_nvram_ef10_ops = { ef10_nvram_rw_finish, /* envo_rw_finish */ ef10_nvram_set_version, /* envo_set_version */ ef10_nvram_type_to_partn, /* envo_type_to_partn */ + ef10_nvram_partn_size, /* envo_partn_size */ }; #endif /* EFSYS_OPT_HUNTINGTON || EFSYS_OPT_MEDFORD */ @@ -178,6 +178,7 @@ efx_nvram_size( __out size_t *sizep) { efx_nvram_ops_t *envop = enp->en_envop; + uint32_t partn; efx_rc_t rc; EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); @@ -185,13 +186,19 @@ efx_nvram_size( EFSYS_ASSERT3U(type, <, EFX_NVRAM_NTYPES); - if ((rc = envop->envo_size(enp, type, sizep)) != 0) + if ((rc = envop->envo_type_to_partn(enp, type, &partn)) != 0) goto fail1; + if ((rc = envop->envo_partn_size(enp, partn, sizep)) != 0) + goto fail2; + return (0); +fail2: + EFSYS_PROBE(fail2); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); + *sizep = 0; return (rc); } diff --git a/sys/dev/sfxge/common/hunt_impl.h b/sys/dev/sfxge/common/hunt_impl.h index 94d75109f0fb..8ac09dc21b8d 100644 --- a/sys/dev/sfxge/common/hunt_impl.h +++ b/sys/dev/sfxge/common/hunt_impl.h @@ -369,12 +369,6 @@ ef10_nvram_partn_write_segment_tlv( __in size_t size, __in boolean_t all_segments); -extern __checkReturn efx_rc_t -ef10_nvram_partn_size( - __in efx_nic_t *enp, - __in uint32_t partn, - __out size_t *sizep); - extern __checkReturn efx_rc_t ef10_nvram_partn_lock( __in efx_nic_t *enp, @@ -420,12 +414,6 @@ ef10_nvram_test( #endif /* EFSYS_OPT_DIAG */ -extern __checkReturn efx_rc_t -ef10_nvram_size( - __in efx_nic_t *enp, - __in efx_nvram_type_t type, - __out size_t *sizep); - extern __checkReturn efx_rc_t ef10_nvram_get_version( __in efx_nic_t *enp, @@ -483,6 +471,12 @@ ef10_nvram_type_to_partn( __in efx_nvram_type_t type, __out uint32_t *partnp); +extern __checkReturn efx_rc_t +ef10_nvram_partn_size( + __in efx_nic_t *enp, + __in uint32_t partn, + __out size_t *sizep); + #endif /* EFSYS_OPT_NVRAM */ diff --git a/sys/dev/sfxge/common/hunt_nvram.c 
b/sys/dev/sfxge/common/hunt_nvram.c index cbead9b66a9a..e1fa36a32951 100644 --- a/sys/dev/sfxge/common/hunt_nvram.c +++ b/sys/dev/sfxge/common/hunt_nvram.c @@ -1708,33 +1708,6 @@ ef10_nvram_test( #endif /* EFSYS_OPT_DIAG */ - __checkReturn efx_rc_t -ef10_nvram_size( - __in efx_nic_t *enp, - __in efx_nvram_type_t type, - __out size_t *sizep) -{ - uint32_t partn; - efx_rc_t rc; - - if ((rc = ef10_nvram_type_to_partn(enp, type, &partn)) != 0) - goto fail1; - - if ((rc = ef10_nvram_partn_size(enp, partn, sizep)) != 0) - goto fail2; - - return (0); - -fail2: - EFSYS_PROBE(fail2); -fail1: - EFSYS_PROBE1(fail1, efx_rc_t, rc); - - *sizep = 0; - - return (rc); -} - __checkReturn efx_rc_t ef10_nvram_get_version( __in efx_nic_t *enp, diff --git a/sys/dev/sfxge/common/siena_impl.h b/sys/dev/sfxge/common/siena_impl.h index 9f076c2e0f6a..d328395c277e 100644 --- a/sys/dev/sfxge/common/siena_impl.h +++ b/sys/dev/sfxge/common/siena_impl.h @@ -155,12 +155,6 @@ siena_mcdi_feature_supported( #if EFSYS_OPT_NVRAM || EFSYS_OPT_VPD -extern __checkReturn efx_rc_t -siena_nvram_partn_size( - __in efx_nic_t *enp, - __in uint32_t partn, - __out size_t *sizep); - extern __checkReturn efx_rc_t siena_nvram_partn_lock( __in efx_nic_t *enp, @@ -214,12 +208,6 @@ siena_nvram_test( #endif /* EFSYS_OPT_DIAG */ -extern __checkReturn efx_rc_t -siena_nvram_size( - __in efx_nic_t *enp, - __in efx_nvram_type_t type, - __out size_t *sizep); - extern __checkReturn efx_rc_t siena_nvram_get_subtype( __in efx_nic_t *enp, @@ -277,6 +265,12 @@ siena_nvram_type_to_partn( __in efx_nvram_type_t type, __out uint32_t *partnp); +extern __checkReturn efx_rc_t +siena_nvram_partn_size( + __in efx_nic_t *enp, + __in uint32_t partn, + __out size_t *sizep); + #endif /* EFSYS_OPT_NVRAM */ #if EFSYS_OPT_VPD diff --git a/sys/dev/sfxge/common/siena_nvram.c b/sys/dev/sfxge/common/siena_nvram.c index 8f107b49f61b..aae2f08f35ce 100644 --- a/sys/dev/sfxge/common/siena_nvram.c +++ b/sys/dev/sfxge/common/siena_nvram.c @@ -291,32 +291,6 
@@ siena_nvram_test( #endif /* EFSYS_OPT_DIAG */ - __checkReturn efx_rc_t -siena_nvram_size( - __in efx_nic_t *enp, - __in efx_nvram_type_t type, - __out size_t *sizep) -{ - uint32_t partn; - efx_rc_t rc; - - if ((rc = siena_nvram_type_to_partn(enp, type, &partn)) != 0) - goto fail1; - - if ((rc = siena_nvram_partn_size(enp, partn, sizep)) != 0) - goto fail2; - - return (0); - -fail2: - EFSYS_PROBE(fail2); -fail1: - EFSYS_PROBE1(fail1, efx_rc_t, rc); - - *sizep = 0; - - return (rc); -} #define SIENA_DYNAMIC_CFG_SIZE(_nitems) \ (sizeof (siena_mc_dynamic_config_hdr_t) + ((_nitems) * \ From 0a91bc3ff46ac2b1b9b078361380932bfd13912d Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 09:03:02 +0000 Subject: [PATCH 28/88] sfxge: rx_prefix_pktlen methods do not require EFSYS_OPT_RX_SCALE Submitted by: Mark Spender Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4911 --- sys/dev/sfxge/common/efx_rx.c | 4 ++-- sys/dev/sfxge/common/hunt_impl.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/dev/sfxge/common/efx_rx.c b/sys/dev/sfxge/common/efx_rx.c index a0b143f2b764..8ebe205715bd 100644 --- a/sys/dev/sfxge/common/efx_rx.c +++ b/sys/dev/sfxge/common/efx_rx.c @@ -78,14 +78,14 @@ falconsiena_rx_prefix_hash( __in efx_rx_hash_alg_t func, __in uint8_t *buffer); +#endif /* EFSYS_OPT_RX_SCALE */ + static __checkReturn efx_rc_t falconsiena_rx_prefix_pktlen( __in efx_nic_t *enp, __in uint8_t *buffer, __out uint16_t *lengthp); -#endif /* EFSYS_OPT_RX_SCALE */ - static void falconsiena_rx_qpost( __in efx_rxq_t *erp, diff --git a/sys/dev/sfxge/common/hunt_impl.h b/sys/dev/sfxge/common/hunt_impl.h index 8ac09dc21b8d..6f0f31b60a63 100644 --- a/sys/dev/sfxge/common/hunt_impl.h +++ b/sys/dev/sfxge/common/hunt_impl.h @@ -891,14 +891,14 @@ ef10_rx_prefix_hash( __in efx_rx_hash_alg_t func, __in uint8_t *buffer); +#endif /* EFSYS_OPT_RX_SCALE */ + extern 
__checkReturn efx_rc_t ef10_rx_prefix_pktlen( __in efx_nic_t *enp, __in uint8_t *buffer, __out uint16_t *lengthp); -#endif /* EFSYS_OPT_RX_SCALE */ - extern void ef10_rx_qpost( __in efx_rxq_t *erp, From 4ab493691100bf66cf9dac2c2bd1ee33f62b8b90 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 09:05:51 +0000 Subject: [PATCH 29/88] sfxge: support FATSOv2 in common code Sponsored by: Solarflare Communications, Inc. Reviewed by: gnn MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4912 --- sys/dev/sfxge/common/efx.h | 18 ++++++++++++++++ sys/dev/sfxge/common/efx_impl.h | 3 +++ sys/dev/sfxge/common/efx_nic.c | 3 ++- sys/dev/sfxge/common/efx_tx.c | 22 ++++++++++++++++++++ sys/dev/sfxge/common/hunt_impl.h | 9 ++++++++ sys/dev/sfxge/common/hunt_nic.c | 7 +++++++ sys/dev/sfxge/common/hunt_tx.c | 35 +++++++++++++++++++++++++++++++- sys/dev/sfxge/common/siena_nic.c | 1 + 8 files changed, 96 insertions(+), 2 deletions(-) diff --git a/sys/dev/sfxge/common/efx.h b/sys/dev/sfxge/common/efx.h index 0483c9a7c4b4..332ce962aaf1 100644 --- a/sys/dev/sfxge/common/efx.h +++ b/sys/dev/sfxge/common/efx.h @@ -1071,6 +1071,7 @@ efx_bist_stop( #define EFX_FEATURE_TX_SRC_FILTERS 0x00000400 #define EFX_FEATURE_PIO_BUFFERS 0x00000800 #define EFX_FEATURE_FW_ASSISTED_TSO 0x00001000 +#define EFX_FEATURE_FW_ASSISTED_TSO_V2 0x00002000 typedef struct efx_nic_cfg_s { uint32_t enc_board_type; @@ -1152,6 +1153,7 @@ typedef struct efx_nic_cfg_s { */ uint32_t enc_tx_tso_tcp_header_offset_limit; boolean_t enc_fw_assisted_tso_enabled; + boolean_t enc_fw_assisted_tso_v2_enabled; boolean_t enc_hw_tx_insert_vlan_enabled; /* Datapath firmware vadapter/vport/vswitch support */ boolean_t enc_datapath_cap_evb; @@ -2002,6 +2004,7 @@ efx_tx_fini( #define EFX_TXQ_CKSUM_IPV4 0x0001 #define EFX_TXQ_CKSUM_TCPUDP 0x0002 +#define EFX_TXQ_FATSOV2 0x0004 extern __checkReturn efx_rc_t efx_tx_qcreate( @@ -2089,6 +2092,21 @@ efx_tx_qdesc_tso_create( __in uint8_t tcp_flags, __out 
efx_desc_t *edp); +/* Number of FATSOv2 option descriptors */ +#define EFX_TX_FATSOV2_OPT_NDESCS 2 + +/* Maximum number of DMA segments per TSO packet (not superframe) */ +#define EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX 24 + +extern void +efx_tx_qdesc_tso2_create( + __in efx_txq_t *etp, + __in uint16_t ipv4_id, + __in uint32_t tcp_seq, + __in uint16_t tcp_mss, + __out_ecount(count) efx_desc_t *edp, + __in int count); + extern void efx_tx_qdesc_vlantci_create( __in efx_txq_t *etp, diff --git a/sys/dev/sfxge/common/efx_impl.h b/sys/dev/sfxge/common/efx_impl.h index ab4bf2fcd4b4..46f0f7efe7d8 100644 --- a/sys/dev/sfxge/common/efx_impl.h +++ b/sys/dev/sfxge/common/efx_impl.h @@ -146,6 +146,9 @@ typedef struct efx_tx_ops_s { void (*etxo_qdesc_tso_create)(efx_txq_t *, uint16_t, uint32_t, uint8_t, efx_desc_t *); + void (*etxo_qdesc_tso2_create)(efx_txq_t *, uint16_t, + uint32_t, uint16_t, + efx_desc_t *, int); void (*etxo_qdesc_vlantci_create)(efx_txq_t *, uint16_t, efx_desc_t *); #if EFSYS_OPT_QSTATS diff --git a/sys/dev/sfxge/common/efx_nic.c b/sys/dev/sfxge/common/efx_nic.c index f88f11862197..28ef8c7b088d 100644 --- a/sys/dev/sfxge/common/efx_nic.c +++ b/sys/dev/sfxge/common/efx_nic.c @@ -381,7 +381,8 @@ efx_nic_create( EFX_FEATURE_MAC_HEADER_FILTERS | EFX_FEATURE_MCDI_DMA | EFX_FEATURE_PIO_BUFFERS | - EFX_FEATURE_FW_ASSISTED_TSO; + EFX_FEATURE_FW_ASSISTED_TSO | + EFX_FEATURE_FW_ASSISTED_TSO_V2; break; #endif /* EFSYS_OPT_HUNTINGTON */ diff --git a/sys/dev/sfxge/common/efx_tx.c b/sys/dev/sfxge/common/efx_tx.c index cff77426fe06..168fde4cfbc0 100644 --- a/sys/dev/sfxge/common/efx_tx.c +++ b/sys/dev/sfxge/common/efx_tx.c @@ -142,6 +142,7 @@ static efx_tx_ops_t __efx_tx_falcon_ops = { falconsiena_tx_qdesc_post, /* etxo_qdesc_post */ falconsiena_tx_qdesc_dma_create, /* etxo_qdesc_dma_create */ NULL, /* etxo_qdesc_tso_create */ + NULL, /* etxo_qdesc_tso2_create */ NULL, /* etxo_qdesc_vlantci_create */ #if EFSYS_OPT_QSTATS falconsiena_tx_qstats_update, /* etxo_qstats_update */ 
@@ -167,6 +168,7 @@ static efx_tx_ops_t __efx_tx_siena_ops = { falconsiena_tx_qdesc_post, /* etxo_qdesc_post */ falconsiena_tx_qdesc_dma_create, /* etxo_qdesc_dma_create */ NULL, /* etxo_qdesc_tso_create */ + NULL, /* etxo_qdesc_tso2_create */ NULL, /* etxo_qdesc_vlantci_create */ #if EFSYS_OPT_QSTATS falconsiena_tx_qstats_update, /* etxo_qstats_update */ @@ -192,6 +194,7 @@ static efx_tx_ops_t __efx_tx_hunt_ops = { ef10_tx_qdesc_post, /* etxo_qdesc_post */ ef10_tx_qdesc_dma_create, /* etxo_qdesc_dma_create */ hunt_tx_qdesc_tso_create, /* etxo_qdesc_tso_create */ + ef10_tx_qdesc_tso2_create, /* etxo_qdesc_tso2_create */ ef10_tx_qdesc_vlantci_create, /* etxo_qdesc_vlantci_create */ #if EFSYS_OPT_QSTATS ef10_tx_qstats_update, /* etxo_qstats_update */ @@ -217,6 +220,7 @@ static efx_tx_ops_t __efx_tx_medford_ops = { ef10_tx_qdesc_post, /* etxo_qdesc_post */ ef10_tx_qdesc_dma_create, /* etxo_qdesc_dma_create */ NULL, /* etxo_qdesc_tso_create */ + ef10_tx_qdesc_tso2_create, /* etxo_qdesc_tso2_create */ ef10_tx_qdesc_vlantci_create, /* etxo_qdesc_vlantci_create */ #if EFSYS_OPT_QSTATS ef10_tx_qstats_update, /* etxo_qstats_update */ @@ -640,6 +644,24 @@ efx_tx_qdesc_tso_create( etxop->etxo_qdesc_tso_create(etp, ipv4_id, tcp_seq, tcp_flags, edp); } + void +efx_tx_qdesc_tso2_create( + __in efx_txq_t *etp, + __in uint16_t ipv4_id, + __in uint32_t tcp_seq, + __in uint16_t mss, + __out_ecount(count) efx_desc_t *edp, + __in int count) +{ + efx_nic_t *enp = etp->et_enp; + efx_tx_ops_t *etxop = enp->en_etxop; + + EFSYS_ASSERT3U(etp->et_magic, ==, EFX_TXQ_MAGIC); + EFSYS_ASSERT(etxop->etxo_qdesc_tso2_create != NULL); + + etxop->etxo_qdesc_tso2_create(etp, ipv4_id, tcp_seq, mss, edp, count); +} + void efx_tx_qdesc_vlantci_create( __in efx_txq_t *etp, diff --git a/sys/dev/sfxge/common/hunt_impl.h b/sys/dev/sfxge/common/hunt_impl.h index 6f0f31b60a63..874c526ff6cf 100644 --- a/sys/dev/sfxge/common/hunt_impl.h +++ b/sys/dev/sfxge/common/hunt_impl.h @@ -700,6 +700,15 @@ 
hunt_tx_qdesc_tso_create( __in uint8_t tcp_flags, __out efx_desc_t *edp); +extern void +ef10_tx_qdesc_tso2_create( + __in efx_txq_t *etp, + __in uint16_t ipv4_id, + __in uint32_t tcp_seq, + __in uint16_t tcp_mss, + __out_ecount(count) efx_desc_t *edp, + __in int count); + extern void ef10_tx_qdesc_vlantci_create( __in efx_txq_t *etp, diff --git a/sys/dev/sfxge/common/hunt_nic.c b/sys/dev/sfxge/common/hunt_nic.c index 21411afbb211..a1cd990e6b87 100644 --- a/sys/dev/sfxge/common/hunt_nic.c +++ b/sys/dev/sfxge/common/hunt_nic.c @@ -920,6 +920,13 @@ ef10_get_datapath_caps( else encp->enc_fw_assisted_tso_enabled = B_FALSE; + /* Check if the firmware supports FATSOv2 */ + if (MCDI_CMD_DWORD_FIELD(&datapath_capabilities_v2, + GET_CAPABILITIES_V2_OUT_TX_TSO_V2) == 1) + encp->enc_fw_assisted_tso_v2_enabled = B_TRUE; + else + encp->enc_fw_assisted_tso_v2_enabled = B_FALSE; + /* Check if the firmware has vadapter/vport/vswitch support */ if (MCDI_CMD_DWORD_FIELD(&datapath_capabilities, GET_CAPABILITIES_OUT_EVB) == 1) diff --git a/sys/dev/sfxge/common/hunt_tx.c b/sys/dev/sfxge/common/hunt_tx.c index 593db8835058..baa74443544a 100755 --- a/sys/dev/sfxge/common/hunt_tx.c +++ b/sys/dev/sfxge/common/hunt_tx.c @@ -87,12 +87,13 @@ efx_mcdi_init_txq( MCDI_IN_SET_DWORD(req, INIT_TXQ_IN_LABEL, label); MCDI_IN_SET_DWORD(req, INIT_TXQ_IN_INSTANCE, instance); - MCDI_IN_POPULATE_DWORD_6(req, INIT_TXQ_IN_FLAGS, + MCDI_IN_POPULATE_DWORD_7(req, INIT_TXQ_IN_FLAGS, INIT_TXQ_IN_FLAG_BUFF_MODE, 0, INIT_TXQ_IN_FLAG_IP_CSUM_DIS, (flags & EFX_TXQ_CKSUM_IPV4) ? 0 : 1, INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, (flags & EFX_TXQ_CKSUM_TCPUDP) ? 0 : 1, + INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, (flags & EFX_TXQ_FATSOV2) ? 
1 : 0, INIT_TXQ_IN_FLAG_TCP_UDP_ONLY, 0, INIT_TXQ_IN_CRC_MODE, 0, INIT_TXQ_IN_FLAG_TIMESTAMP, 0); @@ -588,6 +589,38 @@ hunt_tx_qdesc_tso_create( ESF_DZ_TX_TSO_TCP_SEQNO, tcp_seq); } + void +ef10_tx_qdesc_tso2_create( + __in efx_txq_t *etp, + __in uint16_t ipv4_id, + __in uint32_t tcp_seq, + __in uint16_t tcp_mss, + __out_ecount(count) efx_desc_t *edp, + __in int count) +{ + EFSYS_PROBE4(tx_desc_tso2_create, unsigned int, etp->et_index, + uint16_t, ipv4_id, uint32_t, tcp_seq, + uint16_t, tcp_mss); + + EFSYS_ASSERT(count >= EFX_TX_FATSOV2_OPT_NDESCS); + + EFX_POPULATE_QWORD_5(edp[0].ed_eq, + ESF_DZ_TX_DESC_IS_OPT, 1, + ESF_DZ_TX_OPTION_TYPE, + ESE_DZ_TX_OPTION_DESC_TSO, + ESF_DZ_TX_TSO_OPTION_TYPE, + ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A, + ESF_DZ_TX_TSO_IP_ID, ipv4_id, + ESF_DZ_TX_TSO_TCP_SEQNO, tcp_seq); + EFX_POPULATE_QWORD_4(edp[1].ed_eq, + ESF_DZ_TX_DESC_IS_OPT, 1, + ESF_DZ_TX_OPTION_TYPE, + ESE_DZ_TX_OPTION_DESC_TSO, + ESF_DZ_TX_TSO_OPTION_TYPE, + ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B, + ESF_DZ_TX_TSO_TCP_MSS, tcp_mss); +} + void ef10_tx_qdesc_vlantci_create( __in efx_txq_t *etp, diff --git a/sys/dev/sfxge/common/siena_nic.c b/sys/dev/sfxge/common/siena_nic.c index 70e7b5d1ccad..59e12833160c 100644 --- a/sys/dev/sfxge/common/siena_nic.c +++ b/sys/dev/sfxge/common/siena_nic.c @@ -169,6 +169,7 @@ siena_board_cfg( encp->enc_hw_tx_insert_vlan_enabled = B_FALSE; encp->enc_fw_assisted_tso_enabled = B_FALSE; + encp->enc_fw_assisted_tso_v2_enabled = B_FALSE; encp->enc_allow_set_mac_with_installed_filters = B_TRUE; return (0); From ac23c34a9f2e82a2857dda1a61f646727d1c494d Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 09:07:40 +0000 Subject: [PATCH 30/88] sfxge: use correct register definitions for setting interrupt moderation on Medford The only value which has changed is the number of rows (ER_DZ_EVQ_TMR_REG_ROWS is 2048 vs 1024 for FR_BZ_TIMER_COMMAND_REGP0_ROWS) but that isn't used, so this shouldn't change behaviour. 
Submitted by: Mark Spender Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4913 --- sys/dev/sfxge/common/hunt_ev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/dev/sfxge/common/hunt_ev.c b/sys/dev/sfxge/common/hunt_ev.c index 2cea2e05bfe0..6c607cc60b14 100644 --- a/sys/dev/sfxge/common/hunt_ev.c +++ b/sys/dev/sfxge/common/hunt_ev.c @@ -444,9 +444,9 @@ ef10_ev_qmoderate( eep->ee_index, &dword, 0); } else { EFX_POPULATE_DWORD_2(dword, - FRF_CZ_TC_TIMER_MODE, mode, - FRF_CZ_TC_TIMER_VAL, timer_val); - EFX_BAR_TBL_WRITED(enp, FR_BZ_TIMER_COMMAND_REGP0, + ERF_DZ_TC_TIMER_MODE, mode, + ERF_DZ_TC_TIMER_VAL, timer_val); + EFX_BAR_TBL_WRITED(enp, ER_DZ_EVQ_TMR_REG, eep->ee_index, &dword, 0); } From 72cda83214f188a51643837d589ee8249329f586 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 09:11:20 +0000 Subject: [PATCH 31/88] sfxge: fix common code VPD iterator and duplicate tag verification Fix efx_vpd_hunk_next() which has -- since its inception -- failed to correctly iterate over the tags and keywords contained in the VPD data. Only the first tag or keyword would be returned and the next call with *contp == 1 would walk to the end of the data and finish. This was spotted when fixing up errors spotted by Prefast code analysis (which neglected to set all of the out parameters in all successful cases) Also fix efx_vpd_verify() on Siena and EF10 which (as a side effect of correctly iterating over all the tags and keywords) was failing as it detected that both the static VPD and dynamic VPD storage contained an RV keyword in the VPD-R tag. This is intentional as the static VPD and dynamic VPD are stored separately (firmware merges their contents and computes a new RV keyword checksum for the data readable from the VPD capability in PCIe configuration space). 
Submitted by: Andrew Lee Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4915 --- sys/dev/sfxge/common/efx_vpd.c | 19 +++++++++++-------- sys/dev/sfxge/common/hunt_vpd.c | 7 +++++++ sys/dev/sfxge/common/siena_vpd.c | 7 +++++++ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/sys/dev/sfxge/common/efx_vpd.c b/sys/dev/sfxge/common/efx_vpd.c index c1762e62193f..ebeddfea90f5 100644 --- a/sys/dev/sfxge/common/efx_vpd.c +++ b/sys/dev/sfxge/common/efx_vpd.c @@ -669,7 +669,7 @@ efx_vpd_hunk_next( __in size_t size, __out efx_vpd_tag_t *tagp, __out efx_vpd_keyword_t *keywordp, - __out_bcount_opt(*paylenp) unsigned int *payloadp, + __out_opt unsigned int *payloadp, __out_opt uint8_t *paylenp, __inout unsigned int *contp) { @@ -689,12 +689,18 @@ efx_vpd_hunk_next( if ((rc = efx_vpd_next_tag(data, size, &offset, &tag, &taglen)) != 0) goto fail1; - if (tag == EFX_VPD_END) + + if (tag == EFX_VPD_END) { + keyword = 0; + paylen = 0; + index = 0; break; + } if (tag == EFX_VPD_ID) { - if (index == *contp) { + if (index++ == *contp) { EFSYS_ASSERT3U(taglen, <, 0x100); + keyword = 0; paylen = (uint8_t)MIN(taglen, 0xff); goto done; @@ -705,7 +711,7 @@ efx_vpd_hunk_next( taglen, pos, &keyword, &keylen)) != 0) goto fail2; - if (index == *contp) { + if (index++ == *contp) { offset += pos + 3; paylen = keylen; @@ -717,9 +723,6 @@ efx_vpd_hunk_next( offset += taglen; } - *contp = 0; - return (0); - done: *tagp = tag; *keywordp = keyword; @@ -728,7 +731,7 @@ efx_vpd_hunk_next( if (paylenp != NULL) *paylenp = paylen; - ++(*contp); + *contp = index; return (0); fail2: diff --git a/sys/dev/sfxge/common/hunt_vpd.c b/sys/dev/sfxge/common/hunt_vpd.c index 58e9a66ac712..6ada785e456d 100644 --- a/sys/dev/sfxge/common/hunt_vpd.c +++ b/sys/dev/sfxge/common/hunt_vpd.c @@ -210,6 +210,13 @@ ef10_vpd_verify( if (dcont == 0) break; + /* + * Skip the RV keyword. 
It should be present in both the static + * and dynamic cfg sectors. + */ + if (dtag == EFX_VPD_RO && dkey == EFX_VPD_KEYWORD('R', 'V')) + continue; + scont = 0; _NOTE(CONSTANTCONDITION) while (1) { diff --git a/sys/dev/sfxge/common/siena_vpd.c b/sys/dev/sfxge/common/siena_vpd.c index 9a07f9130e00..7a7ce675ef56 100644 --- a/sys/dev/sfxge/common/siena_vpd.c +++ b/sys/dev/sfxge/common/siena_vpd.c @@ -326,6 +326,13 @@ siena_vpd_verify( if (dcont == 0) break; + /* + * Skip the RV keyword. It should be present in both the static + * and dynamic cfg sectors. + */ + if (dtag == EFX_VPD_RO && dkey == EFX_VPD_KEYWORD('R', 'V')) + continue; + scont = 0; _NOTE(CONSTANTCONDITION) while (1) { From 410298b13812c3610bfcbcf8f63e9ff15c3e2529 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 09:12:40 +0000 Subject: [PATCH 32/88] sfxge: cleanup: adjust efx_mcdi_get_port_modes() comment for clarity Fix an explanatory comment which did not explain very well. Submitted by: Richard Houldsworth Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4916 --- sys/dev/sfxge/common/hunt_nic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sys/dev/sfxge/common/hunt_nic.c b/sys/dev/sfxge/common/hunt_nic.c index a1cd990e6b87..823393e7859a 100644 --- a/sys/dev/sfxge/common/hunt_nic.c +++ b/sys/dev/sfxge/common/hunt_nic.c @@ -112,7 +112,10 @@ efx_mcdi_get_port_modes( goto fail1; } - /* Accept pre-Medford size (8 bytes - no CurrentMode field) */ + /* + * Require only Modes and DefaultMode fields. 
+ * (CurrentMode field was added for Medford) + */ if (req.emr_out_length_used < MC_CMD_GET_PORT_MODES_OUT_CURRENT_MODE_OFST) { rc = EMSGSIZE; From dc5427fc70618fbe5f10b2dd8a8bdbcb2ba0ce5b Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 09:14:40 +0000 Subject: [PATCH 33/88] sfxge: add table entries for License NVRAM partition Submitted by: Richard Houldsworth Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4917 --- sys/dev/sfxge/common/efx.h | 1 + sys/dev/sfxge/common/hunt_nvram.c | 12 ++++++++++-- sys/dev/sfxge/common/siena_nvram.c | 2 ++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/sys/dev/sfxge/common/efx.h b/sys/dev/sfxge/common/efx.h index 332ce962aaf1..cf629625089f 100644 --- a/sys/dev/sfxge/common/efx.h +++ b/sys/dev/sfxge/common/efx.h @@ -1317,6 +1317,7 @@ typedef enum efx_nvram_type_e { EFX_NVRAM_CPLD, EFX_NVRAM_FPGA_BACKUP, EFX_NVRAM_DYNAMIC_CFG, + EFX_NVRAM_LICENSE, EFX_NVRAM_NTYPES, } efx_nvram_type_t; diff --git a/sys/dev/sfxge/common/hunt_nvram.c b/sys/dev/sfxge/common/hunt_nvram.c index e1fa36a32951..5498ab0e5f55 100644 --- a/sys/dev/sfxge/common/hunt_nvram.c +++ b/sys/dev/sfxge/common/hunt_nvram.c @@ -1541,7 +1541,11 @@ static ef10_parttbl_entry_t hunt_parttbl[] = { {NVRAM_PARTITION_TYPE_FPGA_BACKUP, 1, EFX_NVRAM_FPGA_BACKUP}, {NVRAM_PARTITION_TYPE_FPGA_BACKUP, 2, EFX_NVRAM_FPGA_BACKUP}, {NVRAM_PARTITION_TYPE_FPGA_BACKUP, 3, EFX_NVRAM_FPGA_BACKUP}, - {NVRAM_PARTITION_TYPE_FPGA_BACKUP, 4, EFX_NVRAM_FPGA_BACKUP} + {NVRAM_PARTITION_TYPE_FPGA_BACKUP, 4, EFX_NVRAM_FPGA_BACKUP}, + {NVRAM_PARTITION_TYPE_LICENSE, 1, EFX_NVRAM_LICENSE}, + {NVRAM_PARTITION_TYPE_LICENSE, 2, EFX_NVRAM_LICENSE}, + {NVRAM_PARTITION_TYPE_LICENSE, 3, EFX_NVRAM_LICENSE}, + {NVRAM_PARTITION_TYPE_LICENSE, 4, EFX_NVRAM_LICENSE} }; static ef10_parttbl_entry_t medford_parttbl[] = { @@ -1572,7 +1576,11 @@ static ef10_parttbl_entry_t medford_parttbl[] = { 
{NVRAM_PARTITION_TYPE_FPGA_BACKUP, 1, EFX_NVRAM_FPGA_BACKUP}, {NVRAM_PARTITION_TYPE_FPGA_BACKUP, 2, EFX_NVRAM_FPGA_BACKUP}, {NVRAM_PARTITION_TYPE_FPGA_BACKUP, 3, EFX_NVRAM_FPGA_BACKUP}, - {NVRAM_PARTITION_TYPE_FPGA_BACKUP, 4, EFX_NVRAM_FPGA_BACKUP} + {NVRAM_PARTITION_TYPE_FPGA_BACKUP, 4, EFX_NVRAM_FPGA_BACKUP}, + {NVRAM_PARTITION_TYPE_LICENSE, 1, EFX_NVRAM_LICENSE}, + {NVRAM_PARTITION_TYPE_LICENSE, 2, EFX_NVRAM_LICENSE}, + {NVRAM_PARTITION_TYPE_LICENSE, 3, EFX_NVRAM_LICENSE}, + {NVRAM_PARTITION_TYPE_LICENSE, 4, EFX_NVRAM_LICENSE} }; static __checkReturn efx_rc_t diff --git a/sys/dev/sfxge/common/siena_nvram.c b/sys/dev/sfxge/common/siena_nvram.c index aae2f08f35ce..f19314261ad9 100644 --- a/sys/dev/sfxge/common/siena_nvram.c +++ b/sys/dev/sfxge/common/siena_nvram.c @@ -227,6 +227,8 @@ static siena_parttbl_entry_t siena_parttbl[] = { {MC_CMD_NVRAM_TYPE_FC_FW, 2, EFX_NVRAM_FCFW}, {MC_CMD_NVRAM_TYPE_CPLD, 1, EFX_NVRAM_CPLD}, {MC_CMD_NVRAM_TYPE_CPLD, 2, EFX_NVRAM_CPLD}, + {MC_CMD_NVRAM_TYPE_LICENSE, 1, EFX_NVRAM_LICENSE}, + {MC_CMD_NVRAM_TYPE_LICENSE, 2, EFX_NVRAM_LICENSE} }; __checkReturn efx_rc_t From 0c848230f72ce4108cfc3fc6904fb55b8d66b614 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 09:19:28 +0000 Subject: [PATCH 34/88] sfxge: add accessors for license-related MCDI calls to common code Add support for Huntington MCDI licensing interface to common code. Ported from Linux net driver IOCTL functions with restructuring for initial support for V3 licensing API. Submitted by: Richard Houldsworth Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. 
MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4918 --- sys/conf/files.amd64 | 1 + sys/dev/sfxge/common/efsys.h | 2 + sys/dev/sfxge/common/efx.h | 51 ++ sys/dev/sfxge/common/efx_check.h | 11 + sys/dev/sfxge/common/efx_impl.h | 16 + sys/dev/sfxge/common/efx_lic.c | 792 +++++++++++++++++++++++++++++++ sys/dev/sfxge/common/efx_nic.c | 7 +- sys/modules/sfxge/Makefile | 2 +- 8 files changed, 878 insertions(+), 4 deletions(-) create mode 100644 sys/dev/sfxge/common/efx_lic.c diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 11e9e12c2754..5944a383d692 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -317,6 +317,7 @@ dev/sfxge/common/efx_ev.c optional sfxge pci dev/sfxge/common/efx_filter.c optional sfxge pci dev/sfxge/common/efx_hash.c optional sfxge pci dev/sfxge/common/efx_intr.c optional sfxge pci +dev/sfxge/common/efx_lic.c optional sfxge pci dev/sfxge/common/efx_mac.c optional sfxge pci dev/sfxge/common/efx_mcdi.c optional sfxge pci dev/sfxge/common/efx_mon.c optional sfxge pci diff --git a/sys/dev/sfxge/common/efsys.h b/sys/dev/sfxge/common/efsys.h index 4d72e28903e3..c6729cab875d 100644 --- a/sys/dev/sfxge/common/efsys.h +++ b/sys/dev/sfxge/common/efsys.h @@ -293,6 +293,8 @@ sfxge_map_mbuf_fast(bus_dma_tag_t tag, bus_dmamap_t map, #define EFSYS_OPT_DECODE_INTR_FATAL 1 +#define EFSYS_OPT_LICENSING 0 + /* ID */ typedef struct __efsys_identifier_s efsys_identifier_t; diff --git a/sys/dev/sfxge/common/efx.h b/sys/dev/sfxge/common/efx.h index cf629625089f..9f6dab9266d8 100644 --- a/sys/dev/sfxge/common/efx.h +++ b/sys/dev/sfxge/common/efx.h @@ -2310,6 +2310,57 @@ efx_hash_bytes( __in size_t length, __in uint32_t init); +#if EFSYS_OPT_LICENSING + +/* LICENSING */ + +typedef struct efx_key_stats_s { + uint32_t eks_valid; + uint32_t eks_invalid; + uint32_t eks_blacklisted; + uint32_t eks_unverifiable; + uint32_t eks_wrong_node; + uint32_t eks_licensed_apps_lo; + uint32_t eks_licensed_apps_hi; + uint32_t 
eks_licensed_features_lo; + uint32_t eks_licensed_features_hi; +} efx_key_stats_t; + +extern __checkReturn efx_rc_t +efx_lic_init( + __in efx_nic_t *enp); + +extern void +efx_lic_fini( + __in efx_nic_t *enp); + +extern __checkReturn efx_rc_t +efx_lic_update_licenses( + __in efx_nic_t *enp); + +extern __checkReturn efx_rc_t +efx_lic_get_key_stats( + __in efx_nic_t *enp, + __out efx_key_stats_t *ksp); + +extern __checkReturn efx_rc_t +efx_lic_app_state( + __in efx_nic_t *enp, + __in uint64_t app_id, + __out boolean_t *licensedp); + +extern __checkReturn efx_rc_t +efx_lic_get_id( + __in efx_nic_t *enp, + __in size_t buffer_size, + __out uint32_t *typep, + __out size_t *lengthp, + __out_opt uint8_t *bufferp); + + +#endif /* EFSYS_OPT_LICENSING */ + + #ifdef __cplusplus } diff --git a/sys/dev/sfxge/common/efx_check.h b/sys/dev/sfxge/common/efx_check.h index 199fab013f38..d6b7cf42eef8 100644 --- a/sys/dev/sfxge/common/efx_check.h +++ b/sys/dev/sfxge/common/efx_check.h @@ -401,4 +401,15 @@ # endif #endif /* EFSYS_OPT_BIST */ +/* Support MCDI licensing API */ +#if EFSYS_OPT_LICENSING +# if !EFSYS_OPT_MCDI +# error "LICENSING requires MCDI" +# endif +# if !EFSYS_HAS_UINT64 +# error "LICENSING requires UINT64" +# endif +#endif /* EFSYS_OPT_LICENSING */ + + #endif /* _SYS_EFX_CHECK_H */ diff --git a/sys/dev/sfxge/common/efx_impl.h b/sys/dev/sfxge/common/efx_impl.h index 46f0f7efe7d8..02a3e8eb6ebc 100644 --- a/sys/dev/sfxge/common/efx_impl.h +++ b/sys/dev/sfxge/common/efx_impl.h @@ -84,6 +84,7 @@ extern "C" { #define EFX_MOD_WOL 0x00000800 #define EFX_MOD_FILTER 0x00001000 #define EFX_MOD_PKTFILTER 0x00002000 +#define EFX_MOD_LIC 0x00004000 #define EFX_RESET_MAC 0x00000001 #define EFX_RESET_PHY 0x00000002 @@ -591,6 +592,18 @@ efx_mcdi_nvram_test( #endif /* EFSYS_OPT_VPD || EFSYS_OPT_NVRAM */ +#if EFSYS_OPT_LICENSING + +typedef struct efx_lic_ops_s { + efx_rc_t (*elo_update_licenses)(efx_nic_t *); + efx_rc_t (*elo_get_key_stats)(efx_nic_t *, efx_key_stats_t *); + efx_rc_t 
(*elo_app_state)(efx_nic_t *, uint64_t, boolean_t *); + efx_rc_t (*elo_get_id)(efx_nic_t *, size_t, uint32_t *, + size_t *, uint8_t *); +} efx_lic_ops_t; + +#endif + typedef struct efx_drv_cfg_s { uint32_t edc_min_vi_count; uint32_t edc_max_vi_count; @@ -640,6 +653,9 @@ struct efx_nic_s { uint32_t en_rss_context; #endif /* EFSYS_OPT_RX_SCALE */ uint32_t en_vport_id; +#if EFSYS_OPT_LICENSING + efx_lic_ops_t *en_elop; +#endif union { #if EFSYS_OPT_FALCON struct { diff --git a/sys/dev/sfxge/common/efx_lic.c b/sys/dev/sfxge/common/efx_lic.c new file mode 100644 index 000000000000..33c8aba6abfc --- /dev/null +++ b/sys/dev/sfxge/common/efx_lic.c @@ -0,0 +1,792 @@ +/*- + * Copyright (c) 2009-2015 Solarflare Communications Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are + * those of the authors and should not be interpreted as representing official + * policies, either expressed or implied, of the FreeBSD Project. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "efx.h" +#include "efx_impl.h" + +#if EFSYS_OPT_LICENSING + +#if EFSYS_OPT_SIENA + +static __checkReturn efx_rc_t +efx_mcdi_fc_license_update_license( + __in efx_nic_t *enp); + +static __checkReturn efx_rc_t +efx_mcdi_fc_license_get_key_stats( + __in efx_nic_t *enp, + __out efx_key_stats_t *eksp); + +static efx_lic_ops_t __efx_lic_v1_ops = { + efx_mcdi_fc_license_update_license, /* elo_update_licenses */ + efx_mcdi_fc_license_get_key_stats, /* elo_get_key_stats */ + NULL, /* elo_app_state */ + NULL, /* elo_get_id */ +}; + +#endif /* EFSYS_OPT_SIENA */ + +#if EFSYS_OPT_HUNTINGTON + +static __checkReturn efx_rc_t +efx_mcdi_licensing_update_licenses( + __in efx_nic_t *enp); + +static __checkReturn efx_rc_t +efx_mcdi_licensing_get_key_stats( + __in efx_nic_t *enp, + __out efx_key_stats_t *eksp); + +static __checkReturn efx_rc_t +efx_mcdi_licensed_app_state( + __in efx_nic_t *enp, + __in uint64_t app_id, + __out boolean_t *licensedp); + +static efx_lic_ops_t __efx_lic_v2_ops = { + efx_mcdi_licensing_update_licenses, /* elo_update_licenses */ + efx_mcdi_licensing_get_key_stats, /* elo_get_key_stats */ + efx_mcdi_licensed_app_state, /* elo_app_state */ + NULL, /* 
elo_get_id */ +}; + +#endif /* EFSYS_OPT_HUNTINGTON */ + +#if EFSYS_OPT_MEDFORD + +static __checkReturn efx_rc_t +efx_mcdi_licensing_v3_update_licenses( + __in efx_nic_t *enp); + +static __checkReturn efx_rc_t +efx_mcdi_licensing_v3_report_license( + __in efx_nic_t *enp, + __out efx_key_stats_t *eksp); + +static __checkReturn efx_rc_t +efx_mcdi_licensing_v3_app_state( + __in efx_nic_t *enp, + __in uint64_t app_id, + __out boolean_t *licensedp); + +static __checkReturn efx_rc_t +efx_mcdi_licensing_v3_get_id( + __in efx_nic_t *enp, + __in size_t buffer_size, + __out uint32_t *typep, + __out size_t *lengthp, + __out_bcount_part_opt(buffer_size, *lengthp) + uint8_t *bufferp); + +static efx_lic_ops_t __efx_lic_v3_ops = { + efx_mcdi_licensing_v3_update_licenses, /* elo_update_licenses */ + efx_mcdi_licensing_v3_report_license, /* elo_get_key_stats */ + efx_mcdi_licensing_v3_app_state, /* elo_app_state */ + efx_mcdi_licensing_v3_get_id, /* elo_get_id */ +}; + +#endif /* EFSYS_OPT_MEDFORD */ + + +/* V1 Licensing - used in Siena Modena only */ + +#if EFSYS_OPT_SIENA + +static __checkReturn efx_rc_t +efx_mcdi_fc_license_update_license( + __in efx_nic_t *enp) +{ + efx_mcdi_req_t req; + uint8_t payload[MC_CMD_FC_IN_LICENSE_LEN]; + efx_rc_t rc; + + EFSYS_ASSERT(enp->en_family == EFX_FAMILY_SIENA); + + (void) memset(payload, 0, sizeof (payload)); + req.emr_cmd = MC_CMD_FC_OP_LICENSE; + req.emr_in_buf = payload; + req.emr_in_length = MC_CMD_FC_IN_LICENSE_LEN; + req.emr_out_buf = payload; + req.emr_out_length = 0; + + MCDI_IN_SET_DWORD(req, FC_IN_LICENSE_OP, + MC_CMD_FC_IN_LICENSE_UPDATE_LICENSE); + + efx_mcdi_execute(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + + if (req.emr_out_length_used != 0) { + rc = EIO; + goto fail2; + } + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t +efx_mcdi_fc_license_get_key_stats( + __in efx_nic_t *enp, + __out 
efx_key_stats_t *eksp) +{ + efx_mcdi_req_t req; + uint8_t payload[MAX(MC_CMD_FC_IN_LICENSE_LEN, + MC_CMD_FC_OUT_LICENSE_LEN)]; + efx_rc_t rc; + + EFSYS_ASSERT(enp->en_family == EFX_FAMILY_SIENA); + + (void) memset(payload, 0, sizeof (payload)); + req.emr_cmd = MC_CMD_FC_OP_LICENSE; + req.emr_in_buf = payload; + req.emr_in_length = MC_CMD_FC_IN_LICENSE_LEN; + req.emr_out_buf = payload; + req.emr_out_length = MC_CMD_FC_OUT_LICENSE_LEN; + + MCDI_IN_SET_DWORD(req, FC_IN_LICENSE_OP, + MC_CMD_FC_IN_LICENSE_GET_KEY_STATS); + + efx_mcdi_execute(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + + if (req.emr_out_length_used < MC_CMD_FC_OUT_LICENSE_LEN) { + rc = EMSGSIZE; + goto fail2; + } + + eksp->eks_valid = + MCDI_OUT_DWORD(req, FC_OUT_LICENSE_VALID_KEYS); + eksp->eks_invalid = + MCDI_OUT_DWORD(req, FC_OUT_LICENSE_INVALID_KEYS); + eksp->eks_blacklisted = + MCDI_OUT_DWORD(req, FC_OUT_LICENSE_BLACKLISTED_KEYS); + eksp->eks_unverifiable = 0; + eksp->eks_wrong_node = 0; + eksp->eks_licensed_apps_lo = 0; + eksp->eks_licensed_apps_hi = 0; + eksp->eks_licensed_features_lo = 0; + eksp->eks_licensed_features_hi = 0; + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +#endif /* EFSYS_OPT_SIENA */ + +/* V2 Licensing - used by Huntington family only. 
See SF-113611-TC */ + +#if EFSYS_OPT_HUNTINGTON + +static __checkReturn efx_rc_t +efx_mcdi_licensed_app_state( + __in efx_nic_t *enp, + __in uint64_t app_id, + __out boolean_t *licensedp) +{ + efx_mcdi_req_t req; + uint8_t payload[MAX(MC_CMD_GET_LICENSED_APP_STATE_IN_LEN, + MC_CMD_GET_LICENSED_APP_STATE_OUT_LEN)]; + uint32_t app_state; + efx_rc_t rc; + + EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON); + + /* V2 licensing supports 32bit app id only */ + if ((app_id >> 32) != 0) { + rc = EINVAL; + goto fail1; + } + + (void) memset(payload, 0, sizeof (payload)); + req.emr_cmd = MC_CMD_GET_LICENSED_APP_STATE; + req.emr_in_buf = payload; + req.emr_in_length = MC_CMD_GET_LICENSED_APP_STATE_IN_LEN; + req.emr_out_buf = payload; + req.emr_out_length = MC_CMD_GET_LICENSED_APP_STATE_OUT_LEN; + + MCDI_IN_SET_DWORD(req, GET_LICENSED_APP_STATE_IN_APP_ID, + app_id & 0xffffffff); + + efx_mcdi_execute(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail2; + } + + if (req.emr_out_length_used < MC_CMD_GET_LICENSED_APP_STATE_OUT_LEN) { + rc = EMSGSIZE; + goto fail3; + } + + app_state = (MCDI_OUT_DWORD(req, GET_LICENSED_APP_STATE_OUT_STATE)); + if (app_state != MC_CMD_GET_LICENSED_APP_STATE_OUT_NOT_LICENSED) { + *licensedp = B_TRUE; + } else { + *licensedp = B_FALSE; + } + + return (0); + +fail3: + EFSYS_PROBE(fail3); +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t +efx_mcdi_licensing_update_licenses( + __in efx_nic_t *enp) +{ + efx_mcdi_req_t req; + uint8_t payload[MC_CMD_LICENSING_IN_LEN]; + efx_rc_t rc; + + EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON); + + (void) memset(payload, 0, sizeof (payload)); + req.emr_cmd = MC_CMD_LICENSING; + req.emr_in_buf = payload; + req.emr_in_length = MC_CMD_LICENSING_IN_LEN; + req.emr_out_buf = payload; + req.emr_out_length = 0; + + MCDI_IN_SET_DWORD(req, LICENSING_IN_OP, + MC_CMD_LICENSING_IN_OP_UPDATE_LICENSE); + + efx_mcdi_execute(enp, 
&req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + + if (req.emr_out_length_used != 0) { + rc = EIO; + goto fail2; + } + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t +efx_mcdi_licensing_get_key_stats( + __in efx_nic_t *enp, + __out efx_key_stats_t *eksp) +{ + efx_mcdi_req_t req; + uint8_t payload[MAX(MC_CMD_LICENSING_IN_LEN, + MC_CMD_LICENSING_OUT_LEN)]; + efx_rc_t rc; + + EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON); + + (void) memset(payload, 0, sizeof (payload)); + req.emr_cmd = MC_CMD_LICENSING; + req.emr_in_buf = payload; + req.emr_in_length = MC_CMD_LICENSING_IN_LEN; + req.emr_out_buf = payload; + req.emr_out_length = MC_CMD_LICENSING_OUT_LEN; + + MCDI_IN_SET_DWORD(req, LICENSING_IN_OP, + MC_CMD_LICENSING_IN_OP_GET_KEY_STATS); + + efx_mcdi_execute(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + + if (req.emr_out_length_used < MC_CMD_LICENSING_OUT_LEN) { + rc = EMSGSIZE; + goto fail2; + } + + eksp->eks_valid = + MCDI_OUT_DWORD(req, LICENSING_OUT_VALID_APP_KEYS); + eksp->eks_invalid = + MCDI_OUT_DWORD(req, LICENSING_OUT_INVALID_APP_KEYS); + eksp->eks_blacklisted = + MCDI_OUT_DWORD(req, LICENSING_OUT_BLACKLISTED_APP_KEYS); + eksp->eks_unverifiable = + MCDI_OUT_DWORD(req, LICENSING_OUT_UNVERIFIABLE_APP_KEYS); + eksp->eks_wrong_node = + MCDI_OUT_DWORD(req, LICENSING_OUT_WRONG_NODE_APP_KEYS); + eksp->eks_licensed_apps_lo = 0; + eksp->eks_licensed_apps_hi = 0; + eksp->eks_licensed_features_lo = 0; + eksp->eks_licensed_features_hi = 0; + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +#endif /* EFSYS_OPT_HUNTINGTON */ + +/* V3 Licensing - used starting from Medford family. 
See SF-114884-SW */ + +#if EFSYS_OPT_MEDFORD + +static __checkReturn efx_rc_t +efx_mcdi_licensing_v3_update_licenses( + __in efx_nic_t *enp) +{ + efx_mcdi_req_t req; + uint8_t payload[MC_CMD_LICENSING_V3_IN_LEN]; + efx_rc_t rc; + + EFSYS_ASSERT(enp->en_family == EFX_FAMILY_MEDFORD); + + (void) memset(payload, 0, sizeof (payload)); + req.emr_cmd = MC_CMD_LICENSING_V3; + req.emr_in_buf = payload; + req.emr_in_length = MC_CMD_LICENSING_V3_IN_LEN; + req.emr_out_buf = NULL; + req.emr_out_length = 0; + + MCDI_IN_SET_DWORD(req, LICENSING_V3_IN_OP, + MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE); + + efx_mcdi_execute(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + + return (0); + +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t +efx_mcdi_licensing_v3_report_license( + __in efx_nic_t *enp, + __out efx_key_stats_t *eksp) +{ + efx_mcdi_req_t req; + uint8_t payload[MAX(MC_CMD_LICENSING_V3_IN_LEN, + MC_CMD_LICENSING_V3_OUT_LEN)]; + efx_rc_t rc; + + EFSYS_ASSERT(enp->en_family == EFX_FAMILY_MEDFORD); + + (void) memset(payload, 0, sizeof (payload)); + req.emr_cmd = MC_CMD_LICENSING_V3; + req.emr_in_buf = payload; + req.emr_in_length = MC_CMD_LICENSING_V3_IN_LEN; + req.emr_out_buf = payload; + req.emr_out_length = MC_CMD_LICENSING_V3_OUT_LEN; + + MCDI_IN_SET_DWORD(req, LICENSING_V3_IN_OP, + MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE); + + efx_mcdi_execute(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + + if (req.emr_out_length_used < MC_CMD_LICENSING_V3_OUT_LEN) { + rc = EMSGSIZE; + goto fail2; + } + + eksp->eks_valid = + MCDI_OUT_DWORD(req, LICENSING_V3_OUT_VALID_KEYS); + eksp->eks_invalid = + MCDI_OUT_DWORD(req, LICENSING_V3_OUT_INVALID_KEYS); + eksp->eks_blacklisted = 0; + eksp->eks_unverifiable = + MCDI_OUT_DWORD(req, LICENSING_V3_OUT_UNVERIFIABLE_KEYS); + eksp->eks_wrong_node = + MCDI_OUT_DWORD(req, LICENSING_V3_OUT_WRONG_NODE_KEYS); + eksp->eks_licensed_apps_lo = + 
MCDI_OUT_DWORD(req, LICENSING_V3_OUT_LICENSED_APPS_LO); + eksp->eks_licensed_apps_hi = + MCDI_OUT_DWORD(req, LICENSING_V3_OUT_LICENSED_APPS_HI); + eksp->eks_licensed_features_lo = + MCDI_OUT_DWORD(req, LICENSING_V3_OUT_LICENSED_FEATURES_LO); + eksp->eks_licensed_features_hi = + MCDI_OUT_DWORD(req, LICENSING_V3_OUT_LICENSED_FEATURES_HI); + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t +efx_mcdi_licensing_v3_app_state( + __in efx_nic_t *enp, + __in uint64_t app_id, + __out boolean_t *licensedp) +{ + efx_mcdi_req_t req; + uint8_t payload[MAX(MC_CMD_GET_LICENSED_V3_APP_STATE_IN_LEN, + MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LEN)]; + uint32_t app_state; + efx_rc_t rc; + + EFSYS_ASSERT(enp->en_family == EFX_FAMILY_MEDFORD); + + (void) memset(payload, 0, sizeof (payload)); + req.emr_cmd = MC_CMD_GET_LICENSED_V3_APP_STATE; + req.emr_in_buf = payload; + req.emr_in_length = MC_CMD_GET_LICENSED_V3_APP_STATE_IN_LEN; + req.emr_out_buf = payload; + req.emr_out_length = MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LEN; + + MCDI_IN_SET_DWORD(req, GET_LICENSED_V3_APP_STATE_IN_APP_ID_LO, + app_id & 0xffffffff); + MCDI_IN_SET_DWORD(req, GET_LICENSED_V3_APP_STATE_IN_APP_ID_HI, + app_id >> 32); + + efx_mcdi_execute(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + + if (req.emr_out_length_used < MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LEN) { + rc = EMSGSIZE; + goto fail2; + } + + app_state = (MCDI_OUT_DWORD(req, GET_LICENSED_V3_APP_STATE_OUT_STATE)); + if (app_state != MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_NOT_LICENSED) { + *licensedp = B_TRUE; + } else { + *licensedp = B_FALSE; + } + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +static __checkReturn efx_rc_t +efx_mcdi_licensing_v3_get_id( + __in efx_nic_t *enp, + __in size_t buffer_size, + __out uint32_t *typep, + __out size_t *lengthp, + 
__out_bcount_part_opt(buffer_size, *lengthp) + uint8_t *bufferp) +{ + efx_mcdi_req_t req; + uint8_t payload[MAX(MC_CMD_LICENSING_GET_ID_V3_IN_LEN, + MC_CMD_LICENSING_GET_ID_V3_OUT_LENMIN)]; + efx_rc_t rc; + + req.emr_cmd = MC_CMD_LICENSING_GET_ID_V3; + + if (bufferp == NULL) { + /* Request id type and length only */ + req.emr_in_buf = bufferp; + req.emr_in_length = MC_CMD_LICENSING_GET_ID_V3_IN_LEN; + req.emr_out_buf = bufferp; + req.emr_out_length = MC_CMD_LICENSING_GET_ID_V3_OUT_LENMIN; + (void) memset(payload, 0, sizeof (payload)); + } else { + /* Request full buffer */ + req.emr_in_buf = bufferp; + req.emr_in_length = MC_CMD_LICENSING_GET_ID_V3_IN_LEN; + req.emr_out_buf = bufferp; + req.emr_out_length = MIN(buffer_size, MC_CMD_LICENSING_GET_ID_V3_OUT_LENMIN); + (void) memset(bufferp, 0, req.emr_out_length); + } + + efx_mcdi_execute(enp, &req); + + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + + if (req.emr_out_length_used < MC_CMD_LICENSING_GET_ID_V3_OUT_LENMIN) { + rc = EMSGSIZE; + goto fail2; + } + + *typep = MCDI_OUT_DWORD(req, LICENSING_GET_ID_V3_OUT_LICENSE_TYPE); + *lengthp = MCDI_OUT_DWORD(req, LICENSING_GET_ID_V3_OUT_LICENSE_ID_LENGTH); + + if (bufferp == NULL) { + /* modify length requirements to indicate to caller the extra buffering + ** needed to read the complete output. 
+ */ + *lengthp += MC_CMD_LICENSING_GET_ID_V3_OUT_LENMIN; + } else { + /* Shift ID down to start of buffer */ + memmove(bufferp, + bufferp+MC_CMD_LICENSING_GET_ID_V3_OUT_LICENSE_ID_OFST, + *lengthp); + memset(bufferp+(*lengthp), 0, MC_CMD_LICENSING_GET_ID_V3_OUT_LICENSE_ID_OFST); + } + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + + +#endif /* EFSYS_OPT_MEDFORD */ + + __checkReturn efx_rc_t +efx_lic_init( + __in efx_nic_t *enp) +{ + efx_lic_ops_t *elop; + efx_rc_t rc; + + EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); + EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_PROBE); + EFSYS_ASSERT(!(enp->en_mod_flags & EFX_MOD_LIC)); + + switch (enp->en_family) { + +#if EFSYS_OPT_SIENA + case EFX_FAMILY_SIENA: + elop = (efx_lic_ops_t *)&__efx_lic_v1_ops; + break; +#endif /* EFSYS_OPT_SIENA */ + +#if EFSYS_OPT_HUNTINGTON + case EFX_FAMILY_HUNTINGTON: + elop = (efx_lic_ops_t *)&__efx_lic_v2_ops; + break; +#endif /* EFSYS_OPT_HUNTINGTON */ + +#if EFSYS_OPT_MEDFORD + case EFX_FAMILY_MEDFORD: + elop = (efx_lic_ops_t *)&__efx_lic_v3_ops; + break; +#endif /* EFSYS_OPT_MEDFORD */ + + default: + EFSYS_ASSERT(0); + rc = ENOTSUP; + goto fail1; + } + + enp->en_elop = elop; + enp->en_mod_flags |= EFX_MOD_LIC; + + return (0); + +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + + void +efx_lic_fini( + __in efx_nic_t *enp) +{ + efx_lic_ops_t *elop = enp->en_elop; + + EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); + EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_PROBE); + EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_LIC); + + enp->en_elop = NULL; + enp->en_mod_flags &= ~EFX_MOD_LIC; +} + + + __checkReturn efx_rc_t +efx_lic_update_licenses( + __in efx_nic_t *enp) +{ + efx_lic_ops_t *elop = enp->en_elop; + efx_rc_t rc; + + EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); + EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_LIC); + + if ((rc = elop->elo_update_licenses(enp)) != 0) + goto fail1; + + return (0); + 
+fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + + __checkReturn efx_rc_t +efx_lic_get_key_stats( + __in efx_nic_t *enp, + __out efx_key_stats_t *eksp) +{ + efx_lic_ops_t *elop = enp->en_elop; + efx_rc_t rc; + + EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); + EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_LIC); + + if ((rc = elop->elo_get_key_stats(enp, eksp)) != 0) + goto fail1; + + return (0); + +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + + __checkReturn efx_rc_t +efx_lic_app_state( + __in efx_nic_t *enp, + __in uint64_t app_id, + __out boolean_t *licensedp) +{ + efx_lic_ops_t *elop = enp->en_elop; + efx_rc_t rc; + + EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); + EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_LIC); + + if (elop->elo_app_state == NULL) { + rc = ENOTSUP; + goto fail1; + } + if ((rc = elop->elo_app_state(enp, app_id, licensedp)) != 0) + goto fail2; + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + + __checkReturn efx_rc_t +efx_lic_get_id( + __in efx_nic_t *enp, + __in size_t buffer_size, + __out uint32_t *typep, + __out size_t *lengthp, + __out_opt uint8_t *bufferp + ) +{ + efx_lic_ops_t *elop = enp->en_elop; + efx_rc_t rc; + + EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); + EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_LIC); + + if (elop->elo_get_id == NULL) { + rc = ENOTSUP; + goto fail1; + } + + if ((rc = elop->elo_get_id(enp, buffer_size, typep, + lengthp, bufferp)) != 0) + goto fail2; + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +#endif /* EFSYS_OPT_LICENSING */ diff --git a/sys/dev/sfxge/common/efx_nic.c b/sys/dev/sfxge/common/efx_nic.c index 28ef8c7b088d..dd28ecef029f 100644 --- a/sys/dev/sfxge/common/efx_nic.c +++ b/sys/dev/sfxge/common/efx_nic.c @@ -708,8 +708,9 @@ efx_nic_reset( EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); EFSYS_ASSERT(enp->en_mod_flags 
& EFX_MOD_PROBE); /* - * All modules except the MCDI, PROBE, NVRAM, VPD, MON (which we - * do not reset here) must have been shut down or never initialized. + * All modules except the MCDI, PROBE, NVRAM, VPD, MON, LIC + * (which we do not reset here) must have been shut down or never + * initialized. * * A rule of thumb here is: If the controller or MC reboots, is *any* * state lost. If it's lost and needs reapplying, then the module @@ -717,7 +718,7 @@ efx_nic_reset( */ mod_flags = enp->en_mod_flags; mod_flags &= ~(EFX_MOD_MCDI | EFX_MOD_PROBE | EFX_MOD_NVRAM | - EFX_MOD_VPD | EFX_MOD_MON); + EFX_MOD_VPD | EFX_MOD_MON | EFX_MOD_LIC); EFSYS_ASSERT3U(mod_flags, ==, 0); if (mod_flags != 0) { rc = EINVAL; diff --git a/sys/modules/sfxge/Makefile b/sys/modules/sfxge/Makefile index 54d0af13d730..ab9bb2139a76 100644 --- a/sys/modules/sfxge/Makefile +++ b/sys/modules/sfxge/Makefile @@ -14,7 +14,7 @@ SRCS+= sfxge_port.c sfxge_rx.c sfxge_tx.c SRCS+= sfxge.h sfxge_rx.h sfxge_tx.h sfxge_version.h .PATH: ${.CURDIR}/../../dev/sfxge/common -SRCS+= efx_bootcfg.c efx_crc32.c efx_ev.c efx_intr.c efx_mac.c +SRCS+= efx_bootcfg.c efx_crc32.c efx_ev.c efx_intr.c efx_lic.c efx_mac.c SRCS+= efx_mcdi.c efx_mon.c efx_nic.c SRCS+= efx_nvram.c efx_phy.c efx_port.c efx_rx.c efx_sram.c efx_tx.c SRCS+= efx_vpd.c efx_wol.c efx_filter.c efx_hash.c From 0ed660ee947a19141fe10e7974fd0f2f8f168811 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Thu, 14 Jan 2016 09:20:25 +0000 Subject: [PATCH 35/88] sfxge: cleanup: quieten efx_mcdi_read_response_header error reporting The "mcdi_err_arg" probe still reports results of failed MCDI commands, unless the caller invoked efx_mcdi_execute_quiet(). Submitted by: Andy Moreton Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. 
MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4919 --- sys/dev/sfxge/common/efx_mcdi.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sys/dev/sfxge/common/efx_mcdi.c b/sys/dev/sfxge/common/efx_mcdi.c index 7224c3686c17..5ff37ced8dc0 100644 --- a/sys/dev/sfxge/common/efx_mcdi.c +++ b/sys/dev/sfxge/common/efx_mcdi.c @@ -462,15 +462,8 @@ efx_mcdi_read_response_header( return; fail3: - if (!emrp->emr_quiet) - EFSYS_PROBE(fail3); fail2: - if (!emrp->emr_quiet) - EFSYS_PROBE(fail2); fail1: - if (!emrp->emr_quiet) - EFSYS_PROBE1(fail1, efx_rc_t, rc); - emrp->emr_rc = rc; emrp->emr_out_length_used = 0; } From bb39a9ed8e7152eec02722f08df1dcfd482a1028 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Thu, 14 Jan 2016 09:22:01 +0000 Subject: [PATCH 36/88] Fix GCC warnings causing build failure after r293724 Disable some compiler warnings for GCC (non-standard compiler) fixing build failures introduced by r293724, which enabled WARNS in the EFI boot code, when compiling with a non-standard compiler (GCC). Raised by: ian MFC after: 2 weeks X-MFC-With: r293268 Sponsored by: Multiplay --- sys/boot/arm/uboot/Makefile | 2 ++ sys/boot/efi/fdt/Makefile | 2 ++ sys/boot/efi/libefi/Makefile | 1 + 3 files changed, 5 insertions(+) diff --git a/sys/boot/arm/uboot/Makefile b/sys/boot/arm/uboot/Makefile index 7f0fd2e6433d..a5f709619569 100644 --- a/sys/boot/arm/uboot/Makefile +++ b/sys/boot/arm/uboot/Makefile @@ -8,6 +8,8 @@ NEWVERSWHAT= "U-Boot loader" ${MACHINE_ARCH} BINDIR?= /boot INSTALLFLAGS= -b WARNS?= 1 +CWARNFLAGS.gcc+= -Wno-int-to-pointer-cast + # Address at which ubldr will be loaded. # This varies for different boards and SOCs. 
UBLDR_LOADADDR?= 0x1000000 diff --git a/sys/boot/efi/fdt/Makefile b/sys/boot/efi/fdt/Makefile index 15862dc2957e..4d82a86e568e 100644 --- a/sys/boot/efi/fdt/Makefile +++ b/sys/boot/efi/fdt/Makefile @@ -7,6 +7,8 @@ LIB= efi_fdt INTERNALLIB= WARNS?= 6 +CWARNFLAGS.gcc+= -Wno-strict-prototypes +CWARNFLAGS.gcc+= -Wno-redundant-decls SRCS= efi_fdt.c diff --git a/sys/boot/efi/libefi/Makefile b/sys/boot/efi/libefi/Makefile index bb2f9ea36c0b..6df7817328c4 100644 --- a/sys/boot/efi/libefi/Makefile +++ b/sys/boot/efi/libefi/Makefile @@ -3,6 +3,7 @@ LIB= efi INTERNALLIB= WARNS?= 2 +CWARNFLAGS.gcc+= -Wno-attributes SRCS= delay.c efi_console.c efinet.c efipart.c errno.c handles.c \ libefi.c time.c From ff7ea78834a2360a86db8beeeb1c7113c6887625 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Thu, 14 Jan 2016 10:09:05 +0000 Subject: [PATCH 37/88] Allow file specific user-specified flag overrides Allow user-specified warning flag overrides for specific files under bsd.sys.mk, in the same way kern.mk does. This will be used by future commits. MFC after: 2 weeks X-MFC-With: r293268 Sponsored by: Multiplay --- share/mk/bsd.sys.mk | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/share/mk/bsd.sys.mk b/share/mk/bsd.sys.mk index de62195b84fd..7ac7917e9e89 100644 --- a/share/mk/bsd.sys.mk +++ b/share/mk/bsd.sys.mk @@ -164,10 +164,11 @@ SSP_CFLAGS?= -fstack-protector CFLAGS+= ${SSP_CFLAGS} .endif # SSP && !ARM && !MIPS -# Allow user-specified additional warning flags, plus compiler specific flag overrides. -# Unless we've overriden this... +# Allow user-specified additional warning flags, plus compiler and file +# specific flag overrides, unless we've overriden this... 
.if ${MK_WARNS} != "no" CFLAGS+= ${CWARNFLAGS} ${CWARNFLAGS.${COMPILER_TYPE}} +CFLAGS+= ${CWARNFLAGS.${.IMPSRC:T}} .endif CFLAGS+= ${CFLAGS.${COMPILER_TYPE}} From 479795819a80a3d669f5f36903e3bbf418768f0a Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 14 Jan 2016 10:11:10 +0000 Subject: [PATCH 38/88] Verify the packet length in sctp6_input(). The sctp6_ctlinput() function does not properly check the length of the packet it receives from the ICMP6 input routine. This means that an attacker can craft a packet that will cause a kernel panic. When the kernel receives an ICMP6 error message with one of the types/codes it handles, it calls icmp6_notify_error() to deliver it to the upper-level protocol. icmp6_notify_error() cycles through the extension headers (if any) to find the protocol number of the first non-extension header. It does NOT verify the length of the non-extension header. It passes information about the packet (including the actual packet) to the upper-level protocol's pr_ctlinput function. In the case of SCTP for IPv6, icmp6_notify_error() calls sctp6_ctlinput(). sctp6_ctlinput() assumes that the incoming packet contains a sufficiently-long SCTP header and calls m_copydata() to extract a copy of that header. In turn, m_copydata() assumes that the caller has already verified that the offset and length parameters are correct. If they are incorrect, it will dereference a NULL pointer and cause a kernel panic. In short, no one is sufficiently verifying the input, and the result is a kernel panic. Submitted by: jtl Security: SA-16:01.sctp --- sys/netinet6/sctp6_usrreq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c index 43df06edeb37..40c1b411697e 100644 --- a/sys/netinet6/sctp6_usrreq.c +++ b/sys/netinet6/sctp6_usrreq.c @@ -379,7 +379,6 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d) * XXX: We assume that when IPV6 is non NULL, M and OFF are * valid. 
*/ - /* check if we can safely examine src and dst ports */ struct sctp_inpcb *inp = NULL; struct sctp_tcb *stcb = NULL; struct sctp_nets *net = NULL; @@ -388,6 +387,10 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d) if (ip6cp->ip6c_m == NULL) return; + /* Check if we can safely examine the SCTP header. */ + if (ip6cp->ip6c_m->m_pkthdr.len < ip6cp->ip6c_off + sizeof(sh)) + return; + bzero(&sh, sizeof(sh)); bzero(&final, sizeof(final)); inp = NULL; From 037f750877f04f57a316f597f2e0176fa1e19aab Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 14 Jan 2016 10:13:58 +0000 Subject: [PATCH 39/88] Change linux get_robust_list system call to match actual linux one. The set_robust_list system call requests the kernel to record the head of the list of robust futexes owned by the calling thread. The head argument is the list head to record. The get_robust_list system call should return the head of the robust list of the thread whose thread id is specified in the pid argument. The list head should be stored in the location pointed to by the head argument. In contrast, our implementation of the get_robust_list system call copies a known portion of the memory pointed to by the robust-list head pointer recorded by the set_robust_list system call to the location pointed to by the head argument. So, it is possible for a local attacker to read portions of kernel memory, which may result in a privilege escalation. 
Submitted by: mjg Security: SA-16:03.linux --- sys/amd64/linux/syscalls.master | 4 ++-- sys/amd64/linux32/syscalls.master | 4 ++-- sys/compat/linux/linux_futex.c | 2 +- sys/i386/linux/syscalls.master | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master index 87c800079b14..eb36d5f2ec90 100644 --- a/sys/amd64/linux/syscalls.master +++ b/sys/amd64/linux/syscalls.master @@ -461,8 +461,8 @@ 272 AUE_NULL STD { int linux_unshare(void); } 273 AUE_NULL STD { int linux_set_robust_list(struct linux_robust_list_head *head, \ l_size_t len); } -274 AUE_NULL STD { int linux_get_robust_list(l_int pid, struct linux_robust_list_head *head, \ - l_size_t *len); } +274 AUE_NULL STD { int linux_get_robust_list(l_int pid, \ + struct linux_robust_list_head **head, l_size_t *len); } 275 AUE_NULL STD { int linux_splice(void); } 276 AUE_NULL STD { int linux_tee(void); } 277 AUE_NULL STD { int linux_sync_file_range(void); } diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master index 8d9f90c21043..0396b30db9c8 100644 --- a/sys/amd64/linux32/syscalls.master +++ b/sys/amd64/linux32/syscalls.master @@ -520,8 +520,8 @@ ; linux 2.6.17: 311 AUE_NULL STD { int linux_set_robust_list(struct linux_robust_list_head *head, \ l_size_t len); } -312 AUE_NULL STD { int linux_get_robust_list(l_int pid, struct linux_robust_list_head *head, \ - l_size_t *len); } +312 AUE_NULL STD { int linux_get_robust_list(l_int pid, \ + struct linux_robust_list_head **head, l_size_t *len); } 313 AUE_NULL STD { int linux_splice(void); } 314 AUE_NULL STD { int linux_sync_file_range(void); } 315 AUE_NULL STD { int linux_tee(void); } diff --git a/sys/compat/linux/linux_futex.c b/sys/compat/linux/linux_futex.c index db8ab2c71eb6..be7c8a0d5388 100644 --- a/sys/compat/linux/linux_futex.c +++ b/sys/compat/linux/linux_futex.c @@ -1131,7 +1131,7 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args 
*args return (EFAULT); } - error = copyout(head, args->head, sizeof(struct linux_robust_list_head)); + error = copyout(&head, args->head, sizeof(head)); if (error) { LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, error); diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master index e6609a9c4b08..248770c9bd55 100644 --- a/sys/i386/linux/syscalls.master +++ b/sys/i386/linux/syscalls.master @@ -528,8 +528,8 @@ ; linux 2.6.17: 311 AUE_NULL STD { int linux_set_robust_list(struct linux_robust_list_head *head, \ l_size_t len); } -312 AUE_NULL STD { int linux_get_robust_list(l_int pid, struct linux_robust_list_head **head, \ - l_size_t *len); } +312 AUE_NULL STD { int linux_get_robust_list(l_int pid, \ + struct linux_robust_list_head **head, l_size_t *len); } 313 AUE_NULL STD { int linux_splice(void); } 314 AUE_NULL STD { int linux_sync_file_range(void); } 315 AUE_NULL STD { int linux_tee(void); } From de44d808ef9e0a4cdccaf37cb009737ba57f3d14 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 14 Jan 2016 10:15:21 +0000 Subject: [PATCH 40/88] Regen after r293907. 
--- sys/amd64/linux/linux_proto.h | 4 ++-- sys/amd64/linux/linux_syscall.h | 2 +- sys/amd64/linux/linux_syscalls.c | 2 +- sys/amd64/linux/linux_sysent.c | 2 +- sys/amd64/linux/linux_systrace_args.c | 4 ++-- sys/amd64/linux32/linux32_proto.h | 4 ++-- sys/amd64/linux32/linux32_syscall.h | 2 +- sys/amd64/linux32/linux32_syscalls.c | 2 +- sys/amd64/linux32/linux32_sysent.c | 2 +- sys/amd64/linux32/linux32_systrace_args.c | 4 ++-- sys/i386/linux/linux_proto.h | 2 +- sys/i386/linux/linux_syscall.h | 2 +- sys/i386/linux/linux_syscalls.c | 2 +- sys/i386/linux/linux_sysent.c | 2 +- 14 files changed, 18 insertions(+), 18 deletions(-) diff --git a/sys/amd64/linux/linux_proto.h b/sys/amd64/linux/linux_proto.h index 8aecd7c0f976..54e11fe8bdfc 100644 --- a/sys/amd64/linux/linux_proto.h +++ b/sys/amd64/linux/linux_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/amd64/linux/syscalls.master 289769 2015-10-22 21:28:20Z jhb + * created from FreeBSD: head/sys/amd64/linux/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ #ifndef _LINUX_SYSPROTO_H_ @@ -966,7 +966,7 @@ struct linux_set_robust_list_args { }; struct linux_get_robust_list_args { char pid_l_[PADL_(l_int)]; l_int pid; char pid_r_[PADR_(l_int)]; - char head_l_[PADL_(struct linux_robust_list_head *)]; struct linux_robust_list_head * head; char head_r_[PADR_(struct linux_robust_list_head *)]; + char head_l_[PADL_(struct linux_robust_list_head **)]; struct linux_robust_list_head ** head; char head_r_[PADR_(struct linux_robust_list_head **)]; char len_l_[PADL_(l_size_t *)]; l_size_t * len; char len_r_[PADR_(l_size_t *)]; }; struct linux_splice_args { diff --git a/sys/amd64/linux/linux_syscall.h b/sys/amd64/linux/linux_syscall.h index 671eb0b344d7..cee28556b237 100644 --- a/sys/amd64/linux/linux_syscall.h +++ b/sys/amd64/linux/linux_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/amd64/linux/syscalls.master 289769 2015-10-22 21:28:20Z jhb + * created from FreeBSD: head/sys/amd64/linux/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ #define LINUX_SYS_read 0 diff --git a/sys/amd64/linux/linux_syscalls.c b/sys/amd64/linux/linux_syscalls.c index a414ea0cebaf..ea001ba68467 100644 --- a/sys/amd64/linux/linux_syscalls.c +++ b/sys/amd64/linux/linux_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/amd64/linux/syscalls.master 289769 2015-10-22 21:28:20Z jhb + * created from FreeBSD: head/sys/amd64/linux/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ const char *linux_syscallnames[] = { diff --git a/sys/amd64/linux/linux_sysent.c b/sys/amd64/linux/linux_sysent.c index 31f2eeb02e4d..053f8d1e415d 100644 --- a/sys/amd64/linux/linux_sysent.c +++ b/sys/amd64/linux/linux_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/amd64/linux/syscalls.master 289769 2015-10-22 21:28:20Z jhb + * created from FreeBSD: head/sys/amd64/linux/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ #include diff --git a/sys/amd64/linux/linux_systrace_args.c b/sys/amd64/linux/linux_systrace_args.c index d649eed3d4de..bc279c947ec1 100644 --- a/sys/amd64/linux/linux_systrace_args.c +++ b/sys/amd64/linux/linux_systrace_args.c @@ -2020,7 +2020,7 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) case 274: { struct linux_get_robust_list_args *p = params; iarg[0] = p->pid; /* l_int */ - uarg[1] = (intptr_t) p->head; /* struct linux_robust_list_head * */ + uarg[1] = (intptr_t) p->head; /* struct linux_robust_list_head ** */ uarg[2] = (intptr_t) p->len; /* l_size_t * */ *n_args = 3; break; @@ -5347,7 +5347,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) p = "l_int"; break; case 1: - p = "struct linux_robust_list_head *"; + p = "struct linux_robust_list_head **"; break; case 2: p = "l_size_t *"; diff --git a/sys/amd64/linux32/linux32_proto.h b/sys/amd64/linux32/linux32_proto.h index 84c78152a7ab..e76384fb445b 100644 --- a/sys/amd64/linux32/linux32_proto.h +++ b/sys/amd64/linux32/linux32_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 289769 2015-10-22 21:28:20Z jhb + * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ #ifndef _LINUX32_SYSPROTO_H_ @@ -1023,7 +1023,7 @@ struct linux_set_robust_list_args { }; struct linux_get_robust_list_args { char pid_l_[PADL_(l_int)]; l_int pid; char pid_r_[PADR_(l_int)]; - char head_l_[PADL_(struct linux_robust_list_head *)]; struct linux_robust_list_head * head; char head_r_[PADR_(struct linux_robust_list_head *)]; + char head_l_[PADL_(struct linux_robust_list_head **)]; struct linux_robust_list_head ** head; char head_r_[PADR_(struct linux_robust_list_head **)]; char len_l_[PADL_(l_size_t *)]; l_size_t * len; char len_r_[PADR_(l_size_t *)]; }; struct linux_splice_args { diff --git a/sys/amd64/linux32/linux32_syscall.h b/sys/amd64/linux32/linux32_syscall.h index a3fbb26bb41c..990adf3642ed 100644 --- a/sys/amd64/linux32/linux32_syscall.h +++ b/sys/amd64/linux32/linux32_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 289769 2015-10-22 21:28:20Z jhb + * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ #define LINUX32_SYS_linux_exit 1 diff --git a/sys/amd64/linux32/linux32_syscalls.c b/sys/amd64/linux32/linux32_syscalls.c index f3b4510177a0..79b96a71bf2a 100644 --- a/sys/amd64/linux32/linux32_syscalls.c +++ b/sys/amd64/linux32/linux32_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 289769 2015-10-22 21:28:20Z jhb + * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ const char *linux32_syscallnames[] = { diff --git a/sys/amd64/linux32/linux32_sysent.c b/sys/amd64/linux32/linux32_sysent.c index 6f0420dec634..7d6d22078f83 100644 --- a/sys/amd64/linux32/linux32_sysent.c +++ b/sys/amd64/linux32/linux32_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 289769 2015-10-22 21:28:20Z jhb + * created from FreeBSD: head/sys/amd64/linux32/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ #include "opt_compat.h" diff --git a/sys/amd64/linux32/linux32_systrace_args.c b/sys/amd64/linux32/linux32_systrace_args.c index b6ccd334178f..80889bf83b62 100644 --- a/sys/amd64/linux32/linux32_systrace_args.c +++ b/sys/amd64/linux32/linux32_systrace_args.c @@ -2122,7 +2122,7 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) case 312: { struct linux_get_robust_list_args *p = params; iarg[0] = p->pid; /* l_int */ - uarg[1] = (intptr_t) p->head; /* struct linux_robust_list_head * */ + uarg[1] = (intptr_t) p->head; /* struct linux_robust_list_head ** */ uarg[2] = (intptr_t) p->len; /* l_size_t * */ *n_args = 3; break; @@ -5551,7 +5551,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) p = "l_int"; break; case 1: - p = "struct linux_robust_list_head *"; + p = "struct linux_robust_list_head **"; break; case 2: p = "l_size_t *"; diff --git a/sys/i386/linux/linux_proto.h b/sys/i386/linux/linux_proto.h index a5d6ed2e158c..884292f106b3 100644 --- a/sys/i386/linux/linux_proto.h +++ b/sys/i386/linux/linux_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/i386/linux/syscalls.master 283492 2015-05-24 18:08:01Z dchagin + * created from FreeBSD: head/sys/i386/linux/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ #ifndef _LINUX_SYSPROTO_H_ diff --git a/sys/i386/linux/linux_syscall.h b/sys/i386/linux/linux_syscall.h index 60c284a656b8..1be670ae0f6b 100644 --- a/sys/i386/linux/linux_syscall.h +++ b/sys/i386/linux/linux_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/i386/linux/syscalls.master 283492 2015-05-24 18:08:01Z dchagin + * created from FreeBSD: head/sys/i386/linux/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ #define LINUX_SYS_linux_exit 1 diff --git a/sys/i386/linux/linux_syscalls.c b/sys/i386/linux/linux_syscalls.c index 64556a9b6cd7..68a81ef5cc48 100644 --- a/sys/i386/linux/linux_syscalls.c +++ b/sys/i386/linux/linux_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/i386/linux/syscalls.master 283492 2015-05-24 18:08:01Z dchagin + * created from FreeBSD: head/sys/i386/linux/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ const char *linux_syscallnames[] = { diff --git a/sys/i386/linux/linux_sysent.c b/sys/i386/linux/linux_sysent.c index 922ddbb19aed..d853d1d592d3 100644 --- a/sys/i386/linux/linux_sysent.c +++ b/sys/i386/linux/linux_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/i386/linux/syscalls.master 283492 2015-05-24 18:08:01Z dchagin + * created from FreeBSD: head/sys/i386/linux/syscalls.master 293907 2016-01-14 10:13:58Z glebius */ #include From c8358c6e0d55601e5e81ff7829c210c64d460f85 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 14 Jan 2016 10:16:25 +0000 Subject: [PATCH 41/88] Call crextend() before copying old credentials to the new credentials and replace crcopysafe by crcopy as crcopysafe is not intended to be safe in a threaded environment, it drops PROC_LOCK() in while() that can lead to unexpected results, such as overwriting kernel memory. In my POV crcopysafe() needs special attention. For now I do not see any problems with this function, but who knows. Submitted by: dchagin Found by: trinity Security: SA-16:04.linux --- sys/compat/linux/linux_misc.c | 4 +++- sys/kern/kern_prot.c | 3 +-- sys/sys/ucred.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index de918695c8e8..fe4dbf642427 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -1304,9 +1304,11 @@ linux_setgroups(struct thread *td, struct linux_setgroups_args *args) if (error) goto out; newcred = crget(); + crextend(newcred, ngrp + 1); p = td->td_proc; PROC_LOCK(p); - oldcred = crcopysafe(p, newcred); + oldcred = p->p_ucred; + crcopy(newcred, oldcred); /* * cr_groups[0] holds egid.
Setting the whole set from diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index d7ec70134ddf..6594e51e7d04 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -88,7 +88,6 @@ static MALLOC_DEFINE(M_CRED, "cred", "credentials"); SYSCTL_NODE(_security, OID_AUTO, bsd, CTLFLAG_RW, 0, "BSD security policy"); -static void crextend(struct ucred *cr, int n); static void crsetgroups_locked(struct ucred *cr, int ngrp, gid_t *groups); @@ -1997,7 +1996,7 @@ crcopysafe(struct proc *p, struct ucred *cr) /* * Extend the passed in credential to hold n items. */ -static void +void crextend(struct ucred *cr, int n) { int cnt; diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h index d7e7fa5a947c..8dbf2fef74fe 100644 --- a/sys/sys/ucred.h +++ b/sys/sys/ucred.h @@ -105,6 +105,7 @@ void change_svuid(struct ucred *newcred, uid_t svuid); void crcopy(struct ucred *dest, struct ucred *src); struct ucred *crcopysafe(struct proc *p, struct ucred *cr); struct ucred *crdup(struct ucred *cr); +void crextend(struct ucred *cr, int n); void proc_set_cred_init(struct proc *p, struct ucred *cr); struct ucred *proc_set_cred(struct proc *p, struct ucred *cr); void crfree(struct ucred *cr); From f73d9fd2f1e7e3cdb56caf7458147db355ff0354 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 14 Jan 2016 10:22:45 +0000 Subject: [PATCH 42/88] There is a bug in tcp_output()'s implementation of the TCP_SIGNATURE (RFC 2385/TCP-MD5) kernel option. If a tcpcb has TF_NOOPT flag, then tcp_addoptions() is not called, and to.to_signature is an uninitialized stack variable. The value is later used as write offset, which leads to writing to random address. 
Submitted by: rstone, jtl Security: SA-16:05.tcp --- sys/netinet/tcp_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 3c32d77c377e..4a4e7eb5061a 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -752,8 +752,8 @@ tcp_output(struct tcpcb *tp) * segments. Options for SYN-ACK segments are handled in TCP * syncache. */ + to.to_flags = 0; if ((tp->t_flags & TF_NOOPT) == 0) { - to.to_flags = 0; /* Maximum segment size. */ if (flags & TH_SYN) { tp->snd_nxt = tp->iss; @@ -1233,7 +1233,7 @@ tcp_output(struct tcpcb *tp) tp->snd_up = tp->snd_una; /* drag it along */ #ifdef TCP_SIGNATURE - if (tp->t_flags & TF_SIGNATURE) { + if (to.to_flags & TOF_SIGNATURE) { int sigoff = to.to_signature - opt; tcp_signature_compute(m, 0, len, optlen, (u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND); @@ -1713,6 +1713,7 @@ tcp_addoptions(struct tcpopt *to, u_char *optp) bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr)); optp += sizeof(to->to_tsecr); break; +#ifdef TCP_SIGNATURE case TOF_SIGNATURE: { int siglen = TCPOLEN_SIGNATURE - 2; @@ -1731,6 +1732,7 @@ tcp_addoptions(struct tcpopt *to, u_char *optp) *optp++ = 0; break; } +#endif case TOF_SACK: { int sackblks = 0; From dc5f4fa86e1c7835572bd10dfad565732dca4bd1 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 14 Jan 2016 10:25:18 +0000 Subject: [PATCH 43/88] Install /etc/snmpd.config with 0600 permissions. 
Noticed by: Pierre Kim Security: SA-16:06.snmpd Pointy hat to: glebius in 2005 --- etc/Makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/etc/Makefile b/etc/Makefile index 5484c11e1424..27d1ba447b9e 100644 --- a/etc/Makefile +++ b/etc/Makefile @@ -82,10 +82,6 @@ BIN1+= apmd.conf BIN1+= auto_master .endif -.if ${MK_BSNMP} != "no" -BIN1+= snmpd.config -.endif - .if ${MK_FREEBSD_UPDATE} != "no" BIN1+= freebsd-update.conf .endif @@ -219,6 +215,11 @@ distribution: ${BIN2} ${DESTDIR}/etc; \ ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 600 \ master.passwd nsmb.conf opieaccess ${DESTDIR}/etc; +.if ${MK_BSNMP} != "no" + cd ${.CURDIR}; \ + ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 600 \ + snmpd.config ${DESTDIR}/etc; +.endif .if ${MK_AT} == "no" sed -i "" -e 's;.*/usr/libexec/atrun;#&;' ${DESTDIR}/etc/crontab .endif From ebee3dc22933dd91762d70461a1ec9a2056d5d07 Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Thu, 14 Jan 2016 11:25:28 +0000 Subject: [PATCH 44/88] Fail the SCTP_GET_ASSOC_NUMBER and SCTP_GET_ASSOC_ID_LIST socket options for 1-to-1 style sockets as specified in RFC 6458. 
MFC after: 3 days --- sys/netinet/sctp_usrreq.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c index 60deff0ff253..2ea4eed96cab 100644 --- a/sys/netinet/sctp_usrreq.c +++ b/sys/netinet/sctp_usrreq.c @@ -1883,8 +1883,15 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, uint32_t *value, cnt; SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); - cnt = 0; SCTP_INP_RLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { + /* Can't do this for a 1-1 socket */ + error = EINVAL; + SCTP_INP_RUNLOCK(inp); + break; + } + cnt = 0; LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { cnt++; } @@ -1899,9 +1906,16 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, unsigned int at, limit; SCTP_CHECK_AND_CAST(ids, optval, struct sctp_assoc_ids, *optsize); + SCTP_INP_RLOCK(inp); + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { + /* Can't do this for a 1-1 socket */ + error = EINVAL; + SCTP_INP_RUNLOCK(inp); + break; + } at = 0; limit = (*optsize - sizeof(uint32_t)) / sizeof(sctp_assoc_t); - SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { if (at < limit) { ids->gaids_assoc_id[at++] = sctp_get_associd(stcb); From 6369f51bc82d713bd63e5b6462c8c9f43bcf2562 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Thu, 14 Jan 2016 13:14:12 +0000 Subject: [PATCH 45/88] Make ng_netflow(9) use new routing KPI. Netflow module is supposed to store (along with fields like gateway address and interface index) matched netmask for each record. This (currently) requires returning individual route entries, instead of optimized next-hop structure. Given that, use control-plane rib_lookup_info() function to avoid accessing rtentries directly. 
While rib_lookup_info() might be slower, than fibX_lookup() flavours, it is more scalable than rtalloc1_fib(), because rtentry mutex is not acquired. --- sys/netgraph/netflow/netflow.c | 100 ++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 39 deletions(-) diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c index 87ff18909025..6adffc9e6072 100644 --- a/sys/netgraph/netflow/netflow.c +++ b/sys/netgraph/netflow/netflow.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -307,8 +308,9 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, int plen, uint8_t flags, uint8_t tcp_flags) { struct flow_entry *fle; - struct sockaddr_in sin; - struct rtentry *rt; + struct sockaddr_in sin, sin_mask; + struct sockaddr_dl rt_gateway; + struct rt_addrinfo info; mtx_assert(&hsh->mtx, MA_OWNED); @@ -339,23 +341,30 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = fle->f.r.r_dst; - rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib); - if (rt != NULL) { - fle->f.fle_o_ifx = rt->rt_ifp->if_index; - if (rt->rt_flags & RTF_GATEWAY && - rt->rt_gateway->sa_family == AF_INET) + rt_gateway.sdl_len = sizeof(rt_gateway); + sin_mask.sin_len = sizeof(struct sockaddr_in); + bzero(&info, sizeof(info)); + + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; + info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin_mask; + + if (rib_lookup_info(r->fib, (struct sockaddr *)&sin, NHR_REF, 0, + &info) == 0) { + fle->f.fle_o_ifx = info.rti_ifp->if_index; + + if (info.rti_flags & RTF_GATEWAY && + rt_gateway.sdl_family == AF_INET) fle->f.next_hop = - ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr; + ((struct sockaddr_in *)&rt_gateway)->sin_addr; - if (rt_mask(rt)) - fle->f.dst_mask = - bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr); - else if 
(rt->rt_flags & RTF_HOST) + if (info.rti_addrs & RTA_NETMASK) + fle->f.dst_mask = bitcount32(sin_mask.sin_addr.s_addr); + else if (info.rti_flags & RTF_HOST) /* Give up. We can't determine mask :( */ fle->f.dst_mask = 32; - RTFREE_LOCKED(rt); + rib_free_info(&info); } } @@ -365,16 +374,20 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = fle->f.r.r_src; - rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib); - if (rt != NULL) { - if (rt_mask(rt)) + + sin_mask.sin_len = sizeof(struct sockaddr_in); + bzero(&info, sizeof(info)); + + info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin_mask; + + if (rib_lookup_info(r->fib, (struct sockaddr *)&sin, 0, 0, + &info) == 0) { + if (info.rti_addrs & RTA_NETMASK) fle->f.src_mask = - bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr); - else if (rt->rt_flags & RTF_HOST) + bitcount32(sin_mask.sin_addr.s_addr); + else if (info.rti_flags & RTF_HOST) /* Give up. 
We can't determine mask :( */ fle->f.src_mask = 32; - - RTFREE_LOCKED(rt); } } @@ -390,14 +403,14 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, bitcount32((x).__u6_addr.__u6_addr32[1]) + \ bitcount32((x).__u6_addr.__u6_addr32[2]) + \ bitcount32((x).__u6_addr.__u6_addr32[3]) -#define RT_MASK6(x) (ipv6_masklen(((struct sockaddr_in6 *)rt_mask(x))->sin6_addr)) static int hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, int plen, uint8_t flags, uint8_t tcp_flags) { struct flow6_entry *fle6; - struct sockaddr_in6 sin6; - struct rtentry *rt; + struct sockaddr_in6 sin6, sin6_mask; + struct sockaddr_dl rt_gateway; + struct rt_addrinfo info; mtx_assert(&hsh6->mtx, MA_OWNED); @@ -430,22 +443,29 @@ hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, sin6.sin6_family = AF_INET6; sin6.sin6_addr = r->dst.r_dst6; - rt = rtalloc1_fib((struct sockaddr *)&sin6, 0, 0, r->fib); + rt_gateway.sdl_len = sizeof(rt_gateway); + sin6_mask.sin6_len = sizeof(struct sockaddr_in6); + bzero(&info, sizeof(info)); - if (rt != NULL) { - fle6->f.fle_o_ifx = rt->rt_ifp->if_index; + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; + info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin6_mask; - if (rt->rt_flags & RTF_GATEWAY && - rt->rt_gateway->sa_family == AF_INET6) + if (rib_lookup_info(r->fib, (struct sockaddr *)&sin6, NHR_REF, + 0, &info) == 0) { + fle6->f.fle_o_ifx = info.rti_ifp->if_index; + + if (info.rti_flags & RTF_GATEWAY && + rt_gateway.sdl_family == AF_INET6) fle6->f.n.next_hop6 = - ((struct sockaddr_in6 *)(rt->rt_gateway))->sin6_addr; + ((struct sockaddr_in6 *)&rt_gateway)->sin6_addr; - if (rt_mask(rt)) - fle6->f.dst_mask = RT_MASK6(rt); + if (info.rti_addrs & RTA_NETMASK) + fle6->f.dst_mask = + ipv6_masklen(sin6_mask.sin6_addr); else fle6->f.dst_mask = 128; - RTFREE_LOCKED(rt); + rib_free_info(&info); } } @@ -456,15 +476,18 @@ hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct 
flow6_rec *r, sin6.sin6_family = AF_INET6; sin6.sin6_addr = r->src.r_src6; - rt = rtalloc1_fib((struct sockaddr *)&sin6, 0, 0, r->fib); + sin6_mask.sin6_len = sizeof(struct sockaddr_in6); + bzero(&info, sizeof(info)); - if (rt != NULL) { - if (rt_mask(rt)) - fle6->f.src_mask = RT_MASK6(rt); + info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&sin6_mask; + + if (rib_lookup_info(r->fib, (struct sockaddr *)&sin6, 0, 0, + &info) == 0) { + if (info.rti_addrs & RTA_NETMASK) + fle6->f.src_mask = + ipv6_masklen(sin6_mask.sin6_addr); else fle6->f.src_mask = 128; - - RTFREE_LOCKED(rt); } } @@ -474,7 +497,6 @@ hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, return (0); } #undef ipv6_masklen -#undef RT_MASK6 #endif From a6c9d35c231ed64befd220527706795d007fe77a Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 14 Jan 2016 15:49:24 +0000 Subject: [PATCH 46/88] Adjust previous fix to conform to the existing style in this file. --- usr.sbin/boot0cfg/boot0cfg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/usr.sbin/boot0cfg/boot0cfg.c b/usr.sbin/boot0cfg/boot0cfg.c index f2cbf67a39d3..c2cb91de4166 100644 --- a/usr.sbin/boot0cfg/boot0cfg.c +++ b/usr.sbin/boot0cfg/boot0cfg.c @@ -336,8 +336,7 @@ read_mbr(const char *disk, u_int8_t **mbr, int check_version) close(fd); return (mbr_size); } - *mbr = malloc(sizeof(buf)); - if (*mbr == NULL) + if ((*mbr = malloc(sizeof(buf))) == NULL) errx(1, "%s: unable to allocate MBR buffer", disk); memcpy(*mbr, buf, sizeof(buf)); close(fd); From 2f9e57916663b5d75a6a97c2682c101dcfe3db4c Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 14 Jan 2016 15:51:13 +0000 Subject: [PATCH 47/88] Fix building with GCC since PAGE_MASK is signed on i386. 
Reviewed by: ngie MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D4772 --- lib/libkvm/kvm_i386.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libkvm/kvm_i386.h b/lib/libkvm/kvm_i386.h index 2949e6893024..51eb6f8f7f70 100644 --- a/lib/libkvm/kvm_i386.h +++ b/lib/libkvm/kvm_i386.h @@ -70,7 +70,7 @@ _Static_assert(NBPDR == I386_NBPDR, "NBPDR mismatch"); _Static_assert(PG_V == I386_PG_V, "PG_V mismatch"); _Static_assert(PG_PS == I386_PG_PS, "PG_PS mismatch"); -_Static_assert(PG_FRAME == I386_PG_FRAME, "PG_FRAME mismatch"); +_Static_assert((u_int)PG_FRAME == I386_PG_FRAME, "PG_FRAME mismatch"); _Static_assert(PG_PS_FRAME == I386_PG_PS_FRAME, "PG_PS_FRAME mismatch"); #endif From 5147131ae42d2ec15b11fc3d8531c943bc269298 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Thu, 14 Jan 2016 16:23:07 +0000 Subject: [PATCH 48/88] Document how to enter the debugger here. I'm sure there's some better canonical place, and the nit-pickers are welcome to move this information there with a cross reference. Differential Review: https://reviews.freebsd.org/D4860 --- share/man/man4/ddb.4 | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/share/man/man4/ddb.4 b/share/man/man4/ddb.4 index d29bcdda78a0..35b5dc139df5 100644 --- a/share/man/man4/ddb.4 +++ b/share/man/man4/ddb.4 @@ -1435,6 +1435,47 @@ The NMI allows one to break into the debugger on a wedged machine to diagnose problems. Other bus' bridge chipsets may be able to generate NMI using bus specific methods. +There are many PCI and PCIe add-in cards which can generate NMI for +debugging. +Modern systems typically use IMPI to generate signals to enter the +debugger. +The +.Dv devel/ipmitool +port can be used to send the +.Cd chassis power diag +command which delivers an NMI to the processor. 
+.Pp +For serial consoles, you can break to the debugger by sending a BREAK +condition on the serial line if +.Cd options BREAK_TO_DEBUGGER +is specified in the kernel. +Most terminal emulation programs can send a break sequence with a +special key sequence or via a menu item. +However, in some setups, sending the break can be difficult to arrange +or happens spuriously, so if the kernel contains +.Cd options ALT_BREAK_TO_DEBUGGER +then the sequence of CR TILDE CTRL-B enters the debugger; +CR TILDE CTRL-P causes a panic instead of entering the +debugger; and +CR TILDE CTRL-R causes an immediate reboot. +In all the above sequences, CR is a Carriage Return and is usually +sent by hitting the Enter or Return key. +TILDE is the ASCII tilde character (~). +CTRL-x is Control x created by hitting the control key and then x +and then releasing both. +.Pp +The break to debugger behavior may also be enabled by setting the +.Xr sysctl 8 +.Dv debug.kdb.break_to_debugger +to 1. +The alt break to debugger behavior may also be enabled by setting the +.Xr sysctl 8 +.Dv debug.kdb.alt_break_to_debugger +to 1. +The debugger may be entered by setting the +.Xr sysctl 8 +.Dv debug.kdb.enter +to 1. .Sh FILES Header files mentioned in this manual page can be found below .Pa /usr/include From fcbfdb37a14e787c174fb8db38d0705b9ccafef4 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Thu, 14 Jan 2016 16:31:00 +0000 Subject: [PATCH 49/88] Fix panic in IP redirect. Panic was introduced in r293466. Found by: Yamagi Burmeister > --- sys/net/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/net/route.c b/sys/net/route.c index a93f2ee972d1..001a15f6c8e5 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -576,8 +576,8 @@ rtredirect_fib(struct sockaddr *dst, * Create new route, rather than smashing route to net. 
*/ create: - RTFREE(rt); - rt = NULL; + if (rt != NULL) + RTFREE_LOCKED(rt); flags |= RTF_DYNAMIC; bzero((caddr_t)&info, sizeof(info)); From 0851a9b1a1a37f6632bc9e24f3d543bab150326a Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Thu, 14 Jan 2016 16:53:17 +0000 Subject: [PATCH 50/88] Fix the code to retry mount attempt in mountcritlocal if there are any root mount holds. The previous one used a wrong conditional - the "err=$?" assignment resets "$?" to 0. Submitted by: jilles@ MFC after: 1 month Sponsored by: The FreeBSD Foundation --- etc/rc.d/mountcritlocal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/rc.d/mountcritlocal b/etc/rc.d/mountcritlocal index 1513ec7397ac..0833199df11e 100755 --- a/etc/rc.d/mountcritlocal +++ b/etc/rc.d/mountcritlocal @@ -44,7 +44,7 @@ mountcritlocal_start() # and retry. mount -a -t ${mount_excludes} err=$? - if [ $? -ne 0 ]; then + if [ ${err} -ne 0 ]; then echo echo 'Mounting /etc/fstab filesystems failed,' \ 'will retry after root mount hold release' From a9a9fa410d1e46b1824ef1572fd12eb680c47c89 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Thu, 14 Jan 2016 16:55:07 +0000 Subject: [PATCH 51/88] Wrap overlong comment lines. MFC after: 1 month Sponsored by: The FreeBSD Foundation --- etc/rc.d/mountcritlocal | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/etc/rc.d/mountcritlocal b/etc/rc.d/mountcritlocal index 0833199df11e..069e3fda48fd 100755 --- a/etc/rc.d/mountcritlocal +++ b/etc/rc.d/mountcritlocal @@ -36,12 +36,12 @@ mountcritlocal_start() done mount_excludes=${mount_excludes%,} - # Originally, root mount hold had to be released before mounting the root - # filesystem. This delayed the boot, so it was changed to only wait if - # the root device isn't readily available. This can result in this script - # executing before all the devices - such as graid(8) - are available. 
- # Thus, should the mount fail, we will wait for the root mount hold release - # and retry. + # Originally, root mount hold had to be released before mounting + # the root filesystem. This delayed the boot, so it was changed + # to only wait if the root device isn't readily available. This + # can result in this script executing before all the devices - such + # as graid(8) - are available. Thus, should the mount fail, + # we will wait for the root mount hold release and retry. mount -a -t ${mount_excludes} err=$? if [ ${err} -ne 0 ]; then From e2b10854e4b244c5a27e75565810438576b9a614 Mon Sep 17 00:00:00 2001 From: Benjamin Kaduk Date: Thu, 14 Jan 2016 17:16:47 +0000 Subject: [PATCH 52/88] Update .Dd, missed in r294011 --- share/man/man4/ddb.4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/man/man4/ddb.4 b/share/man/man4/ddb.4 index 35b5dc139df5..8e3e792a368c 100644 --- a/share/man/man4/ddb.4 +++ b/share/man/man4/ddb.4 @@ -60,7 +60,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 5, 2015 +.Dd January 14, 2016 .Dt DDB 4 .Os .Sh NAME From 69c0fce6bac411bab3c35741925b1edbfec0cef4 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Thu, 14 Jan 2016 18:04:49 +0000 Subject: [PATCH 53/88] Fix spelling of IPMI Sponsored by: EMC / Isilon Storage Division --- share/man/man4/ddb.4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/man/man4/ddb.4 b/share/man/man4/ddb.4 index 8e3e792a368c..83bf001a8e1f 100644 --- a/share/man/man4/ddb.4 +++ b/share/man/man4/ddb.4 @@ -1437,7 +1437,7 @@ Other bus' bridge chipsets may be able to generate NMI using bus specific methods. There are many PCI and PCIe add-in cards which can generate NMI for debugging. -Modern systems typically use IMPI to generate signals to enter the +Modern systems typically use IPMI to generate signals to enter the debugger. 
The .Dv devel/ipmitool From cbedc01c9ad3b20754ede564b748bcdd3de9e8b8 Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Thu, 14 Jan 2016 18:19:05 +0000 Subject: [PATCH 54/88] Fix race condition involving ZFS remove events When a ZFS drive disappears, ZFS sends a resource.fs.zfs.removed event to userland. A userland program like zfsd(8) can use that event, for example to activate a hotspare. The current code contains a race condition: vdev_geom will send the sysevent _before_ spa.c would update the vdev's status, causing userland processes to see pool state that does not reflect the device removal. This change moves the sysevent to spa.c, closing the race. Reviewed by: delphij, Sean Eric Fagan MFC after: 4 weeks Sponsored by: Spectra Logic Corp Differential Revision: https://reviews.freebsd.org/D4902 --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c | 2 ++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c index cf9112b72aa2..29d2c90c55d5 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c @@ -5952,6 +5952,8 @@ spa_async_remove(spa_t *spa, vdev_t *vd) vd->vdev_stat.vs_checksum_errors = 0; vdev_state_dirty(vd->vdev_top); + /* Tell userspace that the vdev is gone. */ + zfs_post_remove(spa, vd); } for (int c = 0; c < vd->vdev_children; c++) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index 99496c06af8f..4c7c89df3b0f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -160,7 +160,6 @@ vdev_geom_orphan(struct g_consumer *cp) * async removal support to invoke a close on this * vdev once it is safe to do so.
*/ - zfs_post_remove(vd->vdev_spa, vd); vd->vdev_remove_wanted = B_TRUE; spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); } From 8171acf3adb4baf1b9929cfd873b9ef77aa72a74 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Thu, 14 Jan 2016 18:46:57 +0000 Subject: [PATCH 55/88] Revert r293903 Revert r293903 as EFI shouldn't be built on the platform that this was reported on. Sponsored by: Multiplay --- sys/boot/arm/uboot/Makefile | 2 -- sys/boot/efi/fdt/Makefile | 2 -- sys/boot/efi/libefi/Makefile | 1 - 3 files changed, 5 deletions(-) diff --git a/sys/boot/arm/uboot/Makefile b/sys/boot/arm/uboot/Makefile index a5f709619569..7f0fd2e6433d 100644 --- a/sys/boot/arm/uboot/Makefile +++ b/sys/boot/arm/uboot/Makefile @@ -8,8 +8,6 @@ NEWVERSWHAT= "U-Boot loader" ${MACHINE_ARCH} BINDIR?= /boot INSTALLFLAGS= -b WARNS?= 1 -CWARNFLAGS.gcc+= -Wno-int-to-pointer-cast - # Address at which ubldr will be loaded. # This varies for different boards and SOCs. UBLDR_LOADADDR?= 0x1000000 diff --git a/sys/boot/efi/fdt/Makefile b/sys/boot/efi/fdt/Makefile index 4d82a86e568e..15862dc2957e 100644 --- a/sys/boot/efi/fdt/Makefile +++ b/sys/boot/efi/fdt/Makefile @@ -7,8 +7,6 @@ LIB= efi_fdt INTERNALLIB= WARNS?= 6 -CWARNFLAGS.gcc+= -Wno-strict-prototypes -CWARNFLAGS.gcc+= -Wno-redundant-decls SRCS= efi_fdt.c diff --git a/sys/boot/efi/libefi/Makefile b/sys/boot/efi/libefi/Makefile index 6df7817328c4..bb2f9ea36c0b 100644 --- a/sys/boot/efi/libefi/Makefile +++ b/sys/boot/efi/libefi/Makefile @@ -3,7 +3,6 @@ LIB= efi INTERNALLIB= WARNS?= 2 -CWARNFLAGS.gcc+= -Wno-attributes SRCS= delay.c efi_console.c efinet.c efipart.c errno.c handles.c \ libefi.c time.c From 445a2b570ee971588bfc2a8161a9a9f4c5eaf1e4 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Thu, 14 Jan 2016 18:53:54 +0000 Subject: [PATCH 56/88] Only build EFI components on supported compilers As the in-tree GCC does not support __attribute__((ms_abi)) EFI can only be built with Clang.
The EFI loader and boot1 validated this, but unused libefi was still built causing issues under GCC after warnings were enabled by r293724. Disable building all of EFI when the selected compiler is GCC. MFC after: 2 weeks X-MFC-With: r293268 Sponsored by: Multiplay --- sys/boot/efi/Makefile | 8 ++++++-- sys/boot/efi/boot1/Makefile | 5 ----- sys/boot/efi/loader/Makefile | 5 ----- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/sys/boot/efi/Makefile b/sys/boot/efi/Makefile index 57d4e81b4860..94a975a8ec0b 100644 --- a/sys/boot/efi/Makefile +++ b/sys/boot/efi/Makefile @@ -2,7 +2,8 @@ .include -SUBDIR= libefi +# In-tree GCC does not support __attribute__((ms_abi)). +.if ${COMPILER_TYPE} != "gcc" .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "arm" .if ${MK_FDT} != "no" @@ -13,7 +14,10 @@ SUBDIR+= fdt .if ${MACHINE_CPUARCH} == "aarch64" || \ ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_CPUARCH} == "arm" -SUBDIR+= loader boot1 +SUBDIR+= libefi loader boot1 .endif +.endif # ${COMPILER_TYPE} != "gcc" + .include + diff --git a/sys/boot/efi/boot1/Makefile b/sys/boot/efi/boot1/Makefile index 9a003ce17e85..64f397bb1412 100644 --- a/sys/boot/efi/boot1/Makefile +++ b/sys/boot/efi/boot1/Makefile @@ -4,9 +4,6 @@ MAN= .include -# In-tree GCC does not support __attribute__((ms_abi)). -.if ${COMPILER_TYPE} != "gcc" - MK_SSP= no PROG= boot1.sym @@ -96,8 +93,6 @@ boot1.efifat: boot1.efi CLEANFILES= boot1.efi boot1.efifat -.endif # ${COMPILER_TYPE} != "gcc" - .include beforedepend ${OBJS}: machine diff --git a/sys/boot/efi/loader/Makefile b/sys/boot/efi/loader/Makefile index 56f3cabf8367..59d32ff33b6c 100644 --- a/sys/boot/efi/loader/Makefile +++ b/sys/boot/efi/loader/Makefile @@ -4,9 +4,6 @@ MAN= .include -# In-tree GCC does not support __attribute__((ms_abi)).
-.if ${COMPILER_TYPE} != "gcc" - MK_SSP= no PROG= loader.sym @@ -113,8 +110,6 @@ DPADD= ${LIBFICL} ${LIBEFI} ${LIBFDT} ${LIBEFI_FDT} ${LIBSTAND} \ ${LDSCRIPT} LDADD= ${LIBFICL} ${LIBEFI} ${LIBFDT} ${LIBEFI_FDT} ${LIBSTAND} -.endif # ${COMPILER_TYPE} != "gcc" - .include beforedepend ${OBJS}: machine From c455b924834e5f38a33343dc45b8763dcc36447e Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Thu, 14 Jan 2016 19:00:13 +0000 Subject: [PATCH 57/88] Set -mlong-calls where needed to get a static clang and lldb 3.8.0 linking. These are too large for a branch instruction to branch from an earlier point in the code to somewhere later. This will also allow these to be built with Thumb-2 when we get this infrastructure. Reviewed by: dim Differential Revision: https://reviews.freebsd.org/D4855 --- lib/clang/clang.lib.mk | 4 ++++ lib/libc++/Makefile | 3 +++ usr.bin/clang/clang/Makefile | 4 ++++ usr.bin/clang/lldb/Makefile | 3 +++ 4 files changed, 14 insertions(+) diff --git a/lib/clang/clang.lib.mk b/lib/clang/clang.lib.mk index 13afa85bf4cc..ab3551e2d4f7 100644 --- a/lib/clang/clang.lib.mk +++ b/lib/clang/clang.lib.mk @@ -6,4 +6,8 @@ LLVM_SRCS= ${.CURDIR}/../../../contrib/llvm INTERNALLIB= +.if ${MACHINE_CPUARCH} == "arm" +STATIC_CXXFLAGS+= -mlong-calls +.endif + .include diff --git a/lib/libc++/Makefile b/lib/libc++/Makefile index d81710ef5364..94e91b94b003 100644 --- a/lib/libc++/Makefile +++ b/lib/libc++/Makefile @@ -6,6 +6,9 @@ _LIBCXXRTDIR= ${.CURDIR}/../../contrib/libcxxrt HDRDIR= ${.CURDIR}/../../contrib/libc++/include SRCDIR= ${.CURDIR}/../../contrib/libc++/src CXXINCLUDEDIR= ${INCLUDEDIR}/c++/v${SHLIB_MAJOR} +.if ${MACHINE_CPUARCH} == "arm" +STATIC_CXXFLAGS+= -mlong-calls +.endif .PATH: ${SRCDIR} diff --git a/usr.bin/clang/clang/Makefile b/usr.bin/clang/clang/Makefile index 1489a5e2d327..92ed6e47be22 100644 --- a/usr.bin/clang/clang/Makefile +++ b/usr.bin/clang/clang/Makefile @@ -11,6 +11,10 @@ SRCS= cc1_main.cpp \ .if ${MK_SHARED_TOOLCHAIN} == "no" NO_SHARED?= yes
+ +.if ${MACHINE_CPUARCH} == "arm" +CFLAGS+= -mlong-calls +.endif .endif LINKS= ${BINDIR}/clang ${BINDIR}/clang++ \ diff --git a/usr.bin/clang/lldb/Makefile b/usr.bin/clang/lldb/Makefile index 1b014e6d0a0a..da997f746ffc 100644 --- a/usr.bin/clang/lldb/Makefile +++ b/usr.bin/clang/lldb/Makefile @@ -8,6 +8,9 @@ LLDB_SRCS=${.CURDIR}/../../../contrib/llvm/tools/lldb CFLAGS+= -I${LLDB_SRCS}/include CXXFLAGS+= -std=c++11 +.if ${MACHINE_CPUARCH} == "arm" +CFLAGS+= -mlong-calls +.endif SRCDIR= tools/lldb/tools/driver SRCS= Driver.cpp \ From 750e48d766aa0b2a368feefd39ed6d7d227ed617 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Thu, 14 Jan 2016 19:33:13 +0000 Subject: [PATCH 58/88] Fix the handling of the "PDC write transfer length" erratum for at91. The problem affects revision 1xx hardware as well as later versions. Also, the recommended workaround is to set the PDC count register for a 12-byte transfer when the actual size is less than that, but there is no need to extend or zero-out the data buffer, because the blklen register contains the real transfer size and only that many bytes will be transferred. Also add a sysctl to turn debugging printfs on or off on the fly. 
--- sys/arm/at91/at91_mci.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/sys/arm/at91/at91_mci.c b/sys/arm/at91/at91_mci.c index 8e55e02f6f1e..5bab815202a3 100644 --- a/sys/arm/at91/at91_mci.c +++ b/sys/arm/at91/at91_mci.c @@ -446,6 +446,9 @@ at91_mci_attach(device_t dev) CTLFLAG_RW, &sc->allow_overclock, 0, "Allow up to 30MHz clock for 25MHz request when next highest speed 15MHz or less."); + SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "debug", + CTLFLAG_RWTUN, &mci_debug, 0, "enable debug output"); + /* * Our real min freq is master_clock/512, but upper driver layers are * going to set the min speed during card discovery, and the right speed @@ -783,15 +786,6 @@ at91_mci_start_cmd(struct at91_mci_softc *sc, struct mmc_command *cmd) WR4(sc, PDC_PTCR, PDC_PTCR_RXTEN); } else { len = min(BBSIZE, remaining); - /* - * If this is MCI1 revision 2xx controller, apply - * a work-around for the "Data Write Operation and - * number of bytes" erratum. - */ - if ((sc->sc_cap & CAP_MCI1_REV2XX) && len < 12) { - len = 12; - memset(sc->bbuf_vaddr[0], 0, 12); - } at91_bswap_buf(sc, sc->bbuf_vaddr[0], data->data, len); err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0], sc->bbuf_vaddr[0], len, at91_mci_getaddr, @@ -800,8 +794,13 @@ at91_mci_start_cmd(struct at91_mci_softc *sc, struct mmc_command *cmd) panic("IO write dmamap_load failed\n"); bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0], BUS_DMASYNC_PREWRITE); + /* + * Erratum workaround: PDC transfer length on a write + * must not be smaller than 12 bytes (3 words); only + * blklen bytes (set above) are actually transferred. + */ WR4(sc, PDC_TPR,paddr); - WR4(sc, PDC_TCR, len / 4); + WR4(sc, PDC_TCR, (len < 12) ? 
3 : len / 4); sc->bbuf_len[0] = len; remaining -= len; if (remaining == 0) { @@ -818,7 +817,7 @@ at91_mci_start_cmd(struct at91_mci_softc *sc, struct mmc_command *cmd) bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1], BUS_DMASYNC_PREWRITE); WR4(sc, PDC_TNPR, paddr); - WR4(sc, PDC_TNCR, len / 4); + WR4(sc, PDC_TNCR, (len < 12) ? 3 : len / 4); sc->bbuf_len[1] = len; remaining -= len; } From ee8ce60b6b5f6cf028438aae1d6e4f527867dfe3 Mon Sep 17 00:00:00 2001 From: Oleksandr Tymoshenko Date: Thu, 14 Jan 2016 20:25:22 +0000 Subject: [PATCH 59/88] Fix order of last two arguments of mtx_init Spotted by: jmcneill@NetBSD.org --- sys/contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c index e0cbb6622d3d..93aa6f8ba27a 100644 --- a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c +++ b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c @@ -173,7 +173,7 @@ bcm_vchiq_attach(device_t dev) return (ENXIO); } - mtx_init(&sc->lock, "vchiq", MTX_DEF, 0); + mtx_init(&sc->lock, "vchiq", 0, MTX_DEF); bcm_vchiq_sc = sc; vchiq_init(); From c85650cacc5f13ac226f750ca43cdb4466567773 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 14 Jan 2016 20:51:48 +0000 Subject: [PATCH 60/88] Rename aiod_bio taskqueue to aiod_kick. This taskqueue is not used to handle bio requests. It is only used to run aio_kick_nowait() to spin up new aio daemon processes. 
Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D4904 --- sys/kern/vfs_aio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 122dff702045..19db3ba67bad 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -392,7 +392,7 @@ static struct filterops lio_filtops = { static eventhandler_tag exit_tag, exec_tag; -TASKQUEUE_DEFINE_THREAD(aiod_bio); +TASKQUEUE_DEFINE_THREAD(aiod_kick); /* * Main operations function for use as a kernel module. @@ -555,7 +555,7 @@ aio_unload(void) return error; async_io_version = 0; aio_swake = NULL; - taskqueue_free(taskqueue_aiod_bio); + taskqueue_free(taskqueue_aiod_kick); delete_unrhdr(aiod_unr); uma_zdestroy(kaio_zone); uma_zdestroy(aiop_zone); @@ -802,7 +802,7 @@ aio_proc_rundown(void *arg, struct proc *p) } } AIO_UNLOCK(ki); - taskqueue_drain(taskqueue_aiod_bio, &ki->kaio_task); + taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_task); mtx_destroy(&ki->kaio_mtx); uma_zfree(kaio_zone, ki); p->p_aioinfo = NULL; @@ -1861,7 +1861,7 @@ aio_kick_nowait(struct proc *userp) } else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) && ((ki->kaio_active_count + num_aio_resv_start) < ki->kaio_maxactive_count)) { - taskqueue_enqueue(taskqueue_aiod_bio, &ki->kaio_task); + taskqueue_enqueue(taskqueue_aiod_kick, &ki->kaio_task); } } From 492e9ee5a9e181c978edfc45ef9efb10ae22bb2b Mon Sep 17 00:00:00 2001 From: "Jonathan T. Looney" Date: Thu, 14 Jan 2016 21:08:23 +0000 Subject: [PATCH 61/88] Improvements to the MDXFileChunk() template function: - Remove unneeded fstat()/lseek() calls. - Return NULL and set errno to EINVAL on negative length. - Fix small style problems and expand variable names. After this change, it is possible to use this code for some irregular files. For example, 'md5 /dev/md0' should now succeed. 
Differential Revision: https://reviews.freebsd.org/D4748 Suggested by: bde Reviewed by: bde, allanjude, delphij --- lib/libmd/mdXhl.c | 60 ++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/lib/libmd/mdXhl.c b/lib/libmd/mdXhl.c index 378d8adfafc0..6ed214bce844 100644 --- a/lib/libmd/mdXhl.c +++ b/lib/libmd/mdXhl.c @@ -1,4 +1,5 @@ -/* mdXhl.c * ---------------------------------------------------------------------------- +/* mdXhl.c + * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think @@ -52,43 +53,44 @@ MDXFileChunk(const char *filename, char *buf, off_t ofs, off_t len) unsigned char buffer[16*1024]; MDX_CTX ctx; struct stat stbuf; - int f, i, e; - off_t n; + int fd, readrv, e; + off_t remain; + + if (len < 0) { + errno = EINVAL; + return NULL; + } MDXInit(&ctx); - f = open(filename, O_RDONLY); - if (f < 0) - return 0; - if (fstat(f, &stbuf) < 0) { - i = -1; - goto error; + fd = open(filename, O_RDONLY); + if (fd < 0) + return NULL; + if (ofs != 0) { + errno = 0; + if (lseek(fd, ofs, SEEK_SET) != ofs || + (ofs == -1 && errno != 0)) { + readrv = -1; + goto error; + } } - if (ofs > stbuf.st_size) - ofs = stbuf.st_size; - if ((len == 0) || (len > stbuf.st_size - ofs)) - len = stbuf.st_size - ofs; - if (lseek(f, ofs, SEEK_SET) < 0) { - i = -1; - goto error; - } - n = len; - i = 0; - while (n > 0) { - if (n > sizeof(buffer)) - i = read(f, buffer, sizeof(buffer)); + remain = len; + readrv = 0; + while (len == 0 || remain > 0) { + if (len == 0 || remain > sizeof(buffer)) + readrv = read(fd, buffer, sizeof(buffer)); else - i = read(f, buffer, n); - if (i <= 0) + readrv = read(fd, buffer, remain); + if (readrv <= 0) break; - MDXUpdate(&ctx, buffer, i); - n -= i; + MDXUpdate(&ctx, buffer, readrv); + remain -= 
readrv; } error: e = errno; - close(f); + close(fd); errno = e; - if (i < 0) - return 0; + if (readrv < 0) + return NULL; return (MDXEnd(&ctx, buf)); } From 6c8fd022838eaceb5dbfae7a671d6d226f6d4879 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 14 Jan 2016 21:28:56 +0000 Subject: [PATCH 62/88] Remove aiod_timeout. It hasn't been used since the AIO code was made MPSAFE 10 years ago. Reviewed by: kib Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D4946 --- sys/kern/vfs_aio.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 19db3ba67bad..e5081056a40a 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -117,10 +117,6 @@ static uint64_t jobseqno; #define MAX_BUF_AIO 16 #endif -#ifndef AIOD_TIMEOUT_DEFAULT -#define AIOD_TIMEOUT_DEFAULT (10 * hz) -#endif - #ifndef AIOD_LIFETIME_DEFAULT #define AIOD_LIFETIME_DEFAULT (30 * hz) #endif @@ -165,10 +161,6 @@ SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, CTLFLAG_RD, &num_buf_aio, 0, /* XXX This should be local to aio_aqueue() */ static int num_aio_resv_start = 0; -static int aiod_timeout; -SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_timeout, CTLFLAG_RW, &aiod_timeout, 0, - "Timeout value for synchronous aio operations"); - static int aiod_lifetime; SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, CTLFLAG_RW, &aiod_lifetime, 0, "Maximum lifetime for idle aiod"); @@ -504,7 +496,6 @@ aio_onceonly(void) NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiolio_zone = uma_zcreate("AIOLIO", sizeof(struct aioliojob), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); - aiod_timeout = AIOD_TIMEOUT_DEFAULT; aiod_lifetime = AIOD_LIFETIME_DEFAULT; jobrefid = 1; async_io_version = _POSIX_VERSION; From 276a15676c7975d17b6410fbd168566b3fdfea2d Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Thu, 14 Jan 2016 21:31:26 +0000 Subject: [PATCH 63/88] Prevent bogus compiler warning in ZFS boot code Silence a bogus compiler warning about indexing
past the end of dn_bonus. The ZFS code ensures this is not possible but the compiler can't determine this so added an additional check to prevent this warning. Sponsored by: Multiplay --- sys/boot/zfs/zfsimpl.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sys/boot/zfs/zfsimpl.c b/sys/boot/zfs/zfsimpl.c index 927fbadd8021..aa1a789b6c33 100644 --- a/sys/boot/zfs/zfsimpl.c +++ b/sys/boot/zfs/zfsimpl.c @@ -2165,7 +2165,13 @@ zfs_lookup(const struct zfsmount *mount, const char *upath, dnode_phys_t *dnode) strcpy(&path[sb.st_size], p); else path[sb.st_size] = 0; - if (sb.st_size + sizeof(znode_phys_t) <= dn.dn_bonuslen) { + /* + * Second test is purely to silence bogus compiler + * warning about accessing past the end of dn_bonus. + */ + if (sb.st_size + sizeof(znode_phys_t) <= + dn.dn_bonuslen && sizeof(znode_phys_t) <= + sizeof(dn.dn_bonus)) { memcpy(path, &dn.dn_bonus[sizeof(znode_phys_t)], sb.st_size); } else { From bff57f705387b2afcec5542484ffccd2b3b7c2ab Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Thu, 14 Jan 2016 21:39:10 +0000 Subject: [PATCH 64/88] Remove unused reg param from fdt_fixup_memory MFC after: 2 weeks X-MFC-With: r293268 Sponsored by: Multiplay --- sys/boot/fdt/fdt_loader_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/boot/fdt/fdt_loader_cmd.c b/sys/boot/fdt/fdt_loader_cmd.c index 0ce8f153a80a..a1eaf9d8bbeb 100644 --- a/sys/boot/fdt/fdt_loader_cmd.c +++ b/sys/boot/fdt/fdt_loader_cmd.c @@ -464,7 +464,7 @@ fdt_fixup_memory(struct fdt_mem_region *region, size_t num) { struct fdt_mem_region *curmr; uint32_t addr_cells, size_cells; - uint32_t *addr_cellsp, *reg, *size_cellsp; + uint32_t *addr_cellsp, *size_cellsp; int err, i, len, memory, root; size_t realmrno; uint8_t *buf, *sb; From cb03a5029b2dad93d032b3ed7e9e845c56c4924f Mon Sep 17 00:00:00 2001 From: Ravi Pokala Date: Thu, 14 Jan 2016 21:52:21 +0000 Subject: [PATCH 65/88] Add rotationrate to geom disk dumpconf Parse and report the 
nominal rotation rate reported by the drive. Reviewed by: sbruno, jhb Approved by: jhb MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D4483 Requested by: Kevin Bowling < kevin.bowling @ kev009.com > --- sys/geom/geom_disk.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c index 1943430985ff..afd1fdef7a81 100644 --- a/sys/geom/geom_disk.c +++ b/sys/geom/geom_disk.c @@ -549,6 +549,23 @@ g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g indent, dp->d_fwheads); sbuf_printf(sb, "%s%u\n", indent, dp->d_fwsectors); + + /* + * "rotationrate" is a little complicated, because the value + * returned by the drive might not be the RPM; 0 and 1 are + * special cases, and there's also a valid range. + */ + sbuf_printf(sb, "%s", indent); + if (dp->d_rotation_rate == 0) /* Old drives don't */ + sbuf_printf(sb, "unknown"); /* report RPM. */ + else if (dp->d_rotation_rate == 1) /* Since 0 is used */ + sbuf_printf(sb, "0"); /* above, SSDs use 1. */ + else if ((dp->d_rotation_rate >= 0x041) && + (dp->d_rotation_rate <= 0xfffe)) + sbuf_printf(sb, "%u", dp->d_rotation_rate); + else + sbuf_printf(sb, "invalid"); + sbuf_printf(sb, "\n"); if (dp->d_getattr != NULL) { buf = g_malloc(DISK_IDENT_SIZE, M_WAITOK); bp = g_alloc_bio(); From e83ddf77d470b0e2877045ccfa12280b4482ad93 Mon Sep 17 00:00:00 2001 From: Juli Mallett Date: Thu, 14 Jan 2016 22:07:35 +0000 Subject: [PATCH 66/88] Make it possible to specify the path to userboot.so with the -b flag. 
Reviewed by: neel --- sys/boot/userboot/test/test.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sys/boot/userboot/test/test.c b/sys/boot/userboot/test/test.c index d7ec3e4b5e99..d5707de216a8 100644 --- a/sys/boot/userboot/test/test.c +++ b/sys/boot/userboot/test/test.c @@ -414,7 +414,7 @@ void usage() { - printf("usage: [-d ] [-h \n"); + printf("usage: [-b ] [-d ] [-h \n"); exit(1); } @@ -425,9 +425,14 @@ main(int argc, char** argv) void (*func)(struct loader_callbacks *, void *, int, int); int opt; char *disk_image = NULL; + const char *userboot_obj = "/boot/userboot.so"; - while ((opt = getopt(argc, argv, "d:h:")) != -1) { + while ((opt = getopt(argc, argv, "b:d:h:")) != -1) { switch (opt) { + case 'b': + userboot_obj = optarg; + break; + case 'd': disk_image = optarg; break; @@ -441,8 +446,7 @@ main(int argc, char** argv) } } - h = dlopen("/boot/userboot.so", - RTLD_LOCAL); + h = dlopen(userboot_obj, RTLD_LOCAL); if (!h) { printf("%s\n", dlerror()); return (1); From 1026c03c2854427858e94b89c4c8559371fbc5c6 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Thu, 14 Jan 2016 22:40:46 +0000 Subject: [PATCH 67/88] Fix OpenSSH client information leak. 
Security: SA-16:07.openssh Security: CVE-2016-0777 --- crypto/openssh/readconf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/openssh/readconf.c b/crypto/openssh/readconf.c index 700a79b50ce9..9cf6ab9bae8e 100644 --- a/crypto/openssh/readconf.c +++ b/crypto/openssh/readconf.c @@ -1610,7 +1610,7 @@ initialize_options(Options * options) options->tun_remote = -1; options->local_command = NULL; options->permit_local_command = -1; - options->use_roaming = -1; + options->use_roaming = 0; options->visual_host_key = -1; options->ip_qos_interactive = -1; options->ip_qos_bulk = -1; @@ -1788,8 +1788,7 @@ fill_default_options(Options * options) options->tun_remote = SSH_TUNID_ANY; if (options->permit_local_command == -1) options->permit_local_command = 0; - if (options->use_roaming == -1) - options->use_roaming = 1; + options->use_roaming = 0; if (options->visual_host_key == -1) options->visual_host_key = 0; if (options->ip_qos_interactive == -1) From b16ddb398952b95858aef1bbc621e6e2badf3602 Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Thu, 14 Jan 2016 23:22:43 +0000 Subject: [PATCH 68/88] Adjust VM_MAX_KERNEL_ADDRESS to the max address, not the minimum next. VM_MAX_KERNEL_ADDRESS is the maximum KVA address. 0xf8000000 is the start of device mapping space. Since several conditional checks use '<=' against VM_MAX_KERNEL_ADDRESS, bad things could feasibly happen.
--- sys/powerpc/include/vmparam.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/powerpc/include/vmparam.h b/sys/powerpc/include/vmparam.h index 5b808abc1fee..1d7a06e5e6f8 100644 --- a/sys/powerpc/include/vmparam.h +++ b/sys/powerpc/include/vmparam.h @@ -111,7 +111,7 @@ #define KERNBASE 0xc0000000 /* start of kernel virtual */ #define VM_MIN_KERNEL_ADDRESS KERNBASE -#define VM_MAX_KERNEL_ADDRESS 0xf8000000 +#define VM_MAX_KERNEL_ADDRESS 0xf7ffffff #define VM_MAX_SAFE_KERNEL_ADDRESS VM_MAX_KERNEL_ADDRESS #endif /* AIM/E500 */ @@ -129,7 +129,7 @@ struct pmap_physseg { * The physical address space is densely populated on 32-bit systems, * but may not be on 64-bit ones. */ -#ifdef __powerpc64__ +#ifdef __powerpc__ #define VM_PHYSSEG_SPARSE #else #define VM_PHYSSEG_DENSE From c7e732ae61ef0abed9f1d2167e1ffdb7d06f4829 Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Fri, 15 Jan 2016 00:26:15 +0000 Subject: [PATCH 69/88] Fix a bug in INIT handling on accepted 1-to-1 style sockets when the listener is closed. This fix allows the following packetdrill test to pass: // Setup a connected, blocking 1-to-1 style socket +0.0 socket(..., SOCK_STREAM, IPPROTO_SCTP) = 3 // Check the handshake with an empty(!) cookie +0.0 bind(3, ..., ...) = 0 +0.0 listen(3, 1) = 0 +0.0 < sctp: INIT[flgs=0, tag=1, a_rwnd=1500, os=1, is=1, tsn=1] +0.0 > sctp: INIT_ACK[flgs=0, tag=2, a_rwnd=..., os=..., is=..., tsn=1, ...] +0.0 < sctp: COOKIE_ECHO[flgs=0, len=..., val=...] +0.0 > sctp: COOKIE_ACK[flgs=0] +0.0 accept(3, ..., ...) = 4 +0.0 close(3) = 0 // Inject an INIT chunk and expect an INIT-ACK +0.0 < sctp: INIT[flgs=0, tag=3, a_rwnd=1500, os=1, is=1, tsn=1] +0.0 > sctp: INIT_ACK[flgs=0, tag=..., a_rwnd=..., os=..., is=..., tsn=..., ...]
MFC after: 3 days --- sys/netinet/sctp_pcb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index fc3be4c955dc..d4a9dff0bb22 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -2269,8 +2269,12 @@ sctp_findassociation_addr(struct mbuf *m, int offset, } } find_tcp_pool = 0; - if ((ch->chunk_type != SCTP_INITIATION) && - (ch->chunk_type != SCTP_INITIATION_ACK) && + /* + * Don't consider INIT chunks since that breaks 1-to-1 sockets: When + * a server closes the listener, incoming INIT chunks are not + * responsed by an INIT-ACK chunk. + */ + if ((ch->chunk_type != SCTP_INITIATION_ACK) && (ch->chunk_type != SCTP_COOKIE_ACK) && (ch->chunk_type != SCTP_COOKIE_ECHO)) { /* Other chunk types go to the tcp pool. */ From dc9ba0270bf3ef2c3de5fbd3cf427ffbbabf7428 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Fri, 15 Jan 2016 00:55:36 +0000 Subject: [PATCH 70/88] Make common boot file_loadraw name parameter const Fix compiler warnings about dropping const qualifier by changing file_loadraw name param to const, and updating method to make that the case (it was abusing the variable). 
MFC after: 2 weeks X-MFC-With: r293268 Sponsored by: Multiplay --- sys/boot/common/bootstrap.h | 2 +- sys/boot/common/module.c | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/sys/boot/common/bootstrap.h b/sys/boot/common/bootstrap.h index 78d742d62718..7efc2c2ddc68 100644 --- a/sys/boot/common/bootstrap.h +++ b/sys/boot/common/bootstrap.h @@ -237,7 +237,7 @@ void unload(void); struct preloaded_file *file_alloc(void); struct preloaded_file *file_findfile(const char *name, const char *type); struct file_metadata *file_findmetadata(struct preloaded_file *fp, int type); -struct preloaded_file *file_loadraw(char *name, char *type, int insert); +struct preloaded_file *file_loadraw(const char *name, char *type, int insert); void file_discard(struct preloaded_file *fp); void file_addmetadata(struct preloaded_file *fp, int type, size_t size, void *p); int file_addmodule(struct preloaded_file *fp, char *modname, int version, diff --git a/sys/boot/common/module.c b/sys/boot/common/module.c index d73f1c8d0fb9..923da5b4a795 100644 --- a/sys/boot/common/module.c +++ b/sys/boot/common/module.c @@ -388,14 +388,14 @@ file_load_dependencies(struct preloaded_file *base_file) } /* - * We've been asked to load (name) as (type), so just suck it in, + * We've been asked to load (fname) as (type), so just suck it in, * no arguments or anything. 
*/ struct preloaded_file * -file_loadraw(char *name, char *type, int insert) +file_loadraw(const char *fname, char *type, int insert) { struct preloaded_file *fp; - char *cp; + char *name; int fd, got; vm_offset_t laddr; @@ -406,12 +406,11 @@ file_loadraw(char *name, char *type, int insert) } /* locate the file on the load path */ - cp = file_search(name, NULL); - if (cp == NULL) { - sprintf(command_errbuf, "can't find '%s'", name); + name = file_search(fname, NULL); + if (name == NULL) { + sprintf(command_errbuf, "can't find '%s'", fname); return(NULL); } - name = cp; if ((fd = open(name, O_RDONLY)) < 0) { sprintf(command_errbuf, "can't open '%s': %s", name, strerror(errno)); From 108d68faae30a091d00f2ef97190c463acfbef10 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Fri, 15 Jan 2016 01:06:37 +0000 Subject: [PATCH 71/88] Ensure boot fsread correctly probes all partitions The boot code fsread was caching the result of meta data request and reusing it even for calls with inode = 0, which is used to trigger a partition probe. The result was that success was incorrectly returned for all partition probes after the first valid success, even for partitions which are not UFS. MFC after: 2 weeks X-MFC-With: r293268 Sponsored by: Multiplay --- sys/boot/common/ufsread.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sys/boot/common/ufsread.c b/sys/boot/common/ufsread.c index acff1e56b84e..08ab697d881b 100644 --- a/sys/boot/common/ufsread.c +++ b/sys/boot/common/ufsread.c @@ -187,8 +187,15 @@ fsread(ufs_ino_t inode, void *buf, size_t nbyte) blkbuf = dmadat->blkbuf; indbuf = dmadat->indbuf; - if (!dsk_meta) { + + /* + * Force probe if inode is zero to ensure we have a valid fs, otherwise + * when probing multiple paritions, reads from subsequent parititions + * will incorrectly succeed.
+ */ + if (!dsk_meta || inode == 0) { inomap = 0; + dsk_meta = 0; for (n = 0; sblock_try[n] != -1; n++) { if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE, SBLOCKSIZE / DEV_BSIZE)) From b2ec7c304ef4f5898b18d006c22cc010e18457f4 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Fri, 15 Jan 2016 01:22:36 +0000 Subject: [PATCH 72/88] Modularise EFI boot loader Make EFI boot loader modular in preparation for adding ZFS support. This is a partial commit of the D4515. Submitted by: Eric McCorkle Reviewed by: emaste (in part) MFC after: 2 weeks X-MFC-With: r293268 Sponsored by: Multiplay Differential Revision: https://reviews.freebsd.org/D4515 --- sys/boot/efi/boot1/Makefile | 5 +- sys/boot/efi/boot1/boot1.c | 430 +++++++++++++++++-------------- sys/boot/efi/boot1/boot_module.h | 110 ++++++++ sys/boot/efi/boot1/ufs_module.c | 253 ++++++++++++++++++ sys/boot/efi/include/efilib.h | 1 - sys/boot/efi/loader/devicename.c | 37 +-- sys/boot/efi/loader/main.c | 63 +++-- 7 files changed, 668 insertions(+), 231 deletions(-) create mode 100644 sys/boot/efi/boot1/boot_module.h create mode 100644 sys/boot/efi/boot1/ufs_module.c diff --git a/sys/boot/efi/boot1/Makefile b/sys/boot/efi/boot1/Makefile index 64f397bb1412..c05be8dd82e8 100644 --- a/sys/boot/efi/boot1/Makefile +++ b/sys/boot/efi/boot1/Makefile @@ -2,7 +2,7 @@ MAN= -.include +.include MK_SSP= no @@ -11,13 +11,14 @@ INTERNALPROG= WARNS?= 6 # architecture-specific loader code -SRCS= boot1.c self_reloc.c start.S +SRCS= boot1.c self_reloc.c start.S ufs_module.c CFLAGS+= -I. CFLAGS+= -I${.CURDIR}/../include CFLAGS+= -I${.CURDIR}/../include/${MACHINE} CFLAGS+= -I${.CURDIR}/../../../contrib/dev/acpica/include CFLAGS+= -I${.CURDIR}/../../.. 
+CFLAGS+= -DEFI_UFS_BOOT # Always add MI sources and REGULAR efi loader bits .PATH: ${.CURDIR}/../loader/arch/${MACHINE} diff --git a/sys/boot/efi/boot1/boot1.c b/sys/boot/efi/boot1/boot1.c index 2b000e066d8e..6c85ea220f80 100644 --- a/sys/boot/efi/boot1/boot1.c +++ b/sys/boot/efi/boot1/boot1.c @@ -5,6 +5,8 @@ * All rights reserved. * Copyright (c) 2014 Nathan Whitehorn * All rights reserved. + * Copyright (c) 2015 Eric McCorkle + * All rights reserved. * * Redistribution and use in source and binary forms are freely * permitted provided that the above copyright notice and this @@ -21,7 +23,6 @@ __FBSDID("$FreeBSD$"); #include -#include #include #include #include @@ -29,19 +30,29 @@ __FBSDID("$FreeBSD$"); #include #include +#include "boot_module.h" + #define _PATH_LOADER "/boot/loader.efi" -#define _PATH_KERNEL "/boot/kernel/kernel" -#define BSIZEMAX 16384 +static const boot_module_t *boot_modules[] = +{ +#ifdef EFI_UFS_BOOT + &ufs_module +#endif +}; + +#define NUM_BOOT_MODULES (sizeof(boot_modules) / sizeof(boot_module_t*)) +/* The initial number of handles used to query EFI for partitions. */ +#define NUM_HANDLES_INIT 24 -void panic(const char *fmt, ...) 
__dead2; void putchar(int c); EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab); -static int domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet); -static void load(const char *fname); +static void try_load(const boot_module_t* mod); +static EFI_STATUS probe_handle(EFI_HANDLE h); -static EFI_SYSTEM_TABLE *systab; +EFI_SYSTEM_TABLE *systab; +EFI_BOOT_SERVICES *bs; static EFI_HANDLE *image; static EFI_GUID BlockIoProtocolGUID = BLOCK_IO_PROTOCOL; @@ -49,27 +60,92 @@ static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL; static EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL; static EFI_GUID ConsoleControlGUID = EFI_CONSOLE_CONTROL_PROTOCOL_GUID; -static EFI_BLOCK_IO *bootdev; -static EFI_DEVICE_PATH *bootdevpath; -static EFI_HANDLE *bootdevhandle; - -EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab) +/* + * Provide Malloc / Free backed by EFIs AllocatePool / FreePool which ensures + * memory is correctly aligned avoiding EFI_INVALID_PARAMETER returns from + * EFI methods. + */ +void * +Malloc(size_t len, const char *file __unused, int line __unused) { - EFI_HANDLE handles[128]; - EFI_BLOCK_IO *blkio; - UINTN i, nparts = sizeof(handles), cols, rows, max_dim, best_mode; + void *out; + + if (bs->AllocatePool(EfiLoaderData, len, &out) == EFI_SUCCESS) + return (out); + + return (NULL); +} + +void +Free(void *buf, const char *file __unused, int line __unused) +{ + (void)bs->FreePool(buf); +} + +/* + * This function only returns if it fails to load the kernel. If it + * succeeds, it simply boots the kernel. 
+ */ +void +try_load(const boot_module_t *mod) +{ + size_t bufsize; + void *buf; + dev_info_t *dev; + EFI_HANDLE loaderhandle; + EFI_LOADED_IMAGE *loaded_image; + EFI_STATUS status; + + status = mod->load(_PATH_LOADER, &dev, &buf, &bufsize); + if (status == EFI_NOT_FOUND) + return; + + if (status != EFI_SUCCESS) { + printf("%s failed to load %s (%lu)\n", mod->name, _PATH_LOADER, + EFI_ERROR_CODE(status)); + return; + } + + if ((status = bs->LoadImage(TRUE, image, dev->devpath, buf, bufsize, + &loaderhandle)) != EFI_SUCCESS) { + printf("Failed to load image provided by %s, size: %zu, (%lu)\n", + mod->name, bufsize, EFI_ERROR_CODE(status)); + return; + } + + if ((status = bs->HandleProtocol(loaderhandle, &LoadedImageGUID, + (VOID**)&loaded_image)) != EFI_SUCCESS) { + printf("Failed to query LoadedImage provided by %s (%lu)\n", + mod->name, EFI_ERROR_CODE(status)); + return; + } + + loaded_image->DeviceHandle = dev->devhandle; + + if ((status = bs->StartImage(loaderhandle, NULL, NULL)) != + EFI_SUCCESS) { + printf("Failed start image provided by %s (%lu)\n", mod->name, + EFI_ERROR_CODE(status)); + return; + } +} + +EFI_STATUS +efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE *Xsystab) +{ + EFI_HANDLE *handles; EFI_STATUS status; - EFI_DEVICE_PATH *devpath; - EFI_BOOT_SERVICES *BS; EFI_CONSOLE_CONTROL_PROTOCOL *ConsoleControl = NULL; SIMPLE_TEXT_OUTPUT_INTERFACE *conout = NULL; - const char *path = _PATH_LOADER; + UINTN i, max_dim, best_mode, cols, rows, hsize, nhandles; + /* Basic initialization*/ systab = Xsystab; image = Ximage; + bs = Xsystab->BootServices; - BS = systab->BootServices; - status = BS->LocateProtocol(&ConsoleControlGUID, NULL, + /* Set up the console, so printf works. 
*/ + status = bs->LocateProtocol(&ConsoleControlGUID, NULL, (VOID **)&ConsoleControl); if (status == EFI_SUCCESS) (void)ConsoleControl->SetMode(ConsoleControl, @@ -94,200 +170,162 @@ EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab) conout->EnableCursor(conout, TRUE); conout->ClearScreen(conout); - printf("\n" - ">> FreeBSD EFI boot block\n"); - printf(" Loader path: %s\n", path); - - status = systab->BootServices->LocateHandle(ByProtocol, - &BlockIoProtocolGUID, NULL, &nparts, handles); - nparts /= sizeof(handles[0]); - - for (i = 0; i < nparts; i++) { - status = systab->BootServices->HandleProtocol(handles[i], - &DevicePathGUID, (void **)&devpath); - if (EFI_ERROR(status)) + printf("\n>> FreeBSD EFI boot block\n"); + printf(" Loader path: %s\n\n", _PATH_LOADER); + printf(" Initializing modules:"); + for (i = 0; i < NUM_BOOT_MODULES; i++) { + if (boot_modules[i] == NULL) continue; - while (!IsDevicePathEnd(NextDevicePathNode(devpath))) - devpath = NextDevicePathNode(devpath); + printf(" %s", boot_modules[i]->name); + if (boot_modules[i]->init != NULL) + boot_modules[i]->init(); + } + putchar('\n'); - status = systab->BootServices->HandleProtocol(handles[i], - &BlockIoProtocolGUID, (void **)&blkio); - if (EFI_ERROR(status)) - continue; + /* Get all the device handles */ + hsize = (UINTN)NUM_HANDLES_INIT * sizeof(EFI_HANDLE); + if ((status = bs->AllocatePool(EfiLoaderData, hsize, (void **)&handles)) + != EFI_SUCCESS) + panic("Failed to allocate %d handles (%lu)", NUM_HANDLES_INIT, + EFI_ERROR_CODE(status)); - if (!blkio->Media->LogicalPartition) - continue; + status = bs->LocateHandle(ByProtocol, &BlockIoProtocolGUID, NULL, + &hsize, handles); + switch (status) { + case EFI_SUCCESS: + break; + case EFI_BUFFER_TOO_SMALL: + (void)bs->FreePool(handles); + if ((status = bs->AllocatePool(EfiLoaderData, hsize, + (void **)&handles) != EFI_SUCCESS)) { + panic("Failed to allocate %zu handles (%lu)", hsize / + sizeof(*handles), EFI_ERROR_CODE(status)); + } + 
status = bs->LocateHandle(ByProtocol, &BlockIoProtocolGUID, + NULL, &hsize, handles); + if (status != EFI_SUCCESS) + panic("Failed to get device handles (%lu)\n", + EFI_ERROR_CODE(status)); + break; + default: + panic("Failed to get device handles (%lu)", + EFI_ERROR_CODE(status)); + } - if (domount(devpath, blkio, 1) >= 0) + /* Scan all partitions, probing with all modules. */ + nhandles = hsize / sizeof(*handles); + printf(" Probing %zu block devices...", nhandles); + for (i = 0; i < nhandles; i++) { + status = probe_handle(handles[i]); + switch (status) { + case EFI_UNSUPPORTED: + printf("."); + break; + case EFI_SUCCESS: + printf("+"); + break; + default: + printf("x"); break; - } - - if (i == nparts) - panic("No bootable partition found"); - - bootdevhandle = handles[i]; - load(path); - - panic("Load failed"); - - return EFI_SUCCESS; -} - -static int -dskread(void *buf, u_int64_t lba, int nblk) -{ - EFI_STATUS status; - int size; - - lba = lba / (bootdev->Media->BlockSize / DEV_BSIZE); - size = nblk * DEV_BSIZE; - status = bootdev->ReadBlocks(bootdev, bootdev->Media->MediaId, lba, - size, buf); - - if (EFI_ERROR(status)) - return (-1); - - return (0); -} - -#include "ufsread.c" - -static ssize_t -fsstat(ufs_ino_t inode) -{ -#ifndef UFS2_ONLY - static struct ufs1_dinode dp1; -#endif -#ifndef UFS1_ONLY - static struct ufs2_dinode dp2; -#endif - static struct fs fs; - static ufs_ino_t inomap; - char *blkbuf; - void *indbuf; - size_t n, size; - static ufs2_daddr_t blkmap, indmap; - - blkbuf = dmadat->blkbuf; - indbuf = dmadat->indbuf; - if (!dsk_meta) { - inomap = 0; - for (n = 0; sblock_try[n] != -1; n++) { - if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE, - SBLOCKSIZE / DEV_BSIZE)) - return -1; - memcpy(&fs, dmadat->sbbuf, sizeof(struct fs)); - if (( -#if defined(UFS1_ONLY) - fs.fs_magic == FS_UFS1_MAGIC -#elif defined(UFS2_ONLY) - (fs.fs_magic == FS_UFS2_MAGIC && - fs.fs_sblockloc == sblock_try[n]) -#else - fs.fs_magic == FS_UFS1_MAGIC || - (fs.fs_magic 
== FS_UFS2_MAGIC && - fs.fs_sblockloc == sblock_try[n]) -#endif - ) && - fs.fs_bsize <= MAXBSIZE && - fs.fs_bsize >= (int32_t)sizeof(struct fs)) - break; } - if (sblock_try[n] == -1) { - return -1; + } + printf(" done\n"); + + /* Status summary. */ + for (i = 0; i < NUM_BOOT_MODULES; i++) { + if (boot_modules[i] != NULL) { + printf(" "); + boot_modules[i]->status(); } - dsk_meta++; - } else - memcpy(&fs, dmadat->sbbuf, sizeof(struct fs)); - if (!inode) - return 0; - if (inomap != inode) { - n = IPERVBLK(&fs); - if (dskread(blkbuf, INO_TO_VBA(&fs, n, inode), DBPERVBLK)) - return -1; - n = INO_TO_VBO(n, inode); -#if defined(UFS1_ONLY) - memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n, - sizeof(struct ufs1_dinode)); -#elif defined(UFS2_ONLY) - memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n, - sizeof(struct ufs2_dinode)); -#else - if (fs.fs_magic == FS_UFS1_MAGIC) - memcpy(&dp1, (struct ufs1_dinode *)(void *)blkbuf + n, - sizeof(struct ufs1_dinode)); - else - memcpy(&dp2, (struct ufs2_dinode *)(void *)blkbuf + n, - sizeof(struct ufs2_dinode)); -#endif - inomap = inode; - fs_off = 0; - blkmap = indmap = 0; } - size = DIP(di_size); - n = size - fs_off; - return (n); + + /* Select a partition to boot by trying each module in order. */ + for (i = 0; i < NUM_BOOT_MODULES; i++) + if (boot_modules[i] != NULL) + try_load(boot_modules[i]); + + /* If we get here, we're out of luck... 
*/ + panic("No bootable partitions found!"); } -static struct dmadat __dmadat; - -static int -domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet) +static EFI_STATUS +probe_handle(EFI_HANDLE h) { - - dmadat = &__dmadat; - bootdev = blkio; - bootdevpath = device; - if (fsread(0, NULL, 0)) { - if (!quiet) - printf("domount: can't read superblock\n"); - return (-1); - } - if (!quiet) - printf("Succesfully mounted UFS filesystem\n"); - return (0); -} - -static void -load(const char *fname) -{ - ufs_ino_t ino; + dev_info_t *devinfo; + EFI_BLOCK_IO *blkio; + EFI_DEVICE_PATH *devpath; EFI_STATUS status; - EFI_HANDLE loaderhandle; - EFI_LOADED_IMAGE *loaded_image; - void *buffer; - size_t bufsize; + UINTN i; - if ((ino = lookup(fname)) == 0) { - printf("File %s not found\n", fname); + /* Figure out if we're dealing with an actual partition. */ + status = bs->HandleProtocol(h, &DevicePathGUID, (void **)&devpath); + if (status == EFI_UNSUPPORTED) + return (status); + + if (status != EFI_SUCCESS) { + DPRINTF("\nFailed to query DevicePath (%lu)\n", + EFI_ERROR_CODE(status)); + return (status); + } + + while (!IsDevicePathEnd(NextDevicePathNode(devpath))) + devpath = NextDevicePathNode(devpath); + + status = bs->HandleProtocol(h, &BlockIoProtocolGUID, (void **)&blkio); + if (status == EFI_UNSUPPORTED) + return (status); + + if (status != EFI_SUCCESS) { + DPRINTF("\nFailed to query BlockIoProtocol (%lu)\n", + EFI_ERROR_CODE(status)); + return (status); + } + + if (!blkio->Media->LogicalPartition) + return (EFI_UNSUPPORTED); + + /* Run through each module, see if it can load this partition */ + for (i = 0; i < NUM_BOOT_MODULES; i++) { + if (boot_modules[i] == NULL) + continue; + + if ((status = bs->AllocatePool(EfiLoaderData, + sizeof(*devinfo), (void **)&devinfo)) != + EFI_SUCCESS) { + DPRINTF("\nFailed to allocate devinfo (%lu)\n", + EFI_ERROR_CODE(status)); + continue; + } + devinfo->dev = blkio; + devinfo->devpath = devpath; + devinfo->devhandle = h; + 
devinfo->devdata = NULL; + devinfo->next = NULL; + + status = boot_modules[i]->probe(devinfo); + if (status == EFI_SUCCESS) + return (EFI_SUCCESS); + (void)bs->FreePool(devinfo); + } + + return (EFI_UNSUPPORTED); +} + +void +add_device(dev_info_t **devinfop, dev_info_t *devinfo) +{ + dev_info_t *dev; + + if (*devinfop == NULL) { + *devinfop = devinfo; return; } - bufsize = fsstat(ino); - status = systab->BootServices->AllocatePool(EfiLoaderData, - bufsize, &buffer); - fsread(ino, buffer, bufsize); + for (dev = *devinfop; dev->next != NULL; dev = dev->next) + ; - /* XXX: For secure boot, we need our own loader here */ - status = systab->BootServices->LoadImage(TRUE, image, bootdevpath, - buffer, bufsize, &loaderhandle); - if (EFI_ERROR(status)) - printf("LoadImage failed with error %lu\n", - EFI_ERROR_CODE(status)); - - status = systab->BootServices->HandleProtocol(loaderhandle, - &LoadedImageGUID, (VOID**)&loaded_image); - if (EFI_ERROR(status)) - printf("HandleProtocol failed with error %lu\n", - EFI_ERROR_CODE(status)); - - loaded_image->DeviceHandle = bootdevhandle; - - status = systab->BootServices->StartImage(loaderhandle, NULL, NULL); - if (EFI_ERROR(status)) - printf("StartImage failed with error %lu\n", - EFI_ERROR_CODE(status)); + dev->next = devinfo; } void diff --git a/sys/boot/efi/boot1/boot_module.h b/sys/boot/efi/boot1/boot_module.h new file mode 100644 index 000000000000..ceb8843ba787 --- /dev/null +++ b/sys/boot/efi/boot1/boot_module.h @@ -0,0 +1,110 @@ +/*- + * Copyright (c) 2015 Eric McCorkle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _BOOT_MODULE_H_ +#define _BOOT_MODULE_H_ + +#include + +#include +#include +#include + +#ifdef EFI_DEBUG +#define DPRINTF(fmt, args...) \ + do { \ + printf(fmt, ##args); \ + } while (0) +#else +#define DPRINTF(fmt, args...) {} +#endif + +/* EFI device info */ +typedef struct dev_info +{ + EFI_BLOCK_IO *dev; + EFI_DEVICE_PATH *devpath; + EFI_HANDLE *devhandle; + void *devdata; + struct dev_info *next; +} dev_info_t; + +/* + * A boot loader module. + * + * This is a standard interface for filesystem modules in the EFI system. + */ +typedef struct boot_module_t +{ + const char *name; + + /* init is the optional initialiser for the module. */ + void (*init)(); + + /* + * probe checks to see if the module can handle dev. + * + * Return codes: + * EFI_SUCCESS = The module can handle the device. + * EFI_NOT_FOUND = The module can not handle the device. + * Other = The module encountered an error.
+ */ + EFI_STATUS (*probe)(dev_info_t* dev); + + /* + * load should select the best out of a set of devices that probe + * indicated were loadable and load it. + * + * Return codes: + * EFI_SUCCESS = The module can handle the device. + * EFI_NOT_FOUND = The module can not handle the device. + * Other = The module encountered an error. + */ + EFI_STATUS (*load)(const char *loader_path, dev_info_t **devinfo, + void **buf, size_t *bufsize); + + /* status outputs information about the probed devices. */ + void (*status)(); + +} boot_module_t; + +/* Standard boot modules. */ +#ifdef EFI_UFS_BOOT +extern const boot_module_t ufs_module; +#endif + +/* Functions available to modules. */ +extern void add_device(dev_info_t **devinfop, dev_info_t *devinfo); +extern void panic(const char *fmt, ...) __dead2; +extern int printf(const char *fmt, ...); +extern int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap); + +extern EFI_SYSTEM_TABLE *systab; +extern EFI_BOOT_SERVICES *bs; + +#endif diff --git a/sys/boot/efi/boot1/ufs_module.c b/sys/boot/efi/boot1/ufs_module.c new file mode 100644 index 000000000000..7d36f84df216 --- /dev/null +++ b/sys/boot/efi/boot1/ufs_module.c @@ -0,0 +1,253 @@ +/*- + * Copyright (c) 1998 Robert Nordier + * All rights reserved. + * Copyright (c) 2001 Robert Drehmel + * All rights reserved. + * Copyright (c) 2014 Nathan Whitehorn + * All rights reserved. + * Copyright (c) 2015 Eric McCorkle + * All rights reverved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include + +#include "boot_module.h" + +static dev_info_t *devinfo; +static dev_info_t *devices; + +static int +dskread(void *buf, u_int64_t lba, int nblk) +{ + int size; + EFI_STATUS status; + + lba = lba / (devinfo->dev->Media->BlockSize / DEV_BSIZE); + size = nblk * DEV_BSIZE; + + status = devinfo->dev->ReadBlocks(devinfo->dev, + devinfo->dev->Media->MediaId, lba, size, buf); + + if (status != EFI_SUCCESS) { + DPRINTF("dskread: failed dev: %p, id: %u, lba: %lu, size: %d, " + "status: %lu\n", devinfo->dev, + devinfo->dev->Media->MediaId, lba, size, + EFI_ERROR_CODE(status)); + return (-1); + } + + return (0); +} + +#include "ufsread.c" + +static ssize_t +fsstat(ufs_ino_t inode) +{ +#ifndef UFS2_ONLY + static struct ufs1_dinode dp1; +#endif +#ifndef UFS1_ONLY + static struct ufs2_dinode dp2; +#endif + static struct fs fs; + static ufs_ino_t inomap; + char *blkbuf; + void *indbuf; + size_t n, size; + static ufs2_daddr_t blkmap, indmap; + + blkbuf = dmadat->blkbuf; + indbuf = dmadat->indbuf; + if (!dsk_meta) { + inomap = 0; + for (n = 0; sblock_try[n] != -1; n++) { + if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE, + 
SBLOCKSIZE / DEV_BSIZE)) + return (-1); + memcpy(&fs, dmadat->sbbuf, sizeof(struct fs)); + if (( +#if defined(UFS1_ONLY) + fs.fs_magic == FS_UFS1_MAGIC +#elif defined(UFS2_ONLY) + (fs.fs_magic == FS_UFS2_MAGIC && + fs.fs_sblockloc == sblock_try[n]) +#else + fs.fs_magic == FS_UFS1_MAGIC || + (fs.fs_magic == FS_UFS2_MAGIC && + fs.fs_sblockloc == sblock_try[n]) +#endif + ) && + fs.fs_bsize <= MAXBSIZE && + fs.fs_bsize >= (int32_t)sizeof(struct fs)) + break; + } + if (sblock_try[n] == -1) { + return (-1); + } + dsk_meta++; + } else + memcpy(&fs, dmadat->sbbuf, sizeof(struct fs)); + if (!inode) + return (0); + if (inomap != inode) { + n = IPERVBLK(&fs); + if (dskread(blkbuf, INO_TO_VBA(&fs, n, inode), DBPERVBLK)) + return (-1); + n = INO_TO_VBO(n, inode); +#if defined(UFS1_ONLY) + memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n, + sizeof(struct ufs1_dinode)); +#elif defined(UFS2_ONLY) + memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n, + sizeof(struct ufs2_dinode)); +#else + if (fs.fs_magic == FS_UFS1_MAGIC) + memcpy(&dp1, (struct ufs1_dinode *)(void *)blkbuf + n, + sizeof(struct ufs1_dinode)); + else + memcpy(&dp2, (struct ufs2_dinode *)(void *)blkbuf + n, + sizeof(struct ufs2_dinode)); +#endif + inomap = inode; + fs_off = 0; + blkmap = indmap = 0; + } + size = DIP(di_size); + n = size - fs_off; + + return (n); +} + +static struct dmadat __dmadat; + +static EFI_STATUS +probe(dev_info_t* dev) +{ + + devinfo = dev; + dmadat = &__dmadat; + if (fsread(0, NULL, 0) < 0) + return (EFI_UNSUPPORTED); + + add_device(&devices, dev); + + return (EFI_SUCCESS); +} + +static EFI_STATUS +try_load(dev_info_t *dev, const char *loader_path, void **bufp, size_t *bufsize) +{ + ufs_ino_t ino; + EFI_STATUS status; + size_t size; + ssize_t read; + void *buf; + + devinfo = dev; + if ((ino = lookup(loader_path)) == 0) + return (EFI_NOT_FOUND); + + size = fsstat(ino); + if ((ssize_t)size <= 0) { + printf("Failed to fsstat %s ino: %d\n", loader_path, ino); + return (EFI_INVALID_PARAMETER); + } + + if ((status
= bs->AllocatePool(EfiLoaderData, size, &buf)) != + EFI_SUCCESS) { + printf("Failed to allocate read buffer (%lu)\n", + EFI_ERROR_CODE(status)); + return (status); + } + + read = fsread(ino, buf, size); + if ((size_t)read != size) { + printf("Failed to read %s (%zd != %zu)\n", loader_path, read, + size); + (void)bs->FreePool(buf); + return (EFI_INVALID_PARAMETER); + } + + *bufp = buf; + *bufsize = size; + + return (EFI_SUCCESS); +} + +static EFI_STATUS +load(const char *loader_path, dev_info_t **devinfop, void **buf, + size_t *bufsize) +{ + dev_info_t *dev; + EFI_STATUS status; + + for (dev = devices; dev != NULL; dev = dev->next) { + status = try_load(dev, loader_path, buf, bufsize); + if (status == EFI_SUCCESS) { + *devinfop = dev; + return (EFI_SUCCESS); + } else if (status != EFI_NOT_FOUND) { + return (status); + } + } + + return (EFI_NOT_FOUND); +} + +static void +status() +{ + int i; + dev_info_t *dev; + + for (dev = devices, i = 0; dev != NULL; dev = dev->next, i++) + ; + + printf("%s found ", ufs_module.name); + switch (i) { + case 0: + printf("no partitions\n"); + break; + case 1: + printf("%d partition\n", i); + break; + default: + printf("%d partitions\n", i); + } +} + +const boot_module_t ufs_module = +{ + .name = "UFS", + .probe = probe, + .load = load, + .status = status +}; diff --git a/sys/boot/efi/include/efilib.h b/sys/boot/efi/include/efilib.h index b67ffc5305ca..a6197b9f5db2 100644 --- a/sys/boot/efi/include/efilib.h +++ b/sys/boot/efi/include/efilib.h @@ -39,7 +39,6 @@ extern struct devsw efinet_dev; extern struct netif_driver efinetif; void *efi_get_table(EFI_GUID *tbl); -void efi_main(EFI_HANDLE image_handle, EFI_SYSTEM_TABLE *system_table); int efi_register_handles(struct devsw *, EFI_HANDLE *, EFI_HANDLE *, int); EFI_HANDLE efi_find_handle(struct devsw *, int); diff --git a/sys/boot/efi/loader/devicename.c b/sys/boot/efi/loader/devicename.c index 45f9871bd45d..c6591c879717 100644 --- a/sys/boot/efi/loader/devicename.c +++ 
b/sys/boot/efi/loader/devicename.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -86,7 +87,7 @@ efi_parsedev(struct devdesc **dev, const char *devspec, const char **path) struct devsw *dv; char *cp; const char *np; - int i, err; + int i; /* minimum length check */ if (strlen(devspec) < 2) @@ -101,24 +102,26 @@ efi_parsedev(struct devdesc **dev, const char *devspec, const char **path) if (devsw[i] == NULL) return (ENOENT); - idev = malloc(sizeof(struct devdesc)); - if (idev == NULL) - return (ENOMEM); - - idev->d_dev = dv; - idev->d_type = dv->dv_type; - idev->d_unit = -1; - - err = 0; np = devspec + strlen(dv->dv_name); - if (*np != '\0' && *np != ':') { - idev->d_unit = strtol(np, &cp, 0); - if (cp == np) { - idev->d_unit = -1; - free(idev); - return (EUNIT); + + { + idev = malloc(sizeof(struct devdesc)); + if (idev == NULL) + return (ENOMEM); + + idev->d_dev = dv; + idev->d_type = dv->dv_type; + idev->d_unit = -1; + if (*np != '\0' && *np != ':') { + idev->d_unit = strtol(np, &cp, 0); + if (cp == np) { + idev->d_unit = -1; + free(idev); + return (EUNIT); + } } } + if (*cp != '\0' && *cp != ':') { free(idev); return (EINVAL); @@ -137,7 +140,7 @@ char * efi_fmtdev(void *vdev) { struct devdesc *dev = (struct devdesc *)vdev; - static char buf[32]; /* XXX device length constant? 
*/ + static char buf[SPECNAMELEN + 1]; switch(dev->d_type) { case DEVT_NONE: diff --git a/sys/boot/efi/loader/main.c b/sys/boot/efi/loader/main.c index ddd96b41e572..43a015dd0253 100644 --- a/sys/boot/efi/loader/main.c +++ b/sys/boot/efi/loader/main.c @@ -28,6 +28,7 @@ #include __FBSDID("$FreeBSD$"); +#include #include #include #include @@ -45,7 +46,6 @@ extern char bootprog_rev[]; extern char bootprog_date[]; extern char bootprog_maker[]; -struct devdesc currdev; /* our current device */ struct arch_switch archsw; /* MI/MD interface boundary */ EFI_GUID acpi = ACPI_TABLE_GUID; @@ -61,15 +61,36 @@ EFI_GUID memtype = MEMORY_TYPE_INFORMATION_TABLE_GUID; EFI_GUID debugimg = DEBUG_IMAGE_INFO_TABLE_GUID; EFI_GUID fdtdtb = FDT_TABLE_GUID; +/* + * Need this because EFI uses UTF-16 unicode string constants, but we + * use UTF-8. We can't use printf due to the possiblity of \0 and we + * don't support support wide characters either. + */ +static void +print_str16(const CHAR16 *str) +{ + int i; + + for (i = 0; str[i]; i++) + printf("%c", (char)str[i]); +} + EFI_STATUS main(int argc, CHAR16 *argv[]) { char var[128]; EFI_LOADED_IMAGE *img; EFI_GUID *guid; - int i, j, vargood; + int i, j, vargood, unit; + struct devsw *dev; UINTN k; + archsw.arch_autoload = efi_autoload; + archsw.arch_getdev = efi_getdev; + archsw.arch_copyin = efi_copyin; + archsw.arch_copyout = efi_copyout; + archsw.arch_readin = efi_readin; + /* * XXX Chicken-and-egg problem; we want to have console output * early, but some console attributes may depend on reading from @@ -116,6 +137,13 @@ main(int argc, CHAR16 *argv[]) /* Get our loaded image protocol interface structure. 
*/ BS->HandleProtocol(IH, &imgid, (VOID**)&img); + printf("Command line arguments:"); + for (i = 0; i < argc; i++) { + printf(" "); + print_str16(argv[i]); + } + printf("\n"); + printf("Image base: 0x%lx\n", (u_long)img->ImageBase); printf("EFI version: %d.%02d\n", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff); @@ -129,9 +157,6 @@ main(int argc, CHAR16 *argv[]) printf("%s, Revision %s\n", bootprog_name, bootprog_rev); printf("(%s, %s)\n", bootprog_maker, bootprog_date); - efi_handle_lookup(img->DeviceHandle, &currdev.d_dev, &currdev.d_unit); - currdev.d_type = currdev.d_dev->dv_type; - /* * Disable the watchdog timer. By default the boot manager sets * the timer to 5 minutes before invoking a boot option. If we @@ -143,19 +168,27 @@ main(int argc, CHAR16 *argv[]) */ BS->SetWatchdogTimer(0, 0, 0, NULL); - env_setenv("currdev", EV_VOLATILE, efi_fmtdev(&currdev), - efi_setcurrdev, env_nounset); - env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset, - env_nounset); + if (efi_handle_lookup(img->DeviceHandle, &dev, &unit) != 0) + return (EFI_NOT_FOUND); + + switch (dev->dv_type) { + default: { + struct devdesc currdev; + + currdev.d_dev = dev; + currdev.d_unit = unit; + currdev.d_opendata = NULL; + currdev.d_type = currdev.d_dev->dv_type; + env_setenv("currdev", EV_VOLATILE, efi_fmtdev(&currdev), + efi_setcurrdev, env_nounset); + env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset, + env_nounset); + break; + } + } setenv("LINES", "24", 1); /* optional */ - archsw.arch_autoload = efi_autoload; - archsw.arch_getdev = efi_getdev; - archsw.arch_copyin = efi_copyin; - archsw.arch_copyout = efi_copyout; - archsw.arch_readin = efi_readin; - for (k = 0; k < ST->NumberOfTableEntries; k++) { guid = &ST->ConfigurationTable[k].VendorGuid; if (!memcmp(guid, &smbios, sizeof(EFI_GUID))) { From 6ca07079afcad5a0129c4bcf2662131fde11d823 Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Fri, 15 Jan 2016 01:34:43 +0000 Subject: [PATCH 73/88] ioat(4): Add 
support for 'fence' bit with DMA_FENCE flag Some classes of IOAT hardware prefetch reads. DMA operations that depend on the result of prior DMA operations must use the DMA_FENCE flag to prevent stale reads. (E.g., I've hit this personally on Broadwell-EP. The Broadwell-DE has a different IOAT unit that is documented to not pipeline DMA operations.) Sponsored by: EMC / Isilon Storage Division --- share/man/man4/ioat.4 | 15 +++++++++++++-- sys/dev/ioat/ioat.c | 2 ++ sys/dev/ioat/ioat.h | 8 +++++++- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/share/man/man4/ioat.4 b/share/man/man4/ioat.4 index 10f2663ed8f0..e71c2e12b745 100644 --- a/share/man/man4/ioat.4 +++ b/share/man/man4/ioat.4 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 7, 2016 +.Dd January 14, 2016 .Dt IOAT 4 .Os .Sh NAME @@ -134,7 +134,7 @@ Null operations do nothing, but may be used to test the interrupt and callback mechanism. .Pp All operations can optionally trigger an interrupt at completion with the -.Ar DMA_EN_INT +.Ar DMA_INT_EN flag. For example, a user might submit multiple operations to the same channel and only enable an interrupt and callback for the last operation. @@ -160,6 +160,17 @@ flag. .Ar DMA_NO_WAIT may return NULL.) .Pp +Operations that depend on the result of prior operations should use +.Ar DMA_FENCE . +For example, such a scenario can happen when two related DMA operations are +queued. +First, a DMA copy to one location (A), followed directly by a DMA copy +from A to B. +In this scenario, some classes of I/OAT hardware may prefetch A for the second +operation before it is written by the first operation. +To avoid reading a stale value in sequences of dependent operations, use +.Ar DMA_FENCE . +.Pp All operations, as well as .Fn ioat_get_dmaengine , can return NULL in special circumstances. 
diff --git a/sys/dev/ioat/ioat.c b/sys/dev/ioat/ioat.c index 7f14c3ff40ec..956e8d1463f4 100644 --- a/sys/dev/ioat/ioat.c +++ b/sys/dev/ioat/ioat.c @@ -852,6 +852,8 @@ ioat_op_generic(struct ioat_softc *ioat, uint8_t op, if ((flags & DMA_INT_EN) != 0) hw_desc->u.control_generic.int_enable = 1; + if ((flags & DMA_FENCE) != 0) + hw_desc->u.control_generic.fence = 1; hw_desc->size = size; hw_desc->src_addr = src; diff --git a/sys/dev/ioat/ioat.h b/sys/dev/ioat/ioat.h index 64f97830a2d6..3b6e0946ac1e 100644 --- a/sys/dev/ioat/ioat.h +++ b/sys/dev/ioat/ioat.h @@ -46,7 +46,13 @@ __FBSDID("$FreeBSD$"); * descriptor without blocking. */ #define DMA_NO_WAIT 0x2 -#define DMA_ALL_FLAGS (DMA_INT_EN | DMA_NO_WAIT) +/* + * Disallow prefetching the source of the following operation. Ordinarily, DMA + * operations can be pipelined on some hardware. E.g., operation 2's source + * may be prefetched before operation 1 completes. + */ +#define DMA_FENCE 0x4 +#define DMA_ALL_FLAGS (DMA_INT_EN | DMA_NO_WAIT | DMA_FENCE) /* * Hardware revision number. Different hardware revisions support different From 7bd249ecf0fdcd84e977d67bf90fc17135388196 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Fri, 15 Jan 2016 02:33:47 +0000 Subject: [PATCH 74/88] Add EFI ZFS boot support This builds on the modular EFI loader support added r294060 adding a module to provide ZFS boot support on EFI systems. It should be noted that EFI uses a fixed size memory block for all allocations performed by the loader so it may be necessary to tune this size. For example when building an image which uses mfs_root e.g. mfsbsd, adding the following to /etc/make.conf would be needed to prevent EFI from running out of memory when loading the mfs_root image. 
EFI_STAGING_SIZE=128 Submitted by: Eric McCorkle MFC after: 2 weeks X-MFC-With: r293268 Sponsored by: Multiplay --- sys/boot/efi/boot1/Makefile | 20 ++++ sys/boot/efi/boot1/boot1.c | 3 + sys/boot/efi/boot1/boot_module.h | 3 + sys/boot/efi/boot1/zfs_module.c | 199 +++++++++++++++++++++++++++++++ sys/boot/efi/include/efilib.h | 3 +- sys/boot/efi/libefi/handles.c | 24 +++- sys/boot/efi/loader/Makefile | 15 +++ sys/boot/efi/loader/conf.c | 9 ++ sys/boot/efi/loader/devicename.c | 24 ++++ sys/boot/efi/loader/main.c | 75 +++++++++++- 10 files changed, 372 insertions(+), 3 deletions(-) create mode 100644 sys/boot/efi/boot1/zfs_module.c diff --git a/sys/boot/efi/boot1/Makefile b/sys/boot/efi/boot1/Makefile index c05be8dd82e8..6b1ea8d7d583 100644 --- a/sys/boot/efi/boot1/Makefile +++ b/sys/boot/efi/boot1/Makefile @@ -10,8 +10,22 @@ PROG= boot1.sym INTERNALPROG= WARNS?= 6 +.if ${MK_ZFS} != "no" +# Disable warnings that are currently incompatible with the zfs boot code +CWARNFLAGS.zfs_module.c += -Wno-array-bounds +CWARNFLAGS.zfs_module.c += -Wno-cast-align +CWARNFLAGS.zfs_module.c += -Wno-cast-qual +CWARNFLAGS.zfs_module.c += -Wno-missing-prototypes +CWARNFLAGS.zfs_module.c += -Wno-sign-compare +CWARNFLAGS.zfs_module.c += -Wno-unused-parameter +CWARNFLAGS.zfs_module.c += -Wno-unused-function +.endif + # architecture-specific loader code SRCS= boot1.c self_reloc.c start.S ufs_module.c +.if ${MK_ZFS} != "no" +SRCS+= zfs_module.c +.endif CFLAGS+= -I. CFLAGS+= -I${.CURDIR}/../include @@ -20,6 +34,12 @@ CFLAGS+= -I${.CURDIR}/../../../contrib/dev/acpica/include CFLAGS+= -I${.CURDIR}/../../.. 
CFLAGS+= -DEFI_UFS_BOOT +.if ${MK_ZFS} != "no" +CFLAGS+= -I${.CURDIR}/../../zfs/ +CFLAGS+= -I${.CURDIR}/../../../cddl/boot/zfs/ +CFLAGS+= -DEFI_ZFS_BOOT +.endif + # Always add MI sources and REGULAR efi loader bits .PATH: ${.CURDIR}/../loader/arch/${MACHINE} .PATH: ${.CURDIR}/../loader diff --git a/sys/boot/efi/boot1/boot1.c b/sys/boot/efi/boot1/boot1.c index 6c85ea220f80..e4d039d48ff0 100644 --- a/sys/boot/efi/boot1/boot1.c +++ b/sys/boot/efi/boot1/boot1.c @@ -36,6 +36,9 @@ __FBSDID("$FreeBSD$"); static const boot_module_t *boot_modules[] = { +#ifdef EFI_ZFS_BOOT + &zfs_module, +#endif #ifdef EFI_UFS_BOOT &ufs_module #endif diff --git a/sys/boot/efi/boot1/boot_module.h b/sys/boot/efi/boot1/boot_module.h index ceb8843ba787..1d0729538f77 100644 --- a/sys/boot/efi/boot1/boot_module.h +++ b/sys/boot/efi/boot1/boot_module.h @@ -97,6 +97,9 @@ typedef struct boot_module_t #ifdef EFI_UFS_BOOT extern const boot_module_t ufs_module; #endif +#ifdef EFI_ZFS_BOOT +extern const boot_module_t zfs_module; +#endif /* Functions available to modules. */ extern void add_device(dev_info_t **devinfop, dev_info_t *devinfo); diff --git a/sys/boot/efi/boot1/zfs_module.c b/sys/boot/efi/boot1/zfs_module.c new file mode 100644 index 000000000000..345d0e114f59 --- /dev/null +++ b/sys/boot/efi/boot1/zfs_module.c @@ -0,0 +1,199 @@ +/*- + * Copyright (c) 2015 Eric McCorkle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#include +#include +#include +#include +#include +#include +#include + +#include "boot_module.h" + +#include "libzfs.h" +#include "zfsimpl.c" + +static dev_info_t *devices; + +static int +vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) +{ + dev_info_t *devinfo; + off_t lba; + EFI_STATUS status; + + devinfo = (dev_info_t *)priv; + lba = off / devinfo->dev->Media->BlockSize; + + status = devinfo->dev->ReadBlocks(devinfo->dev, + devinfo->dev->Media->MediaId, lba, bytes, buf); + if (status != EFI_SUCCESS) { + DPRINTF("vdev_read: failed dev: %p, id: %u, lba: %lu, size: %zu," + " status: %lu\n", devinfo->dev, + devinfo->dev->Media->MediaId, lba, bytes, + EFI_ERROR_CODE(status)); + return (-1); + } + + return (0); +} + +static EFI_STATUS +probe(dev_info_t *dev) +{ + spa_t *spa; + dev_info_t *tdev; + EFI_STATUS status; + + /* ZFS consumes the dev on success so we need a copy.
*/ + if ((status = bs->AllocatePool(EfiLoaderData, sizeof(*dev), + (void**)&tdev)) != EFI_SUCCESS) { + DPRINTF("Failed to allocate tdev (%lu)\n", + EFI_ERROR_CODE(status)); + return (status); + } + memcpy(tdev, dev, sizeof(*dev)); + + if (vdev_probe(vdev_read, tdev, &spa) != 0) { + (void)bs->FreePool(tdev); + return (EFI_UNSUPPORTED); + } + + dev->devdata = spa; + add_device(&devices, dev); + + return (EFI_SUCCESS); +} + +static EFI_STATUS +try_load(dev_info_t *devinfo, const char *loader_path, void **bufp, size_t *bufsize) +{ + spa_t *spa; + struct zfsmount zfsmount; + dnode_phys_t dn; + struct stat st; + int err; + void *buf; + EFI_STATUS status; + + spa = devinfo->devdata; + if (zfs_spa_init(spa) != 0) { + /* Init failed, don't report this loudly. */ + return (EFI_NOT_FOUND); + } + + if (zfs_mount(spa, 0, &zfsmount) != 0) { + /* Mount failed, don't report this loudly. */ + return (EFI_NOT_FOUND); + } + + if ((err = zfs_lookup(&zfsmount, loader_path, &dn)) != 0) { + printf("Failed to lookup %s on pool %s (%d)\n", loader_path, + spa->spa_name, err); + return (EFI_INVALID_PARAMETER); + } + + if ((err = zfs_dnode_stat(spa, &dn, &st)) != 0) { + printf("Failed to stat %s on pool %s (%d)\n", loader_path, + spa->spa_name, err); + return (EFI_INVALID_PARAMETER); + } + + if ((status = bs->AllocatePool(EfiLoaderData, (UINTN)st.st_size, &buf)) + != EFI_SUCCESS) { + printf("Failed to allocate load buffer for pool %s (%lu)\n", + spa->spa_name, EFI_ERROR_CODE(status)); + return (status); + } + + if ((err = dnode_read(spa, &dn, 0, buf, st.st_size)) != 0) { + printf("Failed to read node from %s (%d)\n", spa->spa_name, + err); + (void)bs->FreePool(buf); + return (EFI_INVALID_PARAMETER); + } + + *bufsize = st.st_size; + *bufp = buf; + + return (EFI_SUCCESS); +} + +static EFI_STATUS +load(const char *loader_path, dev_info_t **devinfop, void **bufp, + size_t *bufsize) +{ + dev_info_t *devinfo; + EFI_STATUS status; + + for (devinfo = devices; devinfo != NULL; devinfo
= devinfo->next) { + status = try_load(devinfo, loader_path, bufp, bufsize); + if (status == EFI_SUCCESS) { + *devinfop = devinfo; + return (EFI_SUCCESS); + } else if (status != EFI_NOT_FOUND) { + return (status); + } + } + + return (EFI_NOT_FOUND); +} + +static void +status() +{ + spa_t *spa; + + spa = STAILQ_FIRST(&zfs_pools); + if (spa == NULL) { + printf("%s found no pools\n", zfs_module.name); + return; + } + + printf("%s found the following pools:", zfs_module.name); + STAILQ_FOREACH(spa, &zfs_pools, spa_link) + printf(" %s", spa->spa_name); + + printf("\n"); +} + +static void +init() +{ + + zfs_init(); +} + +const boot_module_t zfs_module = +{ + .name = "ZFS", + .init = init, + .probe = probe, + .load = load, + .status = status +}; diff --git a/sys/boot/efi/include/efilib.h b/sys/boot/efi/include/efilib.h index a6197b9f5db2..ba5b663da5f0 100644 --- a/sys/boot/efi/include/efilib.h +++ b/sys/boot/efi/include/efilib.h @@ -42,7 +42,8 @@ void *efi_get_table(EFI_GUID *tbl); int efi_register_handles(struct devsw *, EFI_HANDLE *, EFI_HANDLE *, int); EFI_HANDLE efi_find_handle(struct devsw *, int); -int efi_handle_lookup(EFI_HANDLE, struct devsw **, int *); +int efi_handle_lookup(EFI_HANDLE, struct devsw **, int *, uint64_t *); +int efi_handle_update_dev(EFI_HANDLE, struct devsw *, int, uint64_t); int efi_status_to_errno(EFI_STATUS); time_t efi_time(EFI_TIME *); diff --git a/sys/boot/efi/libefi/handles.c b/sys/boot/efi/libefi/handles.c index b15c0a5c1322..1e4ef6ffbd50 100644 --- a/sys/boot/efi/libefi/handles.c +++ b/sys/boot/efi/libefi/handles.c @@ -35,6 +35,7 @@ struct entry { EFI_HANDLE alias; struct devsw *dev; int unit; + uint64_t extra; }; struct entry *entry; @@ -79,7 +80,7 @@ efi_find_handle(struct devsw *dev, int unit) } int -efi_handle_lookup(EFI_HANDLE h, struct devsw **dev, int *unit) +efi_handle_lookup(EFI_HANDLE h, struct devsw **dev, int *unit, uint64_t *extra) { int idx; @@ -90,7 +91,28 @@ efi_handle_lookup(EFI_HANDLE h, struct devsw **dev, int *unit) 
*dev = entry[idx].dev; if (unit != NULL) *unit = entry[idx].unit; + if (extra != NULL) + *extra = entry[idx].extra; return (0); } return (ENOENT); } + +int +efi_handle_update_dev(EFI_HANDLE h, struct devsw *dev, int unit, + uint64_t guid) +{ + int idx; + + for (idx = 0; idx < nentries; idx++) { + if (entry[idx].handle != h) + continue; + entry[idx].dev = dev; + entry[idx].unit = unit; + entry[idx].alias = NULL; + entry[idx].extra = guid; + return (0); + } + + return (ENOENT); +} diff --git a/sys/boot/efi/loader/Makefile b/sys/boot/efi/loader/Makefile index 59d32ff33b6c..d36e54bc077d 100644 --- a/sys/boot/efi/loader/Makefile +++ b/sys/boot/efi/loader/Makefile @@ -21,6 +21,16 @@ SRCS= autoload.c \ smbios.c \ vers.c +.if ${MK_ZFS} != "no" +SRCS+= zfs.c +.PATH: ${.CURDIR}/../../zfs + +# Disable warnings that are currently incompatible with the zfs boot code +CWARNFLAGS.zfs.c+= -Wno-sign-compare +CWARNFLAGS.zfs.c+= -Wno-array-bounds +CWARNFLAGS.zfs.c+= -Wno-missing-prototypes +.endif + .PATH: ${.CURDIR}/arch/${MACHINE} # For smbios.c .PATH: ${.CURDIR}/../../i386/libi386 @@ -33,6 +43,11 @@ CFLAGS+= -I${.CURDIR}/../include/${MACHINE} CFLAGS+= -I${.CURDIR}/../../../contrib/dev/acpica/include CFLAGS+= -I${.CURDIR}/../../.. 
CFLAGS+= -I${.CURDIR}/../../i386/libi386 +.if ${MK_ZFS} != "no" +CFLAGS+= -I${.CURDIR}/../../zfs +CFLAGS+= -I${.CURDIR}/../../../cddl/boot/zfs +CFLAGS+= -DEFI_ZFS_BOOT +.endif CFLAGS+= -DNO_PCI -DEFI # make buildenv doesn't set DESTDIR, this means LIBSTAND diff --git a/sys/boot/efi/loader/conf.c b/sys/boot/efi/loader/conf.c index 8bb9d5f0cda7..ecbf8b02c8e3 100644 --- a/sys/boot/efi/loader/conf.c +++ b/sys/boot/efi/loader/conf.c @@ -31,14 +31,23 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef EFI_ZFS_BOOT +#include +#endif struct devsw *devsw[] = { &efipart_dev, &efinet_dev, +#ifdef EFI_ZFS_BOOT + &zfs_dev, +#endif NULL }; struct fs_ops *file_system[] = { +#ifdef EFI_ZFS_BOOT + &zfs_fsops, +#endif &dosfs_fsops, &ufs_fsops, &cd9660_fsops, diff --git a/sys/boot/efi/loader/devicename.c b/sys/boot/efi/loader/devicename.c index c6591c879717..8fc80ebbc57e 100644 --- a/sys/boot/efi/loader/devicename.c +++ b/sys/boot/efi/loader/devicename.c @@ -33,6 +33,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef EFI_ZFS_BOOT +#include +#endif #include #include @@ -104,6 +107,23 @@ efi_parsedev(struct devdesc **dev, const char *devspec, const char **path) np = devspec + strlen(dv->dv_name); +#ifdef EFI_ZFS_BOOT + if (dv->dv_type == DEVT_ZFS) { + int err; + + idev = malloc(sizeof(struct zfs_devdesc)); + if (idev == NULL) + return (ENOMEM); + + err = zfs_parsedev((struct zfs_devdesc*)idev, np, path); + if (err != 0) { + free(idev); + return (err); + } + *dev = idev; + cp = strchr(np + 1, ':'); + } else +#endif { idev = malloc(sizeof(struct devdesc)); if (idev == NULL) @@ -143,6 +163,10 @@ efi_fmtdev(void *vdev) static char buf[SPECNAMELEN + 1]; switch(dev->d_type) { +#ifdef EFI_ZFS_BOOT + case DEVT_ZFS: + return (zfs_fmtdev(dev)); +#endif case DEVT_NONE: strcpy(buf, "(no device)"); break; diff --git a/sys/boot/efi/loader/main.c b/sys/boot/efi/loader/main.c index 43a015dd0253..4ba42d9f395d 100644 --- a/sys/boot/efi/loader/main.c +++ b/sys/boot/efi/loader/main.c 
@@ -39,6 +39,10 @@ __FBSDID("$FreeBSD$"); #include #include +#ifdef EFI_ZFS_BOOT +#include +#endif + #include "loader_efi.h" extern char bootprog_name[]; @@ -61,6 +65,10 @@ EFI_GUID memtype = MEMORY_TYPE_INFORMATION_TABLE_GUID; EFI_GUID debugimg = DEBUG_IMAGE_INFO_TABLE_GUID; EFI_GUID fdtdtb = FDT_TABLE_GUID; +#ifdef EFI_ZFS_BOOT +static void efi_zfs_probe(void); +#endif + /* * Need this because EFI uses UTF-16 unicode string constants, but we * use UTF-8. We can't use printf due to the possiblity of \0 and we @@ -83,6 +91,7 @@ main(int argc, CHAR16 *argv[]) EFI_GUID *guid; int i, j, vargood, unit; struct devsw *dev; + uint64_t pool_guid; UINTN k; archsw.arch_autoload = efi_autoload; @@ -90,6 +99,10 @@ main(int argc, CHAR16 *argv[]) archsw.arch_copyin = efi_copyin; archsw.arch_copyout = efi_copyout; archsw.arch_readin = efi_readin; +#ifdef EFI_ZFS_BOOT + /* Note this needs to be set before ZFS init. */ + archsw.arch_zfs_probe = efi_zfs_probe; +#endif /* * XXX Chicken-and-egg problem; we want to have console output @@ -168,10 +181,27 @@ main(int argc, CHAR16 *argv[]) */ BS->SetWatchdogTimer(0, 0, 0, NULL); - if (efi_handle_lookup(img->DeviceHandle, &dev, &unit) != 0) + if (efi_handle_lookup(img->DeviceHandle, &dev, &unit, &pool_guid) != 0) return (EFI_NOT_FOUND); switch (dev->dv_type) { +#ifdef EFI_ZFS_BOOT + case DEVT_ZFS: { + struct zfs_devdesc currdev; + + currdev.d_dev = dev; + currdev.d_unit = unit; + currdev.d_type = currdev.d_dev->dv_type; + currdev.d_opendata = NULL; + currdev.pool_guid = pool_guid; + currdev.root_guid = 0; + env_setenv("currdev", EV_VOLATILE, efi_fmtdev(&currdev), + efi_setcurrdev, env_nounset); + env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset, + env_nounset); + break; + } +#endif default: { struct devdesc currdev; @@ -456,6 +486,29 @@ command_nvram(int argc, char *argv[]) return (CMD_OK); } +#ifdef EFI_ZFS_BOOT +COMMAND_SET(lszfs, "lszfs", "list child datasets of a zfs dataset", + command_lszfs); + +static int 
+command_lszfs(int argc, char *argv[]) +{ + int err; + + if (argc != 2) { + command_errmsg = "wrong number of arguments"; + return (CMD_ERROR); + } + + err = zfs_list(argv[1]); + if (err != 0) { + command_errmsg = strerror(err); + return (CMD_ERROR); + } + return (CMD_OK); +} +#endif + #ifdef LOADER_FDT_SUPPORT extern int command_fdt_internal(int argc, char *argv[]); @@ -474,3 +527,23 @@ command_fdt(int argc, char *argv[]) COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt); #endif + +#ifdef EFI_ZFS_BOOT +static void +efi_zfs_probe(void) +{ + EFI_HANDLE h; + u_int unit; + int i; + char dname[SPECNAMELEN + 1]; + uint64_t guid; + + unit = 0; + h = efi_find_handle(&efipart_dev, 0); + for (i = 0; h != NULL; h = efi_find_handle(&efipart_dev, ++i)) { + snprintf(dname, sizeof(dname), "%s%d:", efipart_dev.dv_name, i); + if (zfs_probe_dev(dname, &guid) == 0) + (void)efi_handle_update_dev(h, &zfs_dev, unit++, guid); + } +} +#endif From fbe958861eef9522d1f5d12ca7fc16f1be145116 Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Fri, 15 Jan 2016 05:45:10 +0000 Subject: [PATCH 75/88] Move init_zfs_bootenv to sys/boot/zfs/zfs.c instead of having a copy in each loader While here, add a filter to ignore special datasets MFC after: 3 days Sponsored by: ScaleEngine Inc. 
--- sys/boot/i386/loader/main.c | 29 --------------------------- sys/boot/userboot/userboot/main.c | 27 ------------------------- sys/boot/zfs/libzfs.h | 1 + sys/boot/zfs/zfs.c | 33 +++++++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 56 deletions(-) diff --git a/sys/boot/i386/loader/main.c b/sys/boot/i386/loader/main.c index 644747e81510..c0782fc70e37 100644 --- a/sys/boot/i386/loader/main.c +++ b/sys/boot/i386/loader/main.c @@ -69,7 +69,6 @@ static int isa_inb(int port); static void isa_outb(int port, int value); void exit(int code); #ifdef LOADER_ZFS_SUPPORT -static void init_zfs_bootenv(char *currdev); static void i386_zfs_probe(void); #endif @@ -306,34 +305,6 @@ extract_currdev(void) env_nounset); } -#ifdef LOADER_ZFS_SUPPORT -static void -init_zfs_bootenv(char *currdev) -{ - char *beroot; - - if (strlen(currdev) == 0) - return; - if(strncmp(currdev, "zfs:", 4) != 0) - return; - /* Remove the trailing : */ - currdev[strlen(currdev) - 1] = '\0'; - setenv("zfs_be_active", currdev, 1); - setenv("zfs_be_currpage", "1", 1); - /* Do not overwrite if already set */ - setenv("vfs.root.mountfrom", currdev, 0); - /* Forward past zfs: */ - currdev = strchr(currdev, ':'); - currdev++; - /* Remove the last element (current bootenv) */ - beroot = strrchr(currdev, '/'); - if (beroot != NULL) - beroot[0] = '\0'; - beroot = currdev; - setenv("zfs_be_root", beroot, 1); -} -#endif - COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int diff --git a/sys/boot/userboot/userboot/main.c b/sys/boot/userboot/userboot/main.c index a52550cbaea8..e64bbc702419 100644 --- a/sys/boot/userboot/userboot/main.c +++ b/sys/boot/userboot/userboot/main.c @@ -41,7 +41,6 @@ __FBSDID("$FreeBSD$"); static void userboot_zfs_probe(void); static int userboot_zfs_found; -static void init_zfs_bootenv(char *currdev); #endif #define USERBOOT_VERSION USERBOOT_VERSION_3 @@ -199,32 +198,6 @@ extract_currdev(void) } #if defined(USERBOOT_ZFS_SUPPORT) -static void 
-init_zfs_bootenv(char *currdev) -{ - char *beroot; - - if (strlen(currdev) == 0) - return; - if(strncmp(currdev, "zfs:", 4) != 0) - return; - /* Remove the trailing : */ - currdev[strlen(currdev) - 1] = '\0'; - setenv("zfs_be_active", currdev, 1); - setenv("zfs_be_currpage", "1", 1); - /* Do not overwrite if already set */ - setenv("vfs.root.mountfrom", currdev, 0); - /* Forward past zfs: */ - currdev = strchr(currdev, ':'); - currdev++; - /* Remove the last element (current bootenv) */ - beroot = strrchr(currdev, '/'); - if (beroot != NULL) - beroot[0] = '\0'; - beroot = currdev; - setenv("zfs_be_root", beroot, 1); -} - static void userboot_zfs_probe(void) { diff --git a/sys/boot/zfs/libzfs.h b/sys/boot/zfs/libzfs.h index b28984958c9a..ee64d1cf32d3 100644 --- a/sys/boot/zfs/libzfs.h +++ b/sys/boot/zfs/libzfs.h @@ -62,6 +62,7 @@ int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, char *zfs_fmtdev(void *vdev); int zfs_probe_dev(const char *devname, uint64_t *pool_guid); int zfs_list(const char *name); +void init_zfs_bootenv(char *currdev); int zfs_bootenv(const char *name); int zfs_belist_add(const char *name); int zfs_set_env(void); diff --git a/sys/boot/zfs/zfs.c b/sys/boot/zfs/zfs.c index c8b1284bc8c7..8e6d420b4e92 100644 --- a/sys/boot/zfs/zfs.c +++ b/sys/boot/zfs/zfs.c @@ -709,6 +709,32 @@ zfs_list(const char *name) return (zfs_list_dataset(spa, objid)); } +void +init_zfs_bootenv(char *currdev) +{ + char *beroot; + + if (strlen(currdev) == 0) + return; + if(strncmp(currdev, "zfs:", 4) != 0) + return; + /* Remove the trailing : */ + currdev[strlen(currdev) - 1] = '\0'; + setenv("zfs_be_active", currdev, 1); + setenv("zfs_be_currpage", "1", 1); + /* Do not overwrite if already set */ + setenv("vfs.root.mountfrom", currdev, 0); + /* Forward past zfs: */ + currdev = strchr(currdev, ':'); + currdev++; + /* Remove the last element (current bootenv) */ + beroot = strrchr(currdev, '/'); + if (beroot != NULL) + beroot[0] = '\0'; + beroot = currdev; + 
setenv("zfs_be_root", beroot, 1); +} + int zfs_bootenv(const char *name) { @@ -779,8 +805,15 @@ int zfs_belist_add(const char *name) { + /* Skip special datasets that start with a $ character */ + if (strncmp(name, "$", 1) == 0) { + return (0); + } /* Add the boot environment to the head of the SLIST */ zfs_be = malloc(sizeof(struct zfs_be_entry)); + if (zfs_be == NULL) { + return (ENOMEM); + } zfs_be->name = name; SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries); zfs_env_count++; From 1b65ef29088dad5930e70a712f7afb2acfccd93d Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Fri, 15 Jan 2016 05:45:45 +0000 Subject: [PATCH 76/88] Connect the ZFS boot environment menu to the UEFI loader MFC after: 3 days Sponsored by: ScaleEngine Inc. --- sys/boot/efi/loader/main.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/sys/boot/efi/loader/main.c b/sys/boot/efi/loader/main.c index 4ba42d9f395d..0b9dcf6da09c 100644 --- a/sys/boot/efi/loader/main.c +++ b/sys/boot/efi/loader/main.c @@ -199,6 +199,7 @@ main(int argc, CHAR16 *argv[]) efi_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset, env_nounset); + init_zfs_bootenv(zfs_fmtdev(&currdev)); break; } #endif @@ -507,6 +508,38 @@ command_lszfs(int argc, char *argv[]) } return (CMD_OK); } + +COMMAND_SET(reloadbe, "reloadbe", "refresh the list of ZFS Boot Environments", + command_reloadbe); + +static int +command_reloadbe(int argc, char *argv[]) +{ + int err; + char *root; + + if (argc > 2) { + command_errmsg = "wrong number of arguments"; + return (CMD_ERROR); + } + + if (argc == 2) { + err = zfs_bootenv(argv[1]); + } else { + root = getenv("zfs_be_root"); + if (root == NULL) { + return (CMD_OK); + } + err = zfs_bootenv(root); + } + + if (err != 0) { + command_errmsg = strerror(err); + return (CMD_ERROR); + } + + return (CMD_OK); +} #endif #ifdef LOADER_FDT_SUPPORT From 177347ddfb55cd346c084ff29d7dc1a76d4edd82 Mon Sep 17 00:00:00 2001 From: Andrew 
Rybchenko Date: Fri, 15 Jan 2016 06:21:39 +0000 Subject: [PATCH 77/88] sfxge: rename hunt_link_state_t to ef10_link_state_t Submitted by: Mark Spender Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4932 --- sys/dev/sfxge/common/hunt_impl.h | 18 +++++++++--------- sys/dev/sfxge/common/hunt_mac.c | 16 ++++++++-------- sys/dev/sfxge/common/hunt_nic.c | 8 ++++---- sys/dev/sfxge/common/hunt_phy.c | 12 ++++++------ sys/dev/sfxge/common/medford_nic.c | 8 ++++---- 5 files changed, 31 insertions(+), 31 deletions(-) diff --git a/sys/dev/sfxge/common/hunt_impl.h b/sys/dev/sfxge/common/hunt_impl.h index 874c526ff6cf..a57d1ac2e834 100644 --- a/sys/dev/sfxge/common/hunt_impl.h +++ b/sys/dev/sfxge/common/hunt_impl.h @@ -482,16 +482,16 @@ ef10_nvram_partn_size( /* PHY */ -typedef struct hunt_link_state_s { - uint32_t hls_adv_cap_mask; - uint32_t hls_lp_cap_mask; - unsigned int hls_fcntl; - efx_link_mode_t hls_link_mode; +typedef struct ef10_link_state_s { + uint32_t els_adv_cap_mask; + uint32_t els_lp_cap_mask; + unsigned int els_fcntl; + efx_link_mode_t els_link_mode; #if EFSYS_OPT_LOOPBACK - efx_loopback_type_t hls_loopback; + efx_loopback_type_t els_loopback; #endif - boolean_t hls_mac_up; -} hunt_link_state_t; + boolean_t els_mac_up; +} ef10_link_state_t; extern void hunt_phy_link_ev( @@ -502,7 +502,7 @@ hunt_phy_link_ev( extern __checkReturn efx_rc_t hunt_phy_get_link( __in efx_nic_t *enp, - __out hunt_link_state_t *hlsp); + __out ef10_link_state_t *elsp); extern __checkReturn efx_rc_t hunt_phy_power( diff --git a/sys/dev/sfxge/common/hunt_mac.c b/sys/dev/sfxge/common/hunt_mac.c index 358c4d91e920..dcc64c1ec53f 100644 --- a/sys/dev/sfxge/common/hunt_mac.c +++ b/sys/dev/sfxge/common/hunt_mac.c @@ -48,16 +48,16 @@ hunt_mac_poll( */ efx_port_t *epp = &(enp->en_port); - hunt_link_state_t hls; + ef10_link_state_t els; efx_rc_t rc; - if ((rc = hunt_phy_get_link(enp, &hls)) != 0) + if ((rc = 
hunt_phy_get_link(enp, &els)) != 0) goto fail1; - epp->ep_adv_cap_mask = hls.hls_adv_cap_mask; - epp->ep_fcntl = hls.hls_fcntl; + epp->ep_adv_cap_mask = els.els_adv_cap_mask; + epp->ep_fcntl = els.els_fcntl; - *link_modep = hls.hls_link_mode; + *link_modep = els.els_link_mode; return (0); @@ -79,17 +79,17 @@ hunt_mac_up( * essentially identical. */ - hunt_link_state_t hls; + ef10_link_state_t els; efx_rc_t rc; /* * Because Huntington doesn't *require* polling, we can't rely on * hunt_mac_poll() being executed to populate epp->ep_mac_up. */ - if ((rc = hunt_phy_get_link(enp, &hls)) != 0) + if ((rc = hunt_phy_get_link(enp, &els)) != 0) goto fail1; - *mac_upp = hls.hls_mac_up; + *mac_upp = els.els_mac_up; return (0); diff --git a/sys/dev/sfxge/common/hunt_nic.c b/sys/dev/sfxge/common/hunt_nic.c index 823393e7859a..da04fd3dfc43 100644 --- a/sys/dev/sfxge/common/hunt_nic.c +++ b/sys/dev/sfxge/common/hunt_nic.c @@ -1096,7 +1096,7 @@ hunt_board_cfg( efx_nic_cfg_t *encp = &(enp->en_nic_cfg); uint8_t mac_addr[6]; uint32_t board_type = 0; - hunt_link_state_t hls; + ef10_link_state_t els; efx_port_t *epp = &(enp->en_port); uint32_t port; uint32_t pf; @@ -1170,10 +1170,10 @@ hunt_board_cfg( goto fail6; /* Obtain the default PHY advertised capabilities */ - if ((rc = hunt_phy_get_link(enp, &hls)) != 0) + if ((rc = hunt_phy_get_link(enp, &els)) != 0) goto fail7; - epp->ep_default_adv_cap_mask = hls.hls_adv_cap_mask; - epp->ep_adv_cap_mask = hls.hls_adv_cap_mask; + epp->ep_default_adv_cap_mask = els.els_adv_cap_mask; + epp->ep_adv_cap_mask = els.els_adv_cap_mask; /* * Enable firmware workarounds for hardware errata. 
diff --git a/sys/dev/sfxge/common/hunt_phy.c b/sys/dev/sfxge/common/hunt_phy.c index c25e8200c510..d0d709b2e637 100644 --- a/sys/dev/sfxge/common/hunt_phy.c +++ b/sys/dev/sfxge/common/hunt_phy.c @@ -219,7 +219,7 @@ hunt_phy_power( __checkReturn efx_rc_t hunt_phy_get_link( __in efx_nic_t *enp, - __out hunt_link_state_t *hlsp) + __out ef10_link_state_t *elsp) { /* * TBD: consider common Siena/Hunt function: Hunt is very similar @@ -252,14 +252,14 @@ hunt_phy_get_link( } hunt_phy_decode_cap(MCDI_OUT_DWORD(req, GET_LINK_OUT_CAP), - &hlsp->hls_adv_cap_mask); + &elsp->els_adv_cap_mask); hunt_phy_decode_cap(MCDI_OUT_DWORD(req, GET_LINK_OUT_LP_CAP), - &hlsp->hls_lp_cap_mask); + &elsp->els_lp_cap_mask); hunt_phy_decode_link_mode(enp, MCDI_OUT_DWORD(req, GET_LINK_OUT_FLAGS), MCDI_OUT_DWORD(req, GET_LINK_OUT_LINK_SPEED), MCDI_OUT_DWORD(req, GET_LINK_OUT_FCNTL), - &hlsp->hls_link_mode, &hlsp->hls_fcntl); + &elsp->els_link_mode, &elsp->els_fcntl); #if EFSYS_OPT_LOOPBACK /* Assert the MC_CMD_LOOPBACK and EFX_LOOPBACK namespace agree */ @@ -282,10 +282,10 @@ hunt_phy_get_link( EFX_STATIC_ASSERT(MC_CMD_LOOPBACK_PCS == EFX_LOOPBACK_PCS); EFX_STATIC_ASSERT(MC_CMD_LOOPBACK_PMAPMD == EFX_LOOPBACK_PMA_PMD); - hlsp->hls_loopback = MCDI_OUT_DWORD(req, GET_LINK_OUT_LOOPBACK_MODE); + elsp->els_loopback = MCDI_OUT_DWORD(req, GET_LINK_OUT_LOOPBACK_MODE); #endif /* EFSYS_OPT_LOOPBACK */ - hlsp->hls_mac_up = MCDI_OUT_DWORD(req, GET_LINK_OUT_MAC_FAULT) == 0; + elsp->els_mac_up = MCDI_OUT_DWORD(req, GET_LINK_OUT_MAC_FAULT) == 0; return (0); diff --git a/sys/dev/sfxge/common/medford_nic.c b/sys/dev/sfxge/common/medford_nic.c index 51a8ac0fe75e..149be1790dba 100644 --- a/sys/dev/sfxge/common/medford_nic.c +++ b/sys/dev/sfxge/common/medford_nic.c @@ -47,7 +47,7 @@ medford_board_cfg( efx_nic_cfg_t *encp = &(enp->en_nic_cfg); uint8_t mac_addr[6] = { 0 }; uint32_t board_type = 0; - hunt_link_state_t hls; + ef10_link_state_t els; efx_port_t *epp = &(enp->en_port); uint32_t port; uint32_t pf; @@ -126,10 
+126,10 @@ medford_board_cfg( goto fail6; /* Obtain the default PHY advertised capabilities */ - if ((rc = hunt_phy_get_link(enp, &hls)) != 0) + if ((rc = hunt_phy_get_link(enp, &els)) != 0) goto fail7; - epp->ep_default_adv_cap_mask = hls.hls_adv_cap_mask; - epp->ep_adv_cap_mask = hls.hls_adv_cap_mask; + epp->ep_default_adv_cap_mask = els.els_adv_cap_mask; + epp->ep_adv_cap_mask = els.els_adv_cap_mask; if (EFX_PCI_FUNCTION_IS_VF(encp)) { /* From 1935b20b5a593e11464c50263f5423e0e7194c62 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Fri, 15 Jan 2016 06:23:04 +0000 Subject: [PATCH 78/88] sfxge: rework MCDI request completion Submitted by: Andy Moreton Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4933 --- sys/dev/sfxge/common/efx_impl.h | 1 - sys/dev/sfxge/common/efx_mcdi.c | 63 +++++++++++++++++++++++-------- sys/dev/sfxge/common/efx_mcdi.h | 5 --- sys/dev/sfxge/common/hunt_impl.h | 5 --- sys/dev/sfxge/common/hunt_mcdi.c | 44 --------------------- sys/dev/sfxge/common/siena_impl.h | 5 --- sys/dev/sfxge/common/siena_mcdi.c | 29 -------------- 7 files changed, 47 insertions(+), 105 deletions(-) diff --git a/sys/dev/sfxge/common/efx_impl.h b/sys/dev/sfxge/common/efx_impl.h index 02a3e8eb6ebc..31f45a5af77f 100644 --- a/sys/dev/sfxge/common/efx_impl.h +++ b/sys/dev/sfxge/common/efx_impl.h @@ -463,7 +463,6 @@ typedef struct efx_mcdi_ops_s { efx_rc_t (*emco_init)(efx_nic_t *, const efx_mcdi_transport_t *); void (*emco_send_request)(efx_nic_t *, void *, size_t, void *, size_t); - void (*emco_request_copyout)(efx_nic_t *, efx_mcdi_req_t *); efx_rc_t (*emco_poll_reboot)(efx_nic_t *); boolean_t (*emco_poll_response)(efx_nic_t *); void (*emco_read_response)(efx_nic_t *, void *, size_t, size_t); diff --git a/sys/dev/sfxge/common/efx_mcdi.c b/sys/dev/sfxge/common/efx_mcdi.c index 5ff37ced8dc0..9f7a9ffedd8c 100644 --- a/sys/dev/sfxge/common/efx_mcdi.c +++ 
b/sys/dev/sfxge/common/efx_mcdi.c @@ -62,7 +62,6 @@ __FBSDID("$FreeBSD$"); static efx_mcdi_ops_t __efx_mcdi_siena_ops = { siena_mcdi_init, /* emco_init */ siena_mcdi_send_request, /* emco_send_request */ - siena_mcdi_request_copyout, /* emco_request_copyout */ siena_mcdi_poll_reboot, /* emco_poll_reboot */ siena_mcdi_poll_response, /* emco_poll_response */ siena_mcdi_read_response, /* emco_read_response */ @@ -77,7 +76,6 @@ static efx_mcdi_ops_t __efx_mcdi_siena_ops = { static efx_mcdi_ops_t __efx_mcdi_ef10_ops = { ef10_mcdi_init, /* emco_init */ ef10_mcdi_send_request, /* emco_send_request */ - ef10_mcdi_request_copyout, /* emco_request_copyout */ ef10_mcdi_poll_reboot, /* emco_poll_reboot */ ef10_mcdi_poll_response, /* emco_poll_response */ ef10_mcdi_read_response, /* emco_read_response */ @@ -211,16 +209,6 @@ efx_mcdi_send_request( emcop->emco_send_request(enp, hdrp, hdr_len, sdup, sdu_len); } -static void -efx_mcdi_request_copyout( - __in efx_nic_t *enp, - __in efx_mcdi_req_t *emrp) -{ - efx_mcdi_ops_t *emcop = enp->en_mcdi.em_emcop; - - emcop->emco_request_copyout(enp, emrp); -} - static efx_rc_t efx_mcdi_poll_reboot( __in efx_nic_t *enp) @@ -353,7 +341,7 @@ efx_mcdi_request_start( } - void +static void efx_mcdi_read_response_header( __in efx_nic_t *enp, __inout efx_mcdi_req_t *emrp) @@ -468,6 +456,50 @@ efx_mcdi_read_response_header( emrp->emr_out_length_used = 0; } +static void +efx_mcdi_finish_response( + __in efx_nic_t *enp, + __in efx_mcdi_req_t *emrp) +{ +#if EFSYS_OPT_MCDI_LOGGING + const efx_mcdi_transport_t *emtp = enp->en_mcdi.em_emtp; +#endif /* EFSYS_OPT_MCDI_LOGGING */ + efx_dword_t hdr[2]; + unsigned int hdr_len; + size_t bytes; + + if (emrp->emr_out_buf == NULL) + return; + + /* Read the command header to detect MCDI response format */ + hdr_len = sizeof (hdr[0]); + efx_mcdi_read_response(enp, &hdr[0], 0, hdr_len); + if (EFX_DWORD_FIELD(hdr[0], MCDI_HEADER_CODE) == MC_CMD_V2_EXTN) { + /* + * Read the actual payload length. 
The length given in the event + * is only correct for responses with the V1 format. + */ + efx_mcdi_read_response(enp, &hdr[1], hdr_len, sizeof (hdr[1])); + hdr_len += sizeof (hdr[1]); + + emrp->emr_out_length_used = EFX_DWORD_FIELD(hdr[1], + MC_CMD_V2_EXTN_IN_ACTUAL_LEN); + } + + /* Copy payload out into caller supplied buffer */ + bytes = MIN(emrp->emr_out_length_used, emrp->emr_out_length); + efx_mcdi_read_response(enp, emrp->emr_out_buf, hdr_len, bytes); + +#if EFSYS_OPT_MCDI_LOGGING + if (emtp->emt_logger != NULL) { + emtp->emt_logger(emtp->emt_context, + EFX_LOG_MCDI_RESPONSE, + &hdr, hdr_len, + emrp->emr_out_buf, bytes); + } +#endif /* EFSYS_OPT_MCDI_LOGGING */ +} + __checkReturn boolean_t efx_mcdi_request_poll( @@ -515,7 +547,7 @@ efx_mcdi_request_poll( if ((rc = emrp->emr_rc) != 0) goto fail2; - efx_mcdi_request_copyout(enp, emrp); + efx_mcdi_finish_response(enp, emrp); return (B_TRUE); fail2: @@ -709,7 +741,6 @@ efx_mcdi_ev_cpl( { efx_mcdi_iface_t *emip = &(enp->en_mcdi.em_emip); const efx_mcdi_transport_t *emtp = enp->en_mcdi.em_emtp; - efx_mcdi_ops_t *emcop = enp->en_mcdi.em_emcop; efx_mcdi_req_t *emrp; int state; @@ -751,7 +782,7 @@ efx_mcdi_ev_cpl( } } if (errcode == 0) { - emcop->emco_request_copyout(enp, emrp); + efx_mcdi_finish_response(enp, emrp); } emtp->emt_ev_cpl(emtp->emt_context); diff --git a/sys/dev/sfxge/common/efx_mcdi.h b/sys/dev/sfxge/common/efx_mcdi.h index 36b3d8d44aae..dd1d76e5fafa 100644 --- a/sys/dev/sfxge/common/efx_mcdi.h +++ b/sys/dev/sfxge/common/efx_mcdi.h @@ -88,11 +88,6 @@ efx_mcdi_execute_quiet( __in efx_nic_t *enp, __inout efx_mcdi_req_t *emrp); - extern void -efx_mcdi_read_response_header( - __in efx_nic_t *enp, - __inout efx_mcdi_req_t *emrp); - extern void efx_mcdi_ev_cpl( __in efx_nic_t *enp, diff --git a/sys/dev/sfxge/common/hunt_impl.h b/sys/dev/sfxge/common/hunt_impl.h index a57d1ac2e834..6f6da9235a49 100644 --- a/sys/dev/sfxge/common/hunt_impl.h +++ b/sys/dev/sfxge/common/hunt_impl.h @@ -305,11 +305,6 @@ 
ef10_mcdi_read_response( __in size_t offset, __in size_t length); -extern void -ef10_mcdi_request_copyout( - __in efx_nic_t *enp, - __in efx_mcdi_req_t *emrp); - extern efx_rc_t ef10_mcdi_poll_reboot( __in efx_nic_t *enp); diff --git a/sys/dev/sfxge/common/hunt_mcdi.c b/sys/dev/sfxge/common/hunt_mcdi.c index cef049bbc01a..f39e977ef705 100644 --- a/sys/dev/sfxge/common/hunt_mcdi.c +++ b/sys/dev/sfxge/common/hunt_mcdi.c @@ -150,50 +150,6 @@ ef10_mcdi_send_request( EFX_BAR_WRITED(enp, ER_DZ_MC_DB_HWRD_REG, &dword, B_FALSE); } - void -ef10_mcdi_request_copyout( - __in efx_nic_t *enp, - __in efx_mcdi_req_t *emrp) -{ -#if EFSYS_OPT_MCDI_LOGGING - const efx_mcdi_transport_t *emtp = enp->en_mcdi.em_emtp; -#endif /* EFSYS_OPT_MCDI_LOGGING */ - efx_dword_t hdr[2]; - unsigned int hdr_len; - size_t bytes; - - if (emrp->emr_out_buf == NULL) - return; - - /* Read the command header to detect MCDI response format */ - hdr_len = sizeof (hdr[0]); - ef10_mcdi_read_response(enp, &hdr[0], 0, hdr_len); - if (EFX_DWORD_FIELD(hdr[0], MCDI_HEADER_CODE) == MC_CMD_V2_EXTN) { - /* - * Read the actual payload length. The length given in the event - * is only correct for responses with the V1 format. 
- */ - ef10_mcdi_read_response(enp, &hdr[1], hdr_len, sizeof (hdr[1])); - hdr_len += sizeof (hdr[1]); - - emrp->emr_out_length_used = EFX_DWORD_FIELD(hdr[1], - MC_CMD_V2_EXTN_IN_ACTUAL_LEN); - } - - /* Copy payload out into caller supplied buffer */ - bytes = MIN(emrp->emr_out_length_used, emrp->emr_out_length); - ef10_mcdi_read_response(enp, emrp->emr_out_buf, hdr_len, bytes); - -#if EFSYS_OPT_MCDI_LOGGING - if (emtp->emt_logger != NULL) { - emtp->emt_logger(emtp->emt_context, - EFX_LOG_MCDI_RESPONSE, - &hdr, hdr_len, - emrp->emr_out_buf, bytes); - } -#endif /* EFSYS_OPT_MCDI_LOGGING */ -} - __checkReturn boolean_t ef10_mcdi_poll_response( __in efx_nic_t *enp) diff --git a/sys/dev/sfxge/common/siena_impl.h b/sys/dev/sfxge/common/siena_impl.h index d328395c277e..13bc242485f6 100644 --- a/sys/dev/sfxge/common/siena_impl.h +++ b/sys/dev/sfxge/common/siena_impl.h @@ -132,11 +132,6 @@ siena_mcdi_read_response( __in size_t offset, __in size_t length); -extern void -siena_mcdi_request_copyout( - __in efx_nic_t *enp, - __in efx_mcdi_req_t *emrp); - extern efx_rc_t siena_mcdi_poll_reboot( __in efx_nic_t *enp); diff --git a/sys/dev/sfxge/common/siena_mcdi.c b/sys/dev/sfxge/common/siena_mcdi.c index 14ca6cfeac88..4fb2b6c8c87f 100644 --- a/sys/dev/sfxge/common/siena_mcdi.c +++ b/sys/dev/sfxge/common/siena_mcdi.c @@ -89,35 +89,6 @@ siena_mcdi_send_request( EFX_BAR_TBL_WRITED(enp, FR_CZ_MC_TREG_SMEM, dbr, &dword, B_FALSE); } - void -siena_mcdi_request_copyout( - __in efx_nic_t *enp, - __in efx_mcdi_req_t *emrp) -{ -#if EFSYS_OPT_MCDI_LOGGING - const efx_mcdi_transport_t *emtp = enp->en_mcdi.em_emtp; - efx_dword_t hdr; -#endif - size_t bytes = MIN(emrp->emr_out_length_used, emrp->emr_out_length); - - /* Copy payload out if caller supplied buffer */ - if (emrp->emr_out_buf != NULL) { - siena_mcdi_read_response(enp, emrp->emr_out_buf, - sizeof (efx_dword_t), bytes); - } - -#if EFSYS_OPT_MCDI_LOGGING - if (emtp->emt_logger != NULL) { - siena_mcdi_read_response(enp, &hdr, 0, sizeof 
(hdr)); - - emtp->emt_logger(emtp->emt_context, - EFX_LOG_MCDI_RESPONSE, - &hdr, sizeof (hdr), - emrp->emr_out_buf, bytes); - } -#endif /* EFSYS_OPT_MCDI_LOGGING */ -} - efx_rc_t siena_mcdi_poll_reboot( __in efx_nic_t *enp) From a45a0da19cba5356e7c980ace24b1d01272a8ecc Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Fri, 15 Jan 2016 06:25:26 +0000 Subject: [PATCH 79/88] sfxge: support FATSOv2 Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4934 --- share/man/man4/sfxge.4 | 6 +- sys/dev/sfxge/sfxge.h | 5 +- sys/dev/sfxge/sfxge_tx.c | 234 +++++++++++++++++++++++++++------------ sys/dev/sfxge/sfxge_tx.h | 1 + 4 files changed, 171 insertions(+), 75 deletions(-) diff --git a/share/man/man4/sfxge.4 b/share/man/man4/sfxge.4 index 81e2af4bf26f..7ddd684b6535 100644 --- a/share/man/man4/sfxge.4 +++ b/share/man/man4/sfxge.4 @@ -121,8 +121,10 @@ If a packet is dropped, the counter is incremented and the local sender receives ENOBUFS. The value must be greater than or equal to 0. .It Va hw.sfxge.tso_fw_assisted -Enable/disable usage of FW-assisted TSO if supported by NIC firmware. -Enabled by default. +Bitmask to enable/disable usage of FW-assisted TSO version if supported +by NIC firmware. +FATSOv1 (bit 0) and FATSOv2 (bit 1) are supported. +All enabled by default. .It Va hw.sfxge.N.max_rss_channels The maximum number of allocated RSS channels for the Nth adapter. 
If set to 0 or unset, the number of channels is determined by the number diff --git a/sys/dev/sfxge/sfxge.h b/sys/dev/sfxge/sfxge.h index fbe0bd4542dd..9ff84c03a0b7 100644 --- a/sys/dev/sfxge/sfxge.h +++ b/sys/dev/sfxge/sfxge.h @@ -280,7 +280,10 @@ struct sfxge_softc { unsigned int rxq_count; unsigned int txq_count; - int tso_fw_assisted; + unsigned int tso_fw_assisted; +#define SFXGE_FATSOV1 (1 << 0) +#define SFXGE_FATSOV2 (1 << 1) + #if EFSYS_OPT_MCDI_LOGGING int mcdi_logging; #endif diff --git a/sys/dev/sfxge/sfxge_tx.c b/sys/dev/sfxge/sfxge_tx.c index 1ffb4bedd0cf..9cf5c79a8213 100644 --- a/sys/dev/sfxge/sfxge_tx.c +++ b/sys/dev/sfxge/sfxge_tx.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -96,11 +97,11 @@ SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN, "Maximum number of any packets in deferred packet put-list"); #define SFXGE_PARAM_TSO_FW_ASSISTED SFXGE_PARAM(tso_fw_assisted) -static int sfxge_tso_fw_assisted = 1; +static int sfxge_tso_fw_assisted = (SFXGE_FATSOV1 | SFXGE_FATSOV2); TUNABLE_INT(SFXGE_PARAM_TSO_FW_ASSISTED, &sfxge_tso_fw_assisted); SYSCTL_INT(_hw_sfxge, OID_AUTO, tso_fw_assisted, CTLFLAG_RDTUN, &sfxge_tso_fw_assisted, 0, - "Use FW-assisted TSO if supported by NIC firmware"); + "Bitmask of FW-assisted TSO allowed to use if supported by NIC firmware"); static const struct { @@ -850,6 +851,8 @@ struct sfxge_tso_state { unsigned out_len; /* Remaining length in current segment */ unsigned seqnum; /* Current sequence number */ unsigned packet_space; /* Remaining space in current packet */ + unsigned segs_space; /* Remaining number of DMA segments + for the packet (FATSOv2 only) */ /* Input position */ uint64_t dma_addr; /* DMA address of current position */ @@ -952,7 +955,7 @@ static void tso_start(struct sfxge_txq *txq, struct sfxge_tso_state *tso, struct tcphdr th_copy; #endif - tso->fw_assisted = txq->sc->tso_fw_assisted; + tso->fw_assisted = txq->tso_fw_assisted; tso->mbuf = 
mbuf; /* Find network protocol and header */ @@ -1059,6 +1062,8 @@ static void tso_fill_packet_with_fragment(struct sfxge_txq *txq, { efx_desc_t *desc; int n; + uint64_t dma_addr = tso->dma_addr; + boolean_t eop; if (tso->in_len == 0 || tso->packet_space == 0) return; @@ -1066,20 +1071,38 @@ static void tso_fill_packet_with_fragment(struct sfxge_txq *txq, KASSERT(tso->in_len > 0, ("TSO input length went negative")); KASSERT(tso->packet_space > 0, ("TSO packet space went negative")); - n = min(tso->in_len, tso->packet_space); + if (tso->fw_assisted & SFXGE_FATSOV2) { + n = tso->in_len; + tso->out_len -= n; + tso->seqnum += n; + tso->in_len = 0; + if (n < tso->packet_space) { + tso->packet_space -= n; + tso->segs_space--; + } else { + tso->packet_space = tso->seg_size - + (n - tso->packet_space) % tso->seg_size; + tso->segs_space = + EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1 - + (tso->packet_space != tso->seg_size); + } + } else { + n = min(tso->in_len, tso->packet_space); + tso->packet_space -= n; + tso->out_len -= n; + tso->dma_addr += n; + tso->in_len -= n; + } - tso->packet_space -= n; - tso->out_len -= n; - tso->in_len -= n; + /* + * It is OK to use binary OR below to avoid extra branching + * since all conditions may always be checked. + */ + eop = (tso->out_len == 0) | (tso->packet_space == 0) | + (tso->segs_space == 0); desc = &txq->pend_desc[txq->n_pend_desc++]; - efx_tx_qdesc_dma_create(txq->common, - tso->dma_addr, - n, - tso->out_len == 0 || tso->packet_space == 0, - desc); - - tso->dma_addr += n; + efx_tx_qdesc_dma_create(txq->common, dma_addr, n, eop, desc); } /* Callback from bus_dmamap_load() for long TSO headers. 
*/ @@ -1112,28 +1135,47 @@ static int tso_start_new_packet(struct sfxge_txq *txq, int rc; if (tso->fw_assisted) { - uint8_t tcp_flags = tso->tcp_flags; + if (tso->fw_assisted & SFXGE_FATSOV2) { + /* Add 2 FATSOv2 option descriptors */ + desc = &txq->pend_desc[txq->n_pend_desc]; + efx_tx_qdesc_tso2_create(txq->common, + tso->packet_id, + tso->seqnum, + tso->seg_size, + desc, + EFX_TX_FATSOV2_OPT_NDESCS); + desc += EFX_TX_FATSOV2_OPT_NDESCS; + txq->n_pend_desc += EFX_TX_FATSOV2_OPT_NDESCS; + KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0")); + id = (id + EFX_TX_FATSOV2_OPT_NDESCS) & txq->ptr_mask; - if (tso->out_len > tso->seg_size) - tcp_flags &= ~(TH_FIN | TH_PUSH); + tso->segs_space = + EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1; + } else { + uint8_t tcp_flags = tso->tcp_flags; - /* TSO option descriptor */ - desc = &txq->pend_desc[txq->n_pend_desc++]; - efx_tx_qdesc_tso_create(txq->common, - tso->packet_id, - tso->seqnum, - tcp_flags, - desc++); - KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0")); - id = (id + 1) & txq->ptr_mask; + if (tso->out_len > tso->seg_size) + tcp_flags &= ~(TH_FIN | TH_PUSH); + + /* Add FATSOv1 option descriptor */ + desc = &txq->pend_desc[txq->n_pend_desc++]; + efx_tx_qdesc_tso_create(txq->common, + tso->packet_id, + tso->seqnum, + tcp_flags, + desc++); + KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0")); + id = (id + 1) & txq->ptr_mask; + + tso->seqnum += tso->seg_size; + tso->segs_space = UINT_MAX; + } /* Header DMA descriptor */ *desc = tso->header_desc; txq->n_pend_desc++; KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0")); id = (id + 1) & txq->ptr_mask; - - tso->seqnum += tso->seg_size; } else { /* Allocate a DMA-mapped header buffer. 
*/ if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) { @@ -1215,6 +1257,8 @@ static int tso_start_new_packet(struct sfxge_txq *txq, 0, desc); id = (id + 1) & txq->ptr_mask; + + tso->segs_space = UINT_MAX; } tso->packet_space = tso->seg_size; txq->tso_packets++; @@ -1264,15 +1308,19 @@ sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf, } /* End of packet? */ - if (tso.packet_space == 0) { + if ((tso.packet_space == 0) | (tso.segs_space == 0)) { + unsigned int n_fatso_opt_desc = + (tso.fw_assisted & SFXGE_FATSOV2) ? + EFX_TX_FATSOV2_OPT_NDESCS : + (tso.fw_assisted & SFXGE_FATSOV1) ? 1 : 0; + /* If the queue is now full due to tiny MSS, * or we can't create another header, discard * the remainder of the input mbuf but do not * roll back the work we have done. */ - if (txq->n_pend_desc + tso.fw_assisted + - 1 /* header */ + n_dma_seg > - txq->max_pkt_desc) { + if (txq->n_pend_desc + n_fatso_opt_desc + + 1 /* header */ + n_dma_seg > txq->max_pkt_desc) { txq->tso_pdrop_too_many++; break; } @@ -1407,12 +1455,67 @@ sfxge_tx_qstop(struct sfxge_softc *sc, unsigned int index) SFXGE_TXQ_UNLOCK(txq); } +/* + * Estimate maximum number of Tx descriptors required for TSO packet. + * With minimum MSS and maximum mbuf length we might need more (even + * than a ring-ful of descriptors), but this should not happen in + * practice except due to deliberate attack. In that case we will + * truncate the output at a packet boundary. 
+ */ +static unsigned int +sfxge_tx_max_pkt_desc(const struct sfxge_softc *sc, enum sfxge_txq_type type, + unsigned int tso_fw_assisted) +{ + /* One descriptor for every input fragment */ + unsigned int max_descs = SFXGE_TX_MAPPING_MAX_SEG; + unsigned int sw_tso_max_descs; + unsigned int fa_tso_v1_max_descs = 0; + unsigned int fa_tso_v2_max_descs = 0; + + /* VLAN tagging Tx option descriptor may be required */ + if (efx_nic_cfg_get(sc->enp)->enc_hw_tx_insert_vlan_enabled) + max_descs++; + + if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM) { + /* + * Plus header and payload descriptor for each output segment. + * Minus one since header fragment is already counted. + * Even if FATSO is used, we should be ready to fallback + * to do it in the driver. + */ + sw_tso_max_descs = SFXGE_TSO_MAX_SEGS * 2 - 1; + + /* FW assisted TSOv1 requires one more descriptor per segment + * in comparison to SW TSO */ + if (tso_fw_assisted & SFXGE_FATSOV1) + fa_tso_v1_max_descs = + sw_tso_max_descs + SFXGE_TSO_MAX_SEGS; + + /* FW assisted TSOv2 requires 3 (2 FATSO plus header) extra + * descriptors per superframe limited by number of DMA fetches + * per packet. The first packet header is already counted. + */ + if (tso_fw_assisted & SFXGE_FATSOV2) { + fa_tso_v2_max_descs = + howmany(SFXGE_TX_MAPPING_MAX_SEG, + EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1) * + (EFX_TX_FATSOV2_OPT_NDESCS + 1) - 1; + } + + max_descs += MAX(sw_tso_max_descs, + MAX(fa_tso_v1_max_descs, fa_tso_v2_max_descs)); + } + + return (max_descs); +} + static int sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index) { struct sfxge_txq *txq; efsys_mem_t *esmp; uint16_t flags; + unsigned int tso_fw_assisted; struct sfxge_evq *evq; unsigned int desc_index; int rc; @@ -1434,6 +1537,7 @@ sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index) return (rc); /* Determine the kind of queue we are creating. 
*/ + tso_fw_assisted = 0; switch (txq->type) { case SFXGE_TXQ_NON_CKSUM: flags = 0; @@ -1443,6 +1547,9 @@ sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index) break; case SFXGE_TXQ_IP_TCP_UDP_CKSUM: flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP; + tso_fw_assisted = sc->tso_fw_assisted; + if (tso_fw_assisted & SFXGE_FATSOV2) + flags |= EFX_TXQ_FATSOV2; break; default: KASSERT(0, ("Impossible TX queue")); @@ -1453,8 +1560,19 @@ sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index) /* Create the common code transmit queue. */ if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp, sc->txq_entries, txq->buf_base_id, flags, evq->common, - &txq->common, &desc_index)) != 0) - goto fail; + &txq->common, &desc_index)) != 0) { + /* Retry if no FATSOv2 resources, otherwise fail */ + if ((rc != ENOSPC) || (~flags & EFX_TXQ_FATSOV2)) + goto fail; + + /* Looks like all FATSOv2 contexts are used */ + flags &= ~EFX_TXQ_FATSOV2; + tso_fw_assisted &= ~SFXGE_FATSOV2; + if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp, + sc->txq_entries, txq->buf_base_id, flags, evq->common, + &txq->common, &desc_index)) != 0) + goto fail; + } /* Initialise queue descriptor indexes */ txq->added = txq->pending = txq->completed = txq->reaped = desc_index; @@ -1466,6 +1584,10 @@ sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index) txq->init_state = SFXGE_TXQ_STARTED; txq->flush_state = SFXGE_FLUSH_REQUIRED; + txq->tso_fw_assisted = tso_fw_assisted; + + txq->max_pkt_desc = sfxge_tx_max_pkt_desc(sc, txq->type, + tso_fw_assisted); SFXGE_TXQ_UNLOCK(txq); @@ -1574,38 +1696,6 @@ sfxge_tx_qfini(struct sfxge_softc *sc, unsigned int index) free(txq, M_SFXGE); } -/* - * Estimate maximum number of Tx descriptors required for TSO packet. - * With minimum MSS and maximum mbuf length we might need more (even - * than a ring-ful of descriptors), but this should not happen in - * practice except due to deliberate attack. 
In that case we will - * truncate the output at a packet boundary. - */ -static unsigned int -sfxge_tx_max_pkt_desc(const struct sfxge_softc *sc, enum sfxge_txq_type type) -{ - /* One descriptor for every input fragment */ - unsigned int max_descs = SFXGE_TX_MAPPING_MAX_SEG; - - /* VLAN tagging Tx option descriptor may be required */ - if (efx_nic_cfg_get(sc->enp)->enc_hw_tx_insert_vlan_enabled) - max_descs++; - - if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM) { - /* - * Plus header and payload descriptor for each output segment. - * Minus one since header fragment is already counted. - */ - max_descs += SFXGE_TSO_MAX_SEGS * 2 - 1; - - /* FW assisted TSO requires one more descriptor per segment */ - if (sc->tso_fw_assisted) - max_descs += SFXGE_TSO_MAX_SEGS; - } - - return (max_descs); -} - static int sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index, enum sfxge_txq_type type, unsigned int evq_index) @@ -1735,8 +1825,6 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index, txq->init_state = SFXGE_TXQ_INITIALIZED; txq->hw_vlan_tci = 0; - txq->max_pkt_desc = sfxge_tx_max_pkt_desc(sc, type); - return (0); fail_txq_stat_init: @@ -1846,10 +1934,12 @@ sfxge_tx_init(struct sfxge_softc *sc) sc->txq_count = SFXGE_TXQ_NTYPES - 1 + sc->intr.n_alloc; sc->tso_fw_assisted = sfxge_tso_fw_assisted; - if (sc->tso_fw_assisted) - sc->tso_fw_assisted = - (encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO) && - (encp->enc_fw_assisted_tso_enabled); + if ((~encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO) || + (!encp->enc_fw_assisted_tso_enabled)) + sc->tso_fw_assisted &= ~SFXGE_FATSOV1; + if ((~encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO_V2) || + (!encp->enc_fw_assisted_tso_v2_enabled)) + sc->tso_fw_assisted &= ~SFXGE_FATSOV2; sc->txqs_node = SYSCTL_ADD_NODE( device_get_sysctl_ctx(sc->dev), diff --git a/sys/dev/sfxge/sfxge_tx.h b/sys/dev/sfxge/sfxge_tx.h index a18fdb7ce6a0..c24aee13ad50 100644 --- a/sys/dev/sfxge/sfxge_tx.h +++ b/sys/dev/sfxge/sfxge_tx.h @@ -170,6 
+170,7 @@ struct sfxge_txq { struct sfxge_softc *sc; enum sfxge_txq_state init_state; enum sfxge_flush_state flush_state; + unsigned int tso_fw_assisted; enum sfxge_txq_type type; unsigned int txq_index; unsigned int evq_index; From 739ebba6198987bf2b54eb0701cf4cd7dd6a922c Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Fri, 15 Jan 2016 06:26:37 +0000 Subject: [PATCH 80/88] sfxge: medford stores a single global copy of VPD Not per PF copies as on Huntington. Submitted by: Mark Spender Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4935 --- sys/dev/sfxge/common/efx.h | 2 ++ sys/dev/sfxge/common/hunt_vpd.c | 34 ++++++++++++++++++++++-------- sys/dev/sfxge/common/medford_nic.c | 6 ++++++ 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/sys/dev/sfxge/common/efx.h b/sys/dev/sfxge/common/efx.h index 9f6dab9266d8..fcf9fc522f01 100644 --- a/sys/dev/sfxge/common/efx.h +++ b/sys/dev/sfxge/common/efx.h @@ -1162,6 +1162,8 @@ typedef struct efx_nic_cfg_s { /* External port identifier */ uint8_t enc_external_port; uint32_t enc_mcdi_max_payload_length; + /* VPD may be per-PF or global */ + boolean_t enc_vpd_is_global; } efx_nic_cfg_t; #define EFX_PCI_FUNCTION_IS_PF(_encp) ((_encp)->enc_vf == 0xffff) diff --git a/sys/dev/sfxge/common/hunt_vpd.c b/sys/dev/sfxge/common/hunt_vpd.c index 6ada785e456d..41b4b83fa381 100644 --- a/sys/dev/sfxge/common/hunt_vpd.c +++ b/sys/dev/sfxge/common/hunt_vpd.c @@ -48,13 +48,20 @@ ef10_vpd_init( caddr_t svpd; size_t svpd_size; uint32_t pci_pf; + uint32_t tag; efx_rc_t rc; EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_PROBE); EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON || enp->en_family == EFX_FAMILY_MEDFORD); - pci_pf = enp->en_nic_cfg.enc_pf; + if (enp->en_nic_cfg.enc_vpd_is_global) { + tag = TLV_TAG_GLOBAL_STATIC_VPD; + } else { + pci_pf = enp->en_nic_cfg.enc_pf; + tag = TLV_TAG_PF_STATIC_VPD(pci_pf); + } + /* * The VPD interface 
exposes VPD resources from the combined static and * dynamic VPD storage. As the static VPD configuration should *never* @@ -64,8 +71,7 @@ ef10_vpd_init( svpd_size = 0; rc = ef10_nvram_partn_read_tlv(enp, NVRAM_PARTITION_TYPE_STATIC_CONFIG, - TLV_TAG_PF_STATIC_VPD(pci_pf), - &svpd, &svpd_size); + tag, &svpd, &svpd_size); if (rc != 0) { if (rc == EACCES) { /* Unpriviledged functions cannot access VPD */ @@ -132,17 +138,22 @@ ef10_vpd_read( caddr_t dvpd; size_t dvpd_size; uint32_t pci_pf; + uint32_t tag; efx_rc_t rc; EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON || enp->en_family == EFX_FAMILY_MEDFORD); - pci_pf = enp->en_nic_cfg.enc_pf; + if (enp->en_nic_cfg.enc_vpd_is_global) { + tag = TLV_TAG_GLOBAL_DYNAMIC_VPD; + } else { + pci_pf = enp->en_nic_cfg.enc_pf; + tag = TLV_TAG_PF_DYNAMIC_VPD(pci_pf); + } if ((rc = ef10_nvram_partn_read_tlv(enp, NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG, - TLV_TAG_PF_DYNAMIC_VPD(pci_pf), - &dvpd, &dvpd_size)) != 0) + tag, &dvpd, &dvpd_size)) != 0) goto fail1; if (dvpd_size > size) { @@ -396,12 +407,18 @@ ef10_vpd_write( { size_t vpd_length; uint32_t pci_pf; + uint32_t tag; efx_rc_t rc; EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON || enp->en_family == EFX_FAMILY_MEDFORD); - pci_pf = enp->en_nic_cfg.enc_pf; + if (enp->en_nic_cfg.enc_vpd_is_global) { + tag = TLV_TAG_GLOBAL_DYNAMIC_VPD; + } else { + pci_pf = enp->en_nic_cfg.enc_pf; + tag = TLV_TAG_PF_DYNAMIC_VPD(pci_pf); + } /* Determine total length of new dynamic VPD */ if ((rc = efx_vpd_hunk_length(data, size, &vpd_length)) != 0) @@ -410,8 +427,7 @@ ef10_vpd_write( /* Store new dynamic VPD in all segments in DYNAMIC_CONFIG partition */ if ((rc = ef10_nvram_partn_write_segment_tlv(enp, NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG, - TLV_TAG_PF_DYNAMIC_VPD(pci_pf), - data, vpd_length, B_TRUE)) != 0) { + tag, data, vpd_length, B_TRUE)) != 0) { goto fail2; } diff --git a/sys/dev/sfxge/common/medford_nic.c b/sys/dev/sfxge/common/medford_nic.c index 149be1790dba..f94cafc0d25e 100644 --- 
a/sys/dev/sfxge/common/medford_nic.c +++ b/sys/dev/sfxge/common/medford_nic.c @@ -212,6 +212,12 @@ medford_board_cfg( */ encp->enc_tx_tso_tcp_header_offset_limit = EF10_TCP_HEADER_OFFSET_LIMIT; + /* + * Medford stores a single global copy of VPD, not per-PF as on + * Huntington. + */ + encp->enc_vpd_is_global = B_TRUE; + return (0); fail11: From 80af6f263cbb8af9aee010fc9d2add95bd88a966 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Fri, 15 Jan 2016 06:27:51 +0000 Subject: [PATCH 81/88] sfxge: Medford still needs fallback for no privilege mask Submitted by: Mark Spender Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4936 --- sys/dev/sfxge/common/ef10_impl.h | 5 +++ sys/dev/sfxge/common/hunt_impl.h | 4 +-- sys/dev/sfxge/common/hunt_nic.c | 52 ++++++++++++++++++++++-------- sys/dev/sfxge/common/medford_nic.c | 3 +- 4 files changed, 46 insertions(+), 18 deletions(-) diff --git a/sys/dev/sfxge/common/ef10_impl.h b/sys/dev/sfxge/common/ef10_impl.h index d6fab74edfdf..0c687ae1b452 100644 --- a/sys/dev/sfxge/common/ef10_impl.h +++ b/sys/dev/sfxge/common/ef10_impl.h @@ -81,6 +81,11 @@ extern __checkReturn efx_rc_t ef10_get_datapath_caps( __in efx_nic_t *enp); +extern __checkReturn efx_rc_t +ef10_get_privilege_mask( + __in efx_nic_t *enp, + __out uint32_t *maskp); + extern __checkReturn efx_rc_t ef10_external_port_mapping( __in efx_nic_t *enp, diff --git a/sys/dev/sfxge/common/hunt_impl.h b/sys/dev/sfxge/common/hunt_impl.h index 6f6da9235a49..028605141e8e 100644 --- a/sys/dev/sfxge/common/hunt_impl.h +++ b/sys/dev/sfxge/common/hunt_impl.h @@ -742,7 +742,7 @@ ef10_tx_qstats_update( #define HUNT_MIN_PIO_ALLOC_SIZE (HUNT_PIOBUF_SIZE / 32) -#define HUNT_LEGACY_PF_PRIVILEGE_MASK \ +#define EF10_LEGACY_PF_PRIVILEGE_MASK \ (MC_CMD_PRIVILEGE_MASK_IN_GRP_ADMIN | \ MC_CMD_PRIVILEGE_MASK_IN_GRP_LINK | \ MC_CMD_PRIVILEGE_MASK_IN_GRP_ONLOAD | \ @@ -755,7 +755,7 @@ ef10_tx_qstats_update( 
MC_CMD_PRIVILEGE_MASK_IN_GRP_ALL_MULTICAST | \ MC_CMD_PRIVILEGE_MASK_IN_GRP_PROMISCUOUS) -#define HUNT_LEGACY_VF_PRIVILEGE_MASK 0 +#define EF10_LEGACY_VF_PRIVILEGE_MASK 0 typedef uint32_t efx_piobuf_handle_t; diff --git a/sys/dev/sfxge/common/hunt_nic.c b/sys/dev/sfxge/common/hunt_nic.c index da04fd3dfc43..77dbf1edbe77 100644 --- a/sys/dev/sfxge/common/hunt_nic.c +++ b/sys/dev/sfxge/common/hunt_nic.c @@ -980,6 +980,42 @@ ef10_get_datapath_caps( return (rc); } + + __checkReturn efx_rc_t +ef10_get_privilege_mask( + __in efx_nic_t *enp, + __out uint32_t *maskp) +{ + efx_nic_cfg_t *encp = &(enp->en_nic_cfg); + uint32_t mask; + efx_rc_t rc; + + if ((rc = efx_mcdi_privilege_mask(enp, encp->enc_pf, encp->enc_vf, + &mask)) != 0) { + if (rc != ENOTSUP) + goto fail1; + + /* Fallback for old firmware without privilege mask support */ + if (EFX_PCI_FUNCTION_IS_PF(encp)) { + /* Assume PF has admin privilege */ + mask = EF10_LEGACY_PF_PRIVILEGE_MASK; + } else { + /* VF is always unprivileged by default */ + mask = EF10_LEGACY_VF_PRIVILEGE_MASK; + } + } + + *maskp = mask; + + return (0); + +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + + /* * The external port mapping is a one-based numbering of the external * connectors on the board. It does not distinguish off-board separated @@ -1312,20 +1348,8 @@ hunt_board_cfg( * the privilege mask to check for sufficient privileges, as that * can result in time-of-check/time-of-use bugs. 
*/ - if ((rc = efx_mcdi_privilege_mask(enp, pf, vf, &mask)) != 0) { - if (rc != ENOTSUP) - goto fail13; - - /* Fallback for old firmware without privilege mask support */ - if (EFX_PCI_FUNCTION_IS_PF(encp)) { - /* Assume PF has admin privilege */ - mask = HUNT_LEGACY_PF_PRIVILEGE_MASK; - } else { - /* VF is always unprivileged by default */ - mask = HUNT_LEGACY_VF_PRIVILEGE_MASK; - } - } - + if ((rc = ef10_get_privilege_mask(enp, &mask)) != 0) + goto fail13; encp->enc_privilege_mask = mask; /* Get interrupt vector limits */ diff --git a/sys/dev/sfxge/common/medford_nic.c b/sys/dev/sfxge/common/medford_nic.c index f94cafc0d25e..21ba8e0c5967 100644 --- a/sys/dev/sfxge/common/medford_nic.c +++ b/sys/dev/sfxge/common/medford_nic.c @@ -189,9 +189,8 @@ medford_board_cfg( * the privilege mask to check for sufficient privileges, as that * can result in time-of-check/time-of-use bugs. */ - if ((rc = efx_mcdi_privilege_mask(enp, pf, vf, &mask)) != 0) + if ((rc = ef10_get_privilege_mask(enp, &mask)) != 0) goto fail10; - encp->enc_privilege_mask = mask; /* Get interrupt vector limits */ From 5d846e87968b9e98f59a3f950ffb5c6efb9f9022 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Fri, 15 Jan 2016 06:28:58 +0000 Subject: [PATCH 82/88] sfxge: convert nvram rw_start method to use partition id Submitted by: Andy Moreton Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. 
MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4937 --- sys/dev/sfxge/common/efx_impl.h | 2 +- sys/dev/sfxge/common/efx_nvram.c | 14 ++++++++++---- sys/dev/sfxge/common/hunt_impl.h | 12 ++++++------ sys/dev/sfxge/common/hunt_nvram.c | 12 +++--------- sys/dev/sfxge/common/siena_impl.h | 12 ++++++------ sys/dev/sfxge/common/siena_nvram.c | 12 +++--------- 6 files changed, 29 insertions(+), 35 deletions(-) diff --git a/sys/dev/sfxge/common/efx_impl.h b/sys/dev/sfxge/common/efx_impl.h index 31f45a5af77f..6f9df035aa3a 100644 --- a/sys/dev/sfxge/common/efx_impl.h +++ b/sys/dev/sfxge/common/efx_impl.h @@ -485,7 +485,6 @@ typedef struct efx_nvram_ops_s { #endif /* EFSYS_OPT_DIAG */ efx_rc_t (*envo_get_version)(efx_nic_t *, efx_nvram_type_t, uint32_t *, uint16_t *); - efx_rc_t (*envo_rw_start)(efx_nic_t *, efx_nvram_type_t, size_t *); efx_rc_t (*envo_read_chunk)(efx_nic_t *, efx_nvram_type_t, unsigned int, caddr_t, size_t); efx_rc_t (*envo_erase)(efx_nic_t *, efx_nvram_type_t); @@ -498,6 +497,7 @@ typedef struct efx_nvram_ops_s { efx_rc_t (*envo_type_to_partn)(efx_nic_t *, efx_nvram_type_t, uint32_t *); efx_rc_t (*envo_partn_size)(efx_nic_t *, uint32_t, size_t *); + efx_rc_t (*envo_partn_rw_start)(efx_nic_t *, uint32_t, size_t *); } efx_nvram_ops_t; #endif /* EFSYS_OPT_NVRAM */ diff --git a/sys/dev/sfxge/common/efx_nvram.c b/sys/dev/sfxge/common/efx_nvram.c index a30548b8848c..abb1dd7508b9 100644 --- a/sys/dev/sfxge/common/efx_nvram.c +++ b/sys/dev/sfxge/common/efx_nvram.c @@ -43,7 +43,6 @@ static efx_nvram_ops_t __efx_nvram_falcon_ops = { falcon_nvram_test, /* envo_test */ #endif /* EFSYS_OPT_DIAG */ falcon_nvram_get_version, /* envo_get_version */ - falcon_nvram_rw_start, /* envo_rw_start */ falcon_nvram_read_chunk, /* envo_read_chunk */ falcon_nvram_erase, /* envo_erase */ falcon_nvram_write_chunk, /* envo_write_chunk */ @@ -51,6 +50,7 @@ static efx_nvram_ops_t __efx_nvram_falcon_ops = { falcon_nvram_set_version, /* envo_set_version */ 
falcon_nvram_type_to_partn, /* envo_type_to_partn */ falcon_nvram_partn_size, /* envo_partn_size */ + falcon_nvram_partn_rw_start, /* envo_partn_rw_start */ }; #endif /* EFSYS_OPT_FALCON */ @@ -62,7 +62,6 @@ static efx_nvram_ops_t __efx_nvram_siena_ops = { siena_nvram_test, /* envo_test */ #endif /* EFSYS_OPT_DIAG */ siena_nvram_get_version, /* envo_get_version */ - siena_nvram_rw_start, /* envo_rw_start */ siena_nvram_read_chunk, /* envo_read_chunk */ siena_nvram_erase, /* envo_erase */ siena_nvram_write_chunk, /* envo_write_chunk */ @@ -70,6 +69,7 @@ static efx_nvram_ops_t __efx_nvram_siena_ops = { siena_nvram_set_version, /* envo_set_version */ siena_nvram_type_to_partn, /* envo_type_to_partn */ siena_nvram_partn_size, /* envo_partn_size */ + siena_nvram_partn_rw_start, /* envo_partn_rw_start */ }; #endif /* EFSYS_OPT_SIENA */ @@ -81,7 +81,6 @@ static efx_nvram_ops_t __efx_nvram_ef10_ops = { ef10_nvram_test, /* envo_test */ #endif /* EFSYS_OPT_DIAG */ ef10_nvram_get_version, /* envo_get_version */ - ef10_nvram_rw_start, /* envo_rw_start */ ef10_nvram_read_chunk, /* envo_read_chunk */ ef10_nvram_erase, /* envo_erase */ ef10_nvram_write_chunk, /* envo_write_chunk */ @@ -89,6 +88,7 @@ static efx_nvram_ops_t __efx_nvram_ef10_ops = { ef10_nvram_set_version, /* envo_set_version */ ef10_nvram_type_to_partn, /* envo_type_to_partn */ ef10_nvram_partn_size, /* envo_partn_size */ + ef10_nvram_partn_rw_start, /* envo_partn_rw_start */ }; #endif /* EFSYS_OPT_HUNTINGTON || EFSYS_OPT_MEDFORD */ @@ -237,6 +237,7 @@ efx_nvram_rw_start( __out_opt size_t *chunk_sizep) { efx_nvram_ops_t *envop = enp->en_envop; + uint32_t partn; efx_rc_t rc; EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); @@ -247,13 +248,18 @@ efx_nvram_rw_start( EFSYS_ASSERT3U(enp->en_nvram_locked, ==, EFX_NVRAM_INVALID); - if ((rc = envop->envo_rw_start(enp, type, chunk_sizep)) != 0) + if ((rc = envop->envo_type_to_partn(enp, type, &partn)) != 0) goto fail1; + if ((rc = envop->envo_partn_rw_start(enp, partn, 
chunk_sizep)) != 0) + goto fail2; + enp->en_nvram_locked = type; return (0); +fail2: + EFSYS_PROBE(fail2); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); diff --git a/sys/dev/sfxge/common/hunt_impl.h b/sys/dev/sfxge/common/hunt_impl.h index 028605141e8e..646817724b71 100644 --- a/sys/dev/sfxge/common/hunt_impl.h +++ b/sys/dev/sfxge/common/hunt_impl.h @@ -416,12 +416,6 @@ ef10_nvram_get_version( __out uint32_t *subtypep, __out_ecount(4) uint16_t version[4]); -extern __checkReturn efx_rc_t -ef10_nvram_rw_start( - __in efx_nic_t *enp, - __in efx_nvram_type_t type, - __out size_t *pref_chunkp); - extern __checkReturn efx_rc_t ef10_nvram_read_chunk( __in efx_nic_t *enp, @@ -472,6 +466,12 @@ ef10_nvram_partn_size( __in uint32_t partn, __out size_t *sizep); +extern __checkReturn efx_rc_t +ef10_nvram_partn_rw_start( + __in efx_nic_t *enp, + __in uint32_t partn, + __out size_t *chunk_sizep); + #endif /* EFSYS_OPT_NVRAM */ diff --git a/sys/dev/sfxge/common/hunt_nvram.c b/sys/dev/sfxge/common/hunt_nvram.c index 5498ab0e5f55..17dc4ef122d1 100644 --- a/sys/dev/sfxge/common/hunt_nvram.c +++ b/sys/dev/sfxge/common/hunt_nvram.c @@ -1747,27 +1747,21 @@ ef10_nvram_get_version( } __checkReturn efx_rc_t -ef10_nvram_rw_start( +ef10_nvram_partn_rw_start( __in efx_nic_t *enp, - __in efx_nvram_type_t type, + __in uint32_t partn, __out size_t *chunk_sizep) { - uint32_t partn; efx_rc_t rc; - if ((rc = ef10_nvram_type_to_partn(enp, type, &partn)) != 0) - goto fail1; - if ((rc = ef10_nvram_partn_lock(enp, partn)) != 0) - goto fail2; + goto fail1; if (chunk_sizep != NULL) *chunk_sizep = EF10_NVRAM_CHUNK; return (0); -fail2: - EFSYS_PROBE(fail2); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); diff --git a/sys/dev/sfxge/common/siena_impl.h b/sys/dev/sfxge/common/siena_impl.h index 13bc242485f6..95c15ff2c9ac 100644 --- a/sys/dev/sfxge/common/siena_impl.h +++ b/sys/dev/sfxge/common/siena_impl.h @@ -216,12 +216,6 @@ siena_nvram_get_version( __out uint32_t *subtypep, __out_ecount(4) uint16_t version[4]); 
-extern __checkReturn efx_rc_t -siena_nvram_rw_start( - __in efx_nic_t *enp, - __in efx_nvram_type_t type, - __out size_t *pref_chunkp); - extern __checkReturn efx_rc_t siena_nvram_read_chunk( __in efx_nic_t *enp, @@ -266,6 +260,12 @@ siena_nvram_partn_size( __in uint32_t partn, __out size_t *sizep); +extern __checkReturn efx_rc_t +siena_nvram_partn_rw_start( + __in efx_nic_t *enp, + __in uint32_t partn, + __out size_t *chunk_sizep); + #endif /* EFSYS_OPT_NVRAM */ #if EFSYS_OPT_VPD diff --git a/sys/dev/sfxge/common/siena_nvram.c b/sys/dev/sfxge/common/siena_nvram.c index f19314261ad9..67e689ab4436 100644 --- a/sys/dev/sfxge/common/siena_nvram.c +++ b/sys/dev/sfxge/common/siena_nvram.c @@ -572,27 +572,21 @@ siena_nvram_get_version( } __checkReturn efx_rc_t -siena_nvram_rw_start( +siena_nvram_partn_rw_start( __in efx_nic_t *enp, - __in efx_nvram_type_t type, + __in uint32_t partn, __out size_t *chunk_sizep) { - uint32_t partn; efx_rc_t rc; - if ((rc = siena_nvram_type_to_partn(enp, type, &partn)) != 0) - goto fail1; - if ((rc = siena_nvram_partn_lock(enp, partn)) != 0) - goto fail2; + goto fail1; if (chunk_sizep != NULL) *chunk_sizep = SIENA_NVRAM_CHUNK; return (0); -fail2: - EFSYS_PROBE(fail2); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); From b936cc8d7337c315561782e32012ccd6d7204944 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Fri, 15 Jan 2016 06:30:46 +0000 Subject: [PATCH 83/88] sfxge: cleanup: simplify ef10_get_datapath_caps Submitted by: Andy Moreton Reviewed by: gnn Sponsored by: Solarflare Communications, Inc. 
MFC after: 2 days Differential Revision: https://reviews.freebsd.org/D4938 --- sys/dev/sfxge/common/hunt_nic.c | 86 ++++++++++++++------------------- 1 file changed, 35 insertions(+), 51 deletions(-) diff --git a/sys/dev/sfxge/common/hunt_nic.c b/sys/dev/sfxge/common/hunt_nic.c index 77dbf1edbe77..08eaa9c3761e 100644 --- a/sys/dev/sfxge/common/hunt_nic.c +++ b/sys/dev/sfxge/common/hunt_nic.c @@ -430,8 +430,8 @@ efx_mcdi_get_vector_cfg( static __checkReturn efx_rc_t efx_mcdi_get_capabilities( __in efx_nic_t *enp, - __out efx_dword_t *flagsp, - __out efx_dword_t *flags2p) + __out uint32_t *flagsp, + __out uint32_t *flags2p) { efx_mcdi_req_t req; uint8_t payload[MAX(MC_CMD_GET_CAPABILITIES_IN_LEN, @@ -457,13 +457,12 @@ efx_mcdi_get_capabilities( goto fail2; } - *flagsp = *MCDI_OUT2(req, efx_dword_t, GET_CAPABILITIES_OUT_FLAGS1); + *flagsp = MCDI_OUT_DWORD(req, GET_CAPABILITIES_OUT_FLAGS1); if (req.emr_out_length_used < MC_CMD_GET_CAPABILITIES_V2_OUT_LEN) - EFX_ZERO_DWORD(*flags2p); + *flags2p = 0; else - *flags2p = *MCDI_OUT2(req, efx_dword_t, - GET_CAPABILITIES_V2_OUT_FLAGS2); + *flags2p = MCDI_OUT_DWORD(req, GET_CAPABILITIES_V2_OUT_FLAGS2); return (0); @@ -897,78 +896,63 @@ ef10_get_datapath_caps( __in efx_nic_t *enp) { efx_nic_cfg_t *encp = &(enp->en_nic_cfg); - efx_dword_t datapath_capabilities; - efx_dword_t datapath_capabilities_v2; + uint32_t flags; + uint32_t flags2; efx_rc_t rc; - if ((rc = efx_mcdi_get_capabilities(enp, &datapath_capabilities, - &datapath_capabilities_v2)) != 0) + if ((rc = efx_mcdi_get_capabilities(enp, &flags, &flags2)) != 0) goto fail1; +#define CAP_FLAG(flags1, field) \ + ((flags1) & (1 << (MC_CMD_GET_CAPABILITIES_V2_OUT_ ## field ## _LBN))) + +#define CAP_FLAG2(flags2, field) \ + ((flags2) & (1 << (MC_CMD_GET_CAPABILITIES_V2_OUT_ ## field ## _LBN))) + /* * Huntington RXDP firmware inserts a 0 or 14 byte prefix. * We only support the 14 byte prefix here. 
*/ - if (MCDI_CMD_DWORD_FIELD(&datapath_capabilities, - GET_CAPABILITIES_OUT_RX_PREFIX_LEN_14) != 1) { + if (CAP_FLAG(flags, RX_PREFIX_LEN_14) == 0) { rc = ENOTSUP; goto fail2; } encp->enc_rx_prefix_size = 14; /* Check if the firmware supports TSO */ - if (MCDI_CMD_DWORD_FIELD(&datapath_capabilities, - GET_CAPABILITIES_OUT_TX_TSO) == 1) - encp->enc_fw_assisted_tso_enabled = B_TRUE; - else - encp->enc_fw_assisted_tso_enabled = B_FALSE; + encp->enc_fw_assisted_tso_enabled = + CAP_FLAG(flags, TX_TSO) ? B_TRUE : B_FALSE; /* Check if the firmware supports FATSOv2 */ - if (MCDI_CMD_DWORD_FIELD(&datapath_capabilities_v2, - GET_CAPABILITIES_V2_OUT_TX_TSO_V2) == 1) - encp->enc_fw_assisted_tso_v2_enabled = B_TRUE; - else - encp->enc_fw_assisted_tso_v2_enabled = B_FALSE; + encp->enc_fw_assisted_tso_v2_enabled = + CAP_FLAG2(flags2, TX_TSO_V2) ? B_TRUE : B_FALSE; /* Check if the firmware has vadapter/vport/vswitch support */ - if (MCDI_CMD_DWORD_FIELD(&datapath_capabilities, - GET_CAPABILITIES_OUT_EVB) == 1) - encp->enc_datapath_cap_evb = B_TRUE; - else - encp->enc_datapath_cap_evb = B_FALSE; + encp->enc_datapath_cap_evb = + CAP_FLAG(flags, EVB) ? B_TRUE : B_FALSE; /* Check if the firmware supports VLAN insertion */ - if (MCDI_CMD_DWORD_FIELD(&datapath_capabilities, - GET_CAPABILITIES_OUT_TX_VLAN_INSERTION) == 1) - encp->enc_hw_tx_insert_vlan_enabled = B_TRUE; - else - encp->enc_hw_tx_insert_vlan_enabled = B_FALSE; + encp->enc_hw_tx_insert_vlan_enabled = + CAP_FLAG(flags, TX_VLAN_INSERTION) ? B_TRUE : B_FALSE; /* Check if the firmware supports RX event batching */ - if (MCDI_CMD_DWORD_FIELD(&datapath_capabilities, - GET_CAPABILITIES_OUT_RX_BATCHING) == 1) { - encp->enc_rx_batching_enabled = B_TRUE; + encp->enc_rx_batching_enabled = + CAP_FLAG(flags, RX_BATCHING) ? 
B_TRUE : B_FALSE; + + if (encp->enc_rx_batching_enabled) encp->enc_rx_batch_max = 16; - } else { - encp->enc_rx_batching_enabled = B_FALSE; - } /* Check if the firmware supports disabling scatter on RXQs */ - if (MCDI_CMD_DWORD_FIELD(&datapath_capabilities, - GET_CAPABILITIES_OUT_RX_DISABLE_SCATTER) == 1) { - encp->enc_rx_disable_scatter_supported = B_TRUE; - } else { - encp->enc_rx_disable_scatter_supported = B_FALSE; - } + encp->enc_rx_disable_scatter_supported = + CAP_FLAG(flags, RX_DISABLE_SCATTER) ? B_TRUE : B_FALSE; /* Check if the firmware supports set mac with running filters */ - if (MCDI_CMD_DWORD_FIELD(&datapath_capabilities, - GET_CAPABILITIES_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED) - == 1) { - encp->enc_allow_set_mac_with_installed_filters = B_TRUE; - } else { - encp->enc_allow_set_mac_with_installed_filters = B_FALSE; - } + encp->enc_allow_set_mac_with_installed_filters = + CAP_FLAG(flags, VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED) ? + B_TRUE : B_FALSE; + +#undef CAP_FLAG +#undef CAP_FLAG2 return (0); From d3bf8f64866f8ef5929418ae19fd4c9ce3344b9f Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Fri, 15 Jan 2016 09:05:14 +0000 Subject: [PATCH 84/88] Make nfscl_getmyip() use new routing KPI. * Use standard IPv6 SAS instead of rt->rt_ifa address. * Make address lookup work for IPv6 LLA. * Save address into buffer provided by caller instead of using static vars. 
Discussed with: rmacklem --- sys/fs/nfs/nfs_var.h | 2 +- sys/fs/nfsclient/nfs_clport.c | 112 +++++++++++++++----------------- sys/fs/nfsclient/nfs_clrpcops.c | 3 +- 3 files changed, 57 insertions(+), 60 deletions(-) diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 23475b29e141..3b494a51fa88 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -306,7 +306,7 @@ void nfscl_reqstart(struct nfsrv_descript *, int, struct nfsmount *, nfsuint64 *nfscl_getcookie(struct nfsnode *, off_t off, int); void nfscl_fillsattr(struct nfsrv_descript *, struct vattr *, vnode_t, int, u_int32_t); -u_int8_t *nfscl_getmyip(struct nfsmount *, int *); +u_int8_t *nfscl_getmyip(struct nfsmount *, struct in6_addr *, int *); int nfsm_getfh(struct nfsrv_descript *, struct nfsfh **); int nfscl_mtofh(struct nfsrv_descript *, struct nfsfh **, struct nfsvattr *, int *); diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index cdccbc897c61..44085066216f 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -34,6 +34,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_inet.h" #include "opt_inet6.h" #include @@ -46,7 +47,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include +#include #include #include @@ -1038,73 +1041,66 @@ nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp) } /* - * Get a pointer to my IP addrress and return it. - * Return NULL if you can't find one. + * Looks up the source address which should be used to communicate with + * @nmp and stores it inside @paddr. + * + * Returns a pointer to @paddr on success, NULL on failure. */ u_int8_t * -nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) +nfscl_getmyip(struct nfsmount *nmp, struct in6_addr *paddr, int *isinet6p) { - struct sockaddr_in sad, *sin; - struct rtentry *rt; - u_int8_t *retp = NULL; - static struct in_addr laddr; + int error, fibnum; - *isinet6p = 0; - /* - * Loop up a route for the destination address.
- */ + fibnum = curthread->td_proc->p_fibnum; + +#ifdef INET if (nmp->nm_nam->sa_family == AF_INET) { - bzero(&sad, sizeof (sad)); - sin = (struct sockaddr_in *)nmp->nm_nam; - sad.sin_family = AF_INET; - sad.sin_len = sizeof (struct sockaddr_in); - sad.sin_addr.s_addr = sin->sin_addr.s_addr; - CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); - rt = rtalloc1_fib((struct sockaddr *)&sad, 0, 0UL, - curthread->td_proc->p_fibnum); - if (rt != NULL) { - if (rt->rt_ifp != NULL && - rt->rt_ifa != NULL && - ((rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) && - rt->rt_ifa->ifa_addr->sa_family == AF_INET) { - sin = (struct sockaddr_in *) - rt->rt_ifa->ifa_addr; - laddr.s_addr = sin->sin_addr.s_addr; - retp = (u_int8_t *)&laddr; - } - RTFREE_LOCKED(rt); - } - CURVNET_RESTORE(); -#ifdef INET6 - } else if (nmp->nm_nam->sa_family == AF_INET6) { - struct sockaddr_in6 sad6, *sin6; - static struct in6_addr laddr6; + struct sockaddr_in *sin; + struct nhop4_extended nh_ext; - bzero(&sad6, sizeof (sad6)); - sin6 = (struct sockaddr_in6 *)nmp->nm_nam; - sad6.sin6_family = AF_INET6; - sad6.sin6_len = sizeof (struct sockaddr_in6); - sad6.sin6_addr = sin6->sin6_addr; + sin = (struct sockaddr_in *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); - rt = rtalloc1_fib((struct sockaddr *)&sad6, 0, 0UL, - curthread->td_proc->p_fibnum); - if (rt != NULL) { - if (rt->rt_ifp != NULL && - rt->rt_ifa != NULL && - ((rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) && - rt->rt_ifa->ifa_addr->sa_family == AF_INET6) { - sin6 = (struct sockaddr_in6 *) - rt->rt_ifa->ifa_addr; - laddr6 = sin6->sin6_addr; - retp = (u_int8_t *)&laddr6; - *isinet6p = 1; - } - RTFREE_LOCKED(rt); - } + error = fib4_lookup_nh_ext(fibnum, sin->sin_addr, 0, 0, + &nh_ext); CURVNET_RESTORE(); -#endif + if (error != 0) + return (NULL); + + if ((ntohl(nh_ext.nh_src.s_addr) >> IN_CLASSA_NSHIFT) == + IN_LOOPBACKNET) { + /* Ignore loopback addresses */ + return (NULL); + } + + *isinet6p = 0; + *((struct in_addr *)paddr) = 
nh_ext.nh_src; + + return (u_int8_t *)paddr; } - return (retp); +#endif +#ifdef INET6 + if (nmp->nm_nam->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)nmp->nm_nam; + + CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); + error = in6_selectsrc_addr(fibnum, &sin6->sin6_addr, + sin6->sin6_scope_id, NULL, paddr, NULL); + CURVNET_RESTORE(); + if (error != 0) + return (NULL); + + if (IN6_IS_ADDR_LOOPBACK(paddr)) + return (NULL); + + /* Scope is embedded in */ + *isinet6p = 1; + + return (u_int8_t *)paddr; + } +#endif + return (NULL); } /* diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 710994442ff1..a7d47f0ab9e0 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -829,6 +829,7 @@ nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, u_int32_t lease; static u_int32_t rev = 0; struct nfsclds *dsp, *ndsp, *tdsp; + struct in6_addr a6; if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); @@ -889,7 +890,7 @@ nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, *tl = txdr_unsigned(NFS_CALLBCKPROG); callblen = strlen(nfsv4_callbackaddr); if (callblen == 0) - cp = nfscl_getmyip(nmp, &isinet6); + cp = nfscl_getmyip(nmp, &a6, &isinet6); if (nfscl_enablecallb && nfs_numnfscbd > 0 && (callblen > 0 || cp != NULL)) { port = htons(nfsv4_cbport); From 7d1333938f937dac1ba599abc6e4e4483241b589 Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Fri, 15 Jan 2016 11:18:58 +0000 Subject: [PATCH 85/88] Implement support for PCI suspend, resume and shutdown events in the LinuxKPI. Fix a few spaces to tabs. Bump the FreeBSD version to force recompilation of existing KMODs. 
MFC after: 1 week Sponsored by: Mellanox Technologies --- .../linuxkpi/common/include/linux/pci.h | 5 +- sys/compat/linuxkpi/common/src/linux_pci.c | 46 +++++++++++++++++++ sys/sys/param.h | 2 +- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/sys/compat/linuxkpi/common/include/linux/pci.h b/sys/compat/linuxkpi/common/include/linux/pci.h index 54ea99c5e8e4..0e6dbfd0bb4e 100644 --- a/sys/compat/linuxkpi/common/include/linux/pci.h +++ b/sys/compat/linuxkpi/common/include/linux/pci.h @@ -129,8 +129,9 @@ struct pci_driver { const struct pci_device_id *id_table; int (*probe)(struct pci_dev *dev, const struct pci_device_id *id); void (*remove)(struct pci_dev *dev); - int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ - int (*resume) (struct pci_dev *dev); /* Device woken up */ + int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ + int (*resume) (struct pci_dev *dev); /* Device woken up */ + void (*shutdown) (struct pci_dev *dev); /* Device shutdown */ driver_t driver; devclass_t bsdclass; const struct pci_error_handlers *err_handler; diff --git a/sys/compat/linuxkpi/common/src/linux_pci.c b/sys/compat/linuxkpi/common/src/linux_pci.c index 7c49a54a6dc0..74d4f7711297 100644 --- a/sys/compat/linuxkpi/common/src/linux_pci.c +++ b/sys/compat/linuxkpi/common/src/linux_pci.c @@ -61,11 +61,17 @@ __FBSDID("$FreeBSD$"); static device_probe_t linux_pci_probe; static device_attach_t linux_pci_attach; static device_detach_t linux_pci_detach; +static device_suspend_t linux_pci_suspend; +static device_resume_t linux_pci_resume; +static device_shutdown_t linux_pci_shutdown; static device_method_t pci_methods[] = { DEVMETHOD(device_probe, linux_pci_probe), DEVMETHOD(device_attach, linux_pci_attach), DEVMETHOD(device_detach, linux_pci_detach), + DEVMETHOD(device_suspend, linux_pci_suspend), + DEVMETHOD(device_resume, linux_pci_resume), + DEVMETHOD(device_shutdown, linux_pci_shutdown), DEVMETHOD_END }; @@ -169,6 
+175,46 @@ linux_pci_detach(device_t dev) return (0); } +static int +linux_pci_suspend(device_t dev) +{ + struct pm_message pm = { }; + struct pci_dev *pdev; + int err; + + pdev = device_get_softc(dev); + if (pdev->pdrv->suspend != NULL) + err = -pdev->pdrv->suspend(pdev, pm); + else + err = 0; + return (err); +} + +static int +linux_pci_resume(device_t dev) +{ + struct pci_dev *pdev; + int err; + + pdev = device_get_softc(dev); + if (pdev->pdrv->resume != NULL) + err = -pdev->pdrv->resume(pdev); + else + err = 0; + return (err); +} + +static int +linux_pci_shutdown(device_t dev) +{ + struct pci_dev *pdev; + + pdev = device_get_softc(dev); + if (pdev->pdrv->shutdown != NULL) + pdev->pdrv->shutdown(pdev); + return (0); +} + int pci_register_driver(struct pci_driver *pdrv) { diff --git a/sys/sys/param.h b/sys/sys/param.h index 3c9747aa4045..6e25339e1f33 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1100093 /* Master, propagated to newvers */ +#define __FreeBSD_version 1100094 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, From ed5c72c308cebea5719ec818c085ccda4f65cb1a Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Fri, 15 Jan 2016 11:40:41 +0000 Subject: [PATCH 86/88] Remove the commented out sc device from the i.MX kernels, they both use vt(4). 
--- sys/arm/conf/IMX53 | 8 -------- sys/arm/conf/IMX6 | 8 -------- 2 files changed, 16 deletions(-) diff --git a/sys/arm/conf/IMX53 b/sys/arm/conf/IMX53 index 7fe97a290a02..972437516e38 100644 --- a/sys/arm/conf/IMX53 +++ b/sys/arm/conf/IMX53 @@ -136,11 +136,3 @@ device wlan_amrr # AMRR transmit rate control algorithm # Flattened Device Tree options FDT # Configure using FDT/DTB data - -# NOTE: serial console will be disabled if syscons enabled -# Uncomment following lines for framebuffer/syscons support -#device sc -#device vt -#device kbdmux -#options SC_DFLT_FONT # compile font in -#makeoptions SC_DFLT_FONT=cp437 diff --git a/sys/arm/conf/IMX6 b/sys/arm/conf/IMX6 index 6de7c26e549c..6b383e28ccfd 100644 --- a/sys/arm/conf/IMX6 +++ b/sys/arm/conf/IMX6 @@ -125,14 +125,6 @@ device u3g # USB modems #device wlan_tkip # 802.11 TKIP support #device wlan_amrr # AMRR transmit rate control algorithm -# NOTE: serial console will be disabled if syscons enabled -# Uncomment following lines for framebuffer/syscons support -# Wandboard has no video console support yet. -#device sc -#device kbdmux -#options SC_DFLT_FONT # compile font in -#makeoptions SC_DFLT_FONT=cp437 - device vt device kbdmux device ukbd From ac3490fdef8c0f0a6d5afebe3019b72d99f6b8dd Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Fri, 15 Jan 2016 12:09:15 +0000 Subject: [PATCH 87/88] Use the recently added "make_dev_s()" function to solve old race setting the si_drv1 field in "struct cdev" when creating new character devices. 
--- sys/dev/usb/usb_device.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sys/dev/usb/usb_device.c b/sys/dev/usb/usb_device.c index 8940171459a0..2976fec70be2 100644 --- a/sys/dev/usb/usb_device.c +++ b/sys/dev/usb/usb_device.c @@ -1962,6 +1962,7 @@ usb_make_dev(struct usb_device *udev, const char *devname, int ep, int fi, int rwmode, uid_t uid, gid_t gid, int mode) { struct usb_fs_privdata* pd; + struct make_dev_args args; char buffer[32]; /* Store information to locate ourselves again later */ @@ -1980,17 +1981,19 @@ usb_make_dev(struct usb_device *udev, const char *devname, int ep, pd->bus_index, pd->dev_index, pd->ep_addr); } - pd->cdev = make_dev(&usb_devsw, 0, uid, gid, mode, "%s", devname); + /* Setup arguments for make_dev_s() */ + make_dev_args_init(&args); + args.mda_devsw = &usb_devsw; + args.mda_uid = uid; + args.mda_gid = gid; + args.mda_mode = mode; + args.mda_si_drv1 = pd; - if (pd->cdev == NULL) { + if (make_dev_s(&args, &pd->cdev, "%s", devname) != 0) { DPRINTFN(0, "Failed to create device %s\n", devname); free(pd, M_USBDEV); return (NULL); } - - /* XXX setting si_drv1 and creating the device is not atomic! */ - pd->cdev->si_drv1 = pd; - return (pd); } From b7d076ed19f75e61a8552542b7f6f374ac0f1ba8 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Fri, 15 Jan 2016 13:47:11 +0000 Subject: [PATCH 88/88] Clean up original route path selection logic a bit. NULL pointer dereference claimed by Coverity was possible if one (or several) next-hops had their weights set to 0.
CID: 1348482 --- sys/net/radix_mpath.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c index 56574002a972..5f40745f4904 100644 --- a/sys/net/radix_mpath.c +++ b/sys/net/radix_mpath.c @@ -201,19 +201,20 @@ static struct rtentry * rt_mpath_selectrte(struct rtentry *rte, uint32_t hash) { struct radix_node *rn0, *rn; - u_int32_t n; + uint32_t total_weight; struct rtentry *rt; int64_t weight; /* beyond here, we use rn as the master copy */ rn0 = rn = (struct radix_node *)rte; - n = rn_mpath_count(rn0); + rt = rte; /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */ + total_weight = rn_mpath_count(rn0); hash += hashjitter; - hash %= n; - for (weight = abs((int32_t)hash), rt = rte; - weight >= rt->rt_weight && rn; + hash %= total_weight; + for (weight = abs((int32_t)hash); + rt != NULL && weight >= rt->rt_weight; weight -= rt->rt_weight) { /* stay within the multipath routes */