From 24b98f288d11750f2cdfbfe360be1c92a9c2ee1d Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Sun, 23 May 2021 14:58:29 -0700 Subject: [PATCH] cxgbe(4): Overhaul CLIP (Compressed Local IPv6) table management. - Process the list of local IPs once instead of once per adapter. Add addresses from all VNETs to the driver's list but leave hardware updates for later when the global VNET/IFADDR list locks have been released. - Add address to the hardware table synchronously when a CLIP entry is requested for an address that's not already in there. - Provide ioctls that allow userspace tools to manage addresses in the CLIP table. - Add a knob (hw.cxgbe.clip_db_auto) that controls whether local IPs are automatically added to the CLIP table or not. MFC after: 2 weeks Sponsored by: Chelsio Communications --- sys/dev/cxgbe/adapter.h | 16 +- sys/dev/cxgbe/crypto/t4_kern_tls.c | 4 +- sys/dev/cxgbe/t4_clip.c | 894 ++++++++++++++++++++++------- sys/dev/cxgbe/t4_clip.h | 15 +- sys/dev/cxgbe/t4_ioctl.h | 10 + sys/dev/cxgbe/t4_main.c | 37 ++ sys/dev/cxgbe/tom/t4_connect.c | 4 +- sys/dev/cxgbe/tom/t4_listen.c | 18 +- sys/dev/cxgbe/tom/t4_tom.c | 2 +- 9 files changed, 761 insertions(+), 239 deletions(-) diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index b3b214ce3c96..27655ec2fe59 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -68,6 +69,15 @@ MALLOC_DECLARE(M_CXGBE); #define CXGBE_UNIMPLEMENTED(s) \ panic("%s (%s, line %d) not implemented yet.", s, __FILE__, __LINE__) +/* + * Same as LIST_HEAD from queue.h. This is to avoid conflict with LinuxKPI's + * LIST_HEAD when building iw_cxgbe. 
+ */ +#define CXGBE_LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + #ifndef SYSCTL_ADD_UQUAD #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD #define sysctl_handle_64 sysctl_handle_quad @@ -886,9 +896,11 @@ struct adapter { struct port_info *port[MAX_NPORTS]; uint8_t chan_map[MAX_NCHAN]; /* channel -> port */ - struct mtx clip_table_lock; - TAILQ_HEAD(, clip_entry) clip_table; + CXGBE_LIST_HEAD(, clip_entry) *clip_table; + TAILQ_HEAD(, clip_entry) clip_pending; /* these need hw update. */ + u_long clip_mask; int clip_gen; + struct timeout_task clip_task; void *tom_softc; /* (struct tom_data *) */ struct tom_tunables tt; diff --git a/sys/dev/cxgbe/crypto/t4_kern_tls.c b/sys/dev/cxgbe/crypto/t4_kern_tls.c index 957d0202fa3f..99d0d33cf128 100644 --- a/sys/dev/cxgbe/crypto/t4_kern_tls.c +++ b/sys/dev/cxgbe/crypto/t4_kern_tls.c @@ -379,7 +379,7 @@ send_ktls_act_open_req(struct adapter *sc, struct vi_info *vi, isipv6 = (inp->inp_vflag & INP_IPV6) != 0; if (isipv6) { - tlsp->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL); + tlsp->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true); if (tlsp->ce == NULL) return (ENOENT); } @@ -2333,7 +2333,7 @@ cxgbe_tls_tag_free(struct m_snd_tag *mst) if (tlsp->tid >= 0) release_tid(sc, tlsp->tid, tlsp->ctrlq); if (tlsp->ce) - t4_release_lip(sc, tlsp->ce); + t4_release_clip_entry(sc, tlsp->ce); if (tlsp->tx_key_addr >= 0) free_keyid(tlsp, tlsp->tx_key_addr); diff --git a/sys/dev/cxgbe/t4_clip.c b/sys/dev/cxgbe/t4_clip.c index ad26d212315e..18d78a9e830b 100644 --- a/sys/dev/cxgbe/t4_clip.c +++ b/sys/dev/cxgbe/t4_clip.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Copyright (c) 2012 Chelsio Communications, Inc. + * Copyright (c) 2012-2021 Chelsio Communications, Inc. * All rights reserved. 
* Written by: Navdeep Parhar * @@ -50,81 +50,233 @@ __FBSDID("$FreeBSD$"); #include "common/common.h" #include "t4_clip.h" +/* + * Code to deal with the Compressed Local IPv6 (CLIP) table in the ASIC. + * + * The driver maintains a global CLIP database (clip_db) of IPv6 addresses and a + * per-adapter CLIP table (sc->clip_table) with entries that point to an IPv6 in + * the clip_db. All access is protected by a single global lock (clip_db_lock). + * The correct lock order is clip lock before synchronized op. + * + * By default (hw.cxgbe.clip_db_auto=1) all local IPv6 addresses are added to + * the db. Addresses are also added on-demand when the driver allocates an + * entry for a filter, TOE tid, etc. krn_ref counts the number of times an + * address appears in the system. adp_ref counts the number of adapters that + * have that address in their CLIP table. If both are 0 then the entry is + * evicted from the db. Consumers of the CLIP table entry (filters, TOE tids) + * are tracked in ce->refcount. Driver ioctls let external consumers add/remove + * addresses from the CLIP table. 
+ */ + #if defined(INET6) -static int add_lip(struct adapter *, struct in6_addr *); -static int delete_lip(struct adapter *, struct in6_addr *); -static struct clip_entry *search_lip(struct adapter *, struct in6_addr *); -static void update_clip(struct adapter *, void *); -static void t4_clip_task(void *, int); -static void update_clip_table(struct adapter *); +struct clip_db_entry { + LIST_ENTRY(clip_db_entry) link; /* clip_db hash linkage */ + struct in6_addr lip; + u_int krn_ref; /* # of times this IP6 appears in list of all IP6 */ + u_int adp_ref; /* # of adapters with this IP6 in their CLIP */ + u_int tmp_ref; /* Used only during refresh */ +}; + +struct clip_entry { + LIST_ENTRY(clip_entry) link; /* clip_table hash linkage */ + TAILQ_ENTRY(clip_entry) plink; /* clip_pending linkage */ + struct clip_db_entry *cde; + int16_t clip_idx; /* index in the hw table */ + bool pending; /* in clip_pending list */ + int refcount; +}; -static int in6_ifaddr_gen; static eventhandler_tag ifaddr_evhandler; -static struct timeout_task clip_task; +static struct mtx clip_db_lock; +static LIST_HEAD(, clip_db_entry) *clip_db; +static u_long clip_db_mask; +static int clip_db_gen; +static struct task clip_db_task; -static int -add_lip(struct adapter *sc, struct in6_addr *lip) +static int add_lip(struct adapter *, struct in6_addr *, int16_t *); +static int del_lip(struct adapter *, struct in6_addr *); +static void t4_clip_db_task(void *, int); +static void t4_clip_task(void *, int); +static void update_clip_db(void); +static int update_sw_clip_table(struct adapter *); +static int update_hw_clip_table(struct adapter *); +static void update_clip_table(struct adapter *, void *); +static int sysctl_clip_db(SYSCTL_HANDLER_ARGS); +static int sysctl_clip_db_auto(SYSCTL_HANDLER_ARGS); +static struct clip_db_entry *lookup_clip_db_entry(struct in6_addr *, bool); +static struct clip_entry *lookup_clip_entry(struct adapter *, struct in6_addr *, + bool); + +SYSCTL_PROC(_hw_cxgbe, OID_AUTO, 
clip_db, CTLTYPE_STRING | CTLFLAG_RD | + CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0, sysctl_clip_db, "A", + "CLIP database"); + +int t4_clip_db_auto = 1; +SYSCTL_PROC(_hw_cxgbe, OID_AUTO, clip_db_auto, CTLTYPE_INT | CTLFLAG_RWTUN | + CTLFLAG_MPSAFE, NULL, 0, sysctl_clip_db_auto, "I", + "Add local IPs to CLIP db automatically (0 = no, 1 = yes)"); + +static inline uint32_t +clip_hashfn(struct in6_addr *addr) { - struct fw_clip_cmd c; + return (fnv_32_buf(addr, sizeof(*addr), FNV1_32_INIT) & clip_db_mask); +} - ASSERT_SYNCHRONIZED_OP(sc); - mtx_assert(&sc->clip_table_lock, MA_OWNED); +static inline struct clip_db_entry * +alloc_clip_db_entry(struct in6_addr *in6) +{ + struct clip_db_entry *cde; - memset(&c, 0, sizeof(c)); - c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | - F_FW_CMD_WRITE); - c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c)); - c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; - c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; + cde = malloc(sizeof(*cde), M_CXGBE, M_NOWAIT | M_ZERO); + if (__predict_true(cde != NULL)) + memcpy(&cde->lip, in6, sizeof(cde->lip)); - return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); + return (cde); +} + +static inline struct clip_entry * +alloc_clip_entry(struct clip_db_entry *cde) +{ + struct clip_entry *ce; + + mtx_assert(&clip_db_lock, MA_OWNED); + + ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT | M_ZERO); + if (__predict_true(ce != NULL)) { + ce->cde = cde; + cde->adp_ref++; + ce->clip_idx = -1; + } + + return (ce); +} + +/* + * Look up the IP6 address in the CLIP db. If add is set then an entry for the + * IP6 will be added to the db. + */ +static struct clip_db_entry * +lookup_clip_db_entry(struct in6_addr *in6, bool add) +{ + struct clip_db_entry *cde; + const int bucket = clip_hashfn(in6); + + mtx_assert(&clip_db_lock, MA_OWNED); + + LIST_FOREACH(cde, &clip_db[bucket], link) { + if (IN6_ARE_ADDR_EQUAL(&cde->lip, in6)) + return (cde); + } + + /* Not found. Create a new entry if requested. 
*/ + if (add) { + cde = alloc_clip_db_entry(in6); + if (cde != NULL) + LIST_INSERT_HEAD(&clip_db[bucket], cde, link); + } + + return (cde); +} + +/* + * Look up the IP6 address in the adapter's CLIP table. If add is set then an + * entry for the IP6 is added to the table (and to the CLIP db if needed). + */ +static struct clip_entry * +lookup_clip_entry(struct adapter *sc, struct in6_addr *in6, bool add) +{ + struct clip_db_entry *cde; + struct clip_entry *ce; + const int bucket = clip_hashfn(in6); + + mtx_assert(&clip_db_lock, MA_OWNED); + + cde = lookup_clip_db_entry(in6, add); + if (cde == NULL) + return (NULL); + + LIST_FOREACH(ce, &sc->clip_table[bucket], link) { + if (ce->cde == cde) + return (ce); + } + + /* Not found. Create a new entry if requested. */ + if (add) { + ce = alloc_clip_entry(cde); + if (ce != NULL) { + LIST_INSERT_HEAD(&sc->clip_table[bucket], ce, link); + TAILQ_INSERT_TAIL(&sc->clip_pending, ce, plink); + ce->pending = true; + } + } + + return (ce); } static int -delete_lip(struct adapter *sc, struct in6_addr *lip) +add_lip(struct adapter *sc, struct in6_addr *lip, int16_t *idx) +{ + struct fw_clip_cmd c; + int rc; + + ASSERT_SYNCHRONIZED_OP(sc); + + memset(&c, 0, sizeof(c)); + c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | + F_FW_CMD_WRITE); + c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c)); + c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; + c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; + + rc = -t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c); + if (rc == 0 && idx != NULL) + *idx = G_FW_CLIP_CMD_INDEX(ntohl(c.alloc_to_len16)); + return (rc); +} + +static int +del_lip(struct adapter *sc, struct in6_addr *lip) { struct fw_clip_cmd c; ASSERT_SYNCHRONIZED_OP(sc); - mtx_assert(&sc->clip_table_lock, MA_OWNED); memset(&c, 0, sizeof(c)); c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); - c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c)); - c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; - c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; + 
c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c)); + c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; + c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); } - -static struct clip_entry * -search_lip(struct adapter *sc, struct in6_addr *lip) -{ - struct clip_entry *ce; - - mtx_assert(&sc->clip_table_lock, MA_OWNED); - - TAILQ_FOREACH(ce, &sc->clip_table, link) { - if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) - return (ce); - } - - return (NULL); -} #endif struct clip_entry * -t4_hold_lip(struct adapter *sc, struct in6_addr *lip, struct clip_entry *ce) +t4_get_clip_entry(struct adapter *sc, struct in6_addr *in6, bool add) { - #ifdef INET6 - mtx_lock(&sc->clip_table_lock); - if (ce == NULL) - ce = search_lip(sc, lip); - if (ce != NULL) - ce->refcount++; - mtx_unlock(&sc->clip_table_lock); + struct clip_entry *ce; + bool schedule = false; + + mtx_lock(&clip_db_lock); + ce = lookup_clip_entry(sc, in6, add); + if (ce != NULL) { + MPASS(ce->cde->adp_ref > 0); + if (++ce->refcount == 1 && ce->pending && ce->clip_idx != -1) { + /* + * Valid entry that was waiting to be deleted. It is in + * use now so take it off the pending list. 
+ */ + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + ce->pending = false; + } + if (ce->clip_idx == -1 && update_hw_clip_table(sc) != 0) + schedule = true; + } + mtx_unlock(&clip_db_lock); + if (schedule) + taskqueue_enqueue_timeout(taskqueue_thread, &sc->clip_task, 0); return (ce); #else @@ -133,29 +285,110 @@ t4_hold_lip(struct adapter *sc, struct in6_addr *lip, struct clip_entry *ce) } void -t4_release_lip(struct adapter *sc, struct clip_entry *ce) +t4_hold_clip_entry(struct adapter *sc, struct clip_entry *ce) { +#ifdef INET6 + MPASS(ce != NULL); + MPASS(ce->cde->adp_ref > 0); + + mtx_lock(&clip_db_lock); + MPASS(ce->refcount > 0); /* Caller should already have a reference */ + ce->refcount++; + mtx_unlock(&clip_db_lock); +#endif +} #ifdef INET6 - mtx_lock(&sc->clip_table_lock); - KASSERT(search_lip(sc, &ce->lip) == ce, - ("%s: CLIP entry %p p not in CLIP table.", __func__, ce)); - KASSERT(ce->refcount > 0, - ("%s: CLIP entry %p has refcount 0", __func__, ce)); - --ce->refcount; - mtx_unlock(&sc->clip_table_lock); +static void +release_clip_entry_locked(struct adapter *sc, struct clip_entry *ce) +{ + struct clip_db_entry *cde; + + mtx_assert(&clip_db_lock, MA_OWNED); + MPASS(ce->refcount > 0); + cde = ce->cde; + MPASS(cde->adp_ref > 0); + if (--ce->refcount == 0 && cde->krn_ref == 0) { + if (ce->clip_idx == -1) { + /* Was never written to the hardware. */ + MPASS(ce->pending); + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + LIST_REMOVE(ce, link); + free(ce, M_CXGBE); + if (--cde->adp_ref == 0) { + LIST_REMOVE(cde, link); + free(cde, M_CXGBE); + } + } else { + /* + * Valid entry is now unused, add to the pending list + * for deletion. Its refcount was 1 on entry so it + * can't already be pending. 
+ */ + MPASS(!ce->pending); + TAILQ_INSERT_HEAD(&sc->clip_pending, ce, plink); + ce->pending = true; + } + } +} #endif + +void +t4_release_clip_entry(struct adapter *sc, struct clip_entry *ce) +{ +#ifdef INET6 + MPASS(ce != NULL); + + mtx_lock(&clip_db_lock); + release_clip_entry_locked(sc, ce); + /* + * This isn't a manual release via the ioctl. No need to update the + * hw right now even if the release resulted in the entry being queued + * for deletion. + */ + mtx_unlock(&clip_db_lock); +#endif +} + +int +t4_release_clip_addr(struct adapter *sc, struct in6_addr *in6) +{ + int rc = ENOTSUP; +#ifdef INET6 + struct clip_entry *ce; + bool schedule = false; + + mtx_lock(&clip_db_lock); + ce = lookup_clip_entry(sc, in6, false); + if (ce == NULL) + rc = ENOENT; + else if (ce->refcount == 0) + rc = EIO; + else { + release_clip_entry_locked(sc, ce); + if (update_hw_clip_table(sc) != 0) + schedule = true; + rc = 0; + } + mtx_unlock(&clip_db_lock); + if (schedule) + taskqueue_enqueue_timeout(taskqueue_thread, &sc->clip_task, 0); +#endif + return (rc); } #ifdef INET6 void t4_init_clip_table(struct adapter *sc) { - - mtx_init(&sc->clip_table_lock, "CLIP table lock", NULL, MTX_DEF); - TAILQ_INIT(&sc->clip_table); + TAILQ_INIT(&sc->clip_pending); + TIMEOUT_TASK_INIT(taskqueue_thread, &sc->clip_task, 0, t4_clip_task, sc); sc->clip_gen = -1; + sc->clip_table = hashinit(CLIP_HASH_SIZE, M_CXGBE, &sc->clip_mask); + /* Both the hashes must use the same bucket for the same key. */ + if (sc->clip_table != NULL) + MPASS(sc->clip_mask == clip_db_mask); /* * Don't bother forcing an update of the clip table when the * adapter is initialized. Before an interface can be used it @@ -164,194 +397,344 @@ t4_init_clip_table(struct adapter *sc) */ } +/* + * Bumps clip_db_gen if any additions or deletions were made to the CLIP DB. 
+ */ static void -update_clip(struct adapter *sc, void *arg __unused) -{ - - if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4clip")) - return; - - if (mtx_initialized(&sc->clip_table_lock) && !hw_off_limits(sc)) - update_clip_table(sc); - - end_synchronized_op(sc, LOCK_HELD); -} - -static void -t4_clip_task(void *arg, int count) -{ - - t4_iterate(update_clip, NULL); -} - -static void -update_clip_table(struct adapter *sc) +update_clip_db(void) { + VNET_ITERATOR_DECL(vnet_iter); struct rm_priotracker in6_ifa_tracker; + struct in6_addr *in6, tin6; struct in6_ifaddr *ia; - struct in6_addr *lip, tlip; - TAILQ_HEAD(, clip_entry) stale; - struct clip_entry *ce, *ce_temp; - struct vi_info *vi; - int rc, gen, i, j; - uintptr_t last_vnet; - - ASSERT_SYNCHRONIZED_OP(sc); + struct clip_db_entry *cde, *cde_tmp; + int i, addel; + VNET_LIST_RLOCK(); IN6_IFADDR_RLOCK(&in6_ifa_tracker); - mtx_lock(&sc->clip_table_lock); - - gen = atomic_load_acq_int(&in6_ifaddr_gen); - if (gen == sc->clip_gen) - goto done; - - TAILQ_INIT(&stale); - TAILQ_CONCAT(&stale, &sc->clip_table, link); - - /* - * last_vnet optimizes the common cases where all if_vnet = NULL (no - * VIMAGE) or all if_vnet = vnet0. 
- */ - last_vnet = (uintptr_t)(-1); - for_each_port(sc, i) - for_each_vi(sc->port[i], j, vi) { - if (IS_DOOMED(vi)) - continue; - - if (last_vnet == (uintptr_t)vi->ifp->if_vnet) - continue; - - /* XXX: races with if_vmove */ - CURVNET_SET(vi->ifp->if_vnet); + mtx_lock(&clip_db_lock); + VNET_FOREACH(vnet_iter) { + CURVNET_SET_QUIET(vnet_iter); CK_STAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { - lip = &ia->ia_addr.sin6_addr; - - KASSERT(!IN6_IS_ADDR_MULTICAST(lip), - ("%s: mcast address in in6_ifaddr list", __func__)); - - if (IN6_IS_ADDR_LOOPBACK(lip)) + if (ia->ia_ifp->if_flags & IFF_LOOPBACK) + continue; + in6 = &ia->ia_addr.sin6_addr; + KASSERT(!IN6_IS_ADDR_MULTICAST(in6), + ("%s: mcast address in in6_ifaddr list", __func__)); + if (IN6_IS_ADDR_LOOPBACK(in6)) continue; - if (IN6_IS_SCOPE_EMBED(lip)) { - /* Remove the embedded scope */ - tlip = *lip; - lip = &tlip; - in6_clearscope(lip); - } - /* - * XXX: how to weed out the link local address for the - * loopback interface? It's fe80::1 usually (always?). - */ - /* - * If it's in the main list then we already know it's - * not stale. - */ - TAILQ_FOREACH(ce, &sc->clip_table, link) { - if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) - goto next; + if (IN6_IS_SCOPE_EMBED(in6)) { + tin6 = *in6; + in6 = &tin6; + in6_clearscope(in6); } - - /* - * If it's in the stale list we should move it to the - * main list. 
- */ - TAILQ_FOREACH(ce, &stale, link) { - if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) { - TAILQ_REMOVE(&stale, ce, link); - TAILQ_INSERT_TAIL(&sc->clip_table, ce, - link); - goto next; - } - } - - /* A new IP6 address; add it to the CLIP table */ - ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT); - memcpy(&ce->lip, lip, sizeof(ce->lip)); - ce->refcount = 0; - rc = add_lip(sc, lip); - if (rc == 0) - TAILQ_INSERT_TAIL(&sc->clip_table, ce, link); - else { - char ip[INET6_ADDRSTRLEN]; - - inet_ntop(AF_INET6, &ce->lip, &ip[0], - sizeof(ip)); - if (sc->flags & KERN_TLS_ON || - sc->active_ulds != 0) { - log(LOG_ERR, - "%s: could not add %s (%d)\n", - __func__, ip, rc); - } - free(ce, M_CXGBE); - } -next: - continue; + cde = lookup_clip_db_entry(in6, true); + if (cde == NULL) + continue; + cde->tmp_ref++; } CURVNET_RESTORE(); - last_vnet = (uintptr_t)vi->ifp->if_vnet; } - /* - * Remove stale addresses (those no longer in V_in6_ifaddrhead) that are - * no longer referenced by the driver. - */ - TAILQ_FOREACH_SAFE(ce, &stale, link, ce_temp) { - if (ce->refcount == 0) { - rc = delete_lip(sc, &ce->lip); - if (rc == 0) { - TAILQ_REMOVE(&stale, ce, link); - free(ce, M_CXGBE); - } else { - char ip[INET6_ADDRSTRLEN]; + addel = 0; + for (i = 0; i <= clip_db_mask; i++) { + LIST_FOREACH_SAFE(cde, &clip_db[i], link, cde_tmp) { + if (cde->krn_ref == 0 && cde->tmp_ref > 0) { + addel++; /* IP6 addr added. */ + } else if (cde->krn_ref > 0 && cde->tmp_ref == 0) { + if (cde->adp_ref == 0) { + LIST_REMOVE(cde, link); + free(cde, M_CXGBE); + continue; + } + addel++; /* IP6 addr deleted. */ + } + cde->krn_ref = cde->tmp_ref; + cde->tmp_ref = 0; + } + } + if (addel > 0) + clip_db_gen++; + mtx_unlock(&clip_db_lock); + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); + VNET_LIST_RUNLOCK(); - inet_ntop(AF_INET6, &ce->lip, &ip[0], - sizeof(ip)); - log(LOG_ERR, "%s: could not delete %s (%d)\n", - __func__, ip, rc); +} + +/* + * Update the CLIP db and then update the CLIP tables on all the adapters. 
+ */ +static void +t4_clip_db_task(void *arg, int count) +{ + update_clip_db(); + t4_iterate(update_clip_table, NULL); +} + +/* + * Refresh the sw CLIP table for this adapter from the global CLIP db. Entries + * that need to be added or deleted from the hardware CLIP table are placed on a + * pending list but the hardware is not touched. The pending list is something + * reasonable even if this fails so it's ok to apply that to the hardware. + */ +static int +update_sw_clip_table(struct adapter *sc) +{ + struct clip_db_entry *cde; + struct clip_entry *ce, *ce_temp; + int i; + bool found; + + mtx_assert(&clip_db_lock, MA_OWNED); + + /* + * We are about to rebuild the pending list from scratch. Deletions are + * placed before additions because that's how we want to submit them to + * the hardware. + */ + TAILQ_INIT(&sc->clip_pending); + + /* + * Walk the sw CLIP table first. We want to reset every entry's pending + * status as we're rebuilding the pending list. + */ + for (i = 0; i <= clip_db_mask; i++) { + LIST_FOREACH_SAFE(ce, &sc->clip_table[i], link, ce_temp) { + cde = ce->cde; + MPASS(cde->adp_ref > 0); + if (ce->refcount != 0 || cde->krn_ref != 0) { + /* + * Entry should stay in the CLIP. + */ + + if (ce->clip_idx != -1) { + ce->pending = false; + } else { + /* Was never added, carry forward. */ + MPASS(ce->pending); + TAILQ_INSERT_TAIL(&sc->clip_pending, ce, + plink); + } + continue; + } + + /* + * Entry should be removed from the CLIP. + */ + + if (ce->clip_idx != -1) { + ce->pending = true; + TAILQ_INSERT_HEAD(&sc->clip_pending, ce, plink); + } else { + /* Was never added, free right now. 
*/ + MPASS(ce->pending); + LIST_REMOVE(ce, link); + free(ce, M_CXGBE); + if (--cde->adp_ref == 0) { + LIST_REMOVE(cde, link); + free(cde, M_CXGBE); + } } } } - /* The ones that are still referenced need to stay in the CLIP table */ - TAILQ_CONCAT(&sc->clip_table, &stale, link); - sc->clip_gen = gen; + for (i = 0; i <= clip_db_mask; i++) { + LIST_FOREACH(cde, &clip_db[i], link) { + if (cde->krn_ref == 0) + continue; + + found = false; + LIST_FOREACH(ce, &sc->clip_table[i], link) { + if (ce->cde == cde) { + found = true; + break; + } + } + if (found) + continue; + ce = alloc_clip_entry(cde); + if (ce == NULL) + return (ENOMEM); + LIST_INSERT_HEAD(&sc->clip_table[i], ce, link); + TAILQ_INSERT_TAIL(&sc->clip_pending, ce, plink); + ce->pending = true; + } + } + + sc->clip_gen = clip_db_gen; + return (0); +} + +static int +update_hw_clip_table(struct adapter *sc) +{ + struct clip_db_entry *cde; + struct clip_entry *ce; + int rc; + char ip[INET6_ADDRSTRLEN]; + + mtx_assert(&clip_db_lock, MA_OWNED); + rc = begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4clip"); + if (rc != 0) + return (rc); + if (hw_off_limits(sc)) + goto done; /* with rc = 0, we don't want to reschedule. */ + while (!TAILQ_EMPTY(&sc->clip_pending)) { + ce = TAILQ_FIRST(&sc->clip_pending); + MPASS(ce->pending); + cde = ce->cde; + MPASS(cde->adp_ref > 0); + + if (ce->clip_idx == -1) { + /* + * Entry was queued for addition to the HW CLIP. + */ + + if (ce->refcount == 0 && cde->krn_ref == 0) { + /* No need to add to HW CLIP. */ + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + LIST_REMOVE(ce, link); + free(ce, M_CXGBE); + if (--cde->adp_ref == 0) { + LIST_REMOVE(cde, link); + free(cde, M_CXGBE); + } + } else { + /* Add to the HW CLIP. */ + rc = add_lip(sc, &cde->lip, &ce->clip_idx); + if (rc == FW_ENOMEM) { + /* CLIP full, no point in retrying. 
*/ + rc = 0; + goto done; + } + if (rc != 0) { + inet_ntop(AF_INET6, &cde->lip, &ip[0], + sizeof(ip)); + CH_ERR(sc, "add_lip(%s) failed: %d\n", + ip, rc); + goto done; + } + MPASS(ce->clip_idx != -1); + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + ce->pending = false; + } + } else { + /* + * Entry was queued for deletion from the HW CLIP. + */ + + if (ce->refcount == 0 && cde->krn_ref == 0) { + /* + * Delete from the HW CLIP. Delete should never + * fail so we always log an error. But if the + * failure is that the entry wasn't found in the + * CLIP then we carry on as if it was deleted. + */ + rc = del_lip(sc, &cde->lip); + if (rc != 0) + CH_ERR(sc, "del_lip(%s) failed: %d\n", + ip, rc); + if (rc == FW_EPROTO) + rc = 0; + if (rc != 0) + goto done; + + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + LIST_REMOVE(ce, link); + free(ce, M_CXGBE); + if (--cde->adp_ref == 0) { + LIST_REMOVE(cde, link); + free(cde, M_CXGBE); + } + } else { + /* No need to delete from HW CLIP. */ + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + ce->pending = false; + } + } + } done: - mtx_unlock(&sc->clip_table_lock); - IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); + end_synchronized_op(sc, LOCK_HELD); + return (rc); +} + +static void +update_clip_table(struct adapter *sc, void *arg __unused) +{ + bool reschedule; + + if (sc->clip_table == NULL) + return; + + reschedule = false; + mtx_lock(&clip_db_lock); + if (sc->clip_gen != clip_db_gen && update_sw_clip_table(sc) != 0) + reschedule = true; + if (!TAILQ_EMPTY(&sc->clip_pending) && update_hw_clip_table(sc) != 0) + reschedule = true; + mtx_unlock(&clip_db_lock); + if (reschedule) + taskqueue_enqueue_timeout(taskqueue_thread, &sc->clip_task, + -hz / 4); +} + +/* + * Update the CLIP table of the specified adapter. 
+ */ +static void +t4_clip_task(void *sc, int count) +{ + update_clip_table(sc, NULL); } void t4_destroy_clip_table(struct adapter *sc) { struct clip_entry *ce, *ce_temp; + int i; - if (mtx_initialized(&sc->clip_table_lock)) { - mtx_lock(&sc->clip_table_lock); - TAILQ_FOREACH_SAFE(ce, &sc->clip_table, link, ce_temp) { - KASSERT(ce->refcount == 0, - ("%s: CLIP entry %p still in use (%d)", __func__, - ce, ce->refcount)); - TAILQ_REMOVE(&sc->clip_table, ce, link); + mtx_lock(&clip_db_lock); + if (sc->clip_table == NULL) + goto done; /* CLIP was never initialized. */ + for (i = 0; i <= sc->clip_mask; i++) { + LIST_FOREACH_SAFE(ce, &sc->clip_table[i], link, ce_temp) { + MPASS(ce->refcount == 0); + MPASS(ce->cde->adp_ref > 0); #if 0 - delete_lip(sc, &ce->lip); + del_lip(sc, &ce->lip); #endif + LIST_REMOVE(ce, link); + if (--ce->cde->adp_ref == 0 && ce->cde->krn_ref == 0) { + LIST_REMOVE(ce->cde, link); + free(ce->cde, M_CXGBE); + } free(ce, M_CXGBE); } - mtx_unlock(&sc->clip_table_lock); - mtx_destroy(&sc->clip_table_lock); } + hashdestroy(&sc->clip_table, M_CXGBE, sc->clip_mask); + sc->clip_table = NULL; +done: + mtx_unlock(&clip_db_lock); } static void t4_ifaddr_event(void *arg __unused, struct ifnet *ifp, struct ifaddr *ifa, int event) { + struct in6_addr *in6; + if (t4_clip_db_auto == 0) + return; /* Automatic updates not allowed. 
*/ if (ifa->ifa_addr->sa_family != AF_INET6) return; + if (ifp->if_flags & IFF_LOOPBACK) + return; + in6 = &((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr; + if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_MULTICAST(in6)) + return; - atomic_add_rel_int(&in6_ifaddr_gen, 1); - taskqueue_enqueue_timeout(taskqueue_thread, &clip_task, -hz / 4); + taskqueue_enqueue(taskqueue_thread, &clip_db_task); } int @@ -360,7 +743,7 @@ sysctl_clip(SYSCTL_HANDLER_ARGS) struct adapter *sc = arg1; struct clip_entry *ce; struct sbuf *sb; - int rc, header = 0; + int i, rc, header = 0; char ip[INET6_ADDRSTRLEN]; rc = sysctl_wire_old_buffer(req, 0); @@ -371,17 +754,25 @@ sysctl_clip(SYSCTL_HANDLER_ARGS) if (sb == NULL) return (ENOMEM); - mtx_lock(&sc->clip_table_lock); - TAILQ_FOREACH(ce, &sc->clip_table, link) { - if (header == 0) { - sbuf_printf(sb, "%-40s %-5s", "IP address", "Users"); - header = 1; + mtx_lock(&clip_db_lock); + for (i = 0; i <= sc->clip_mask; i++) { + LIST_FOREACH(ce, &sc->clip_table[i], link) { + if (header == 0) { + sbuf_printf(sb, "%-4s %-4s %s", "Indx", "Refs", + "IP address"); + header = 1; + } + inet_ntop(AF_INET6, &ce->cde->lip, &ip[0], sizeof(ip)); + if (ce->clip_idx == -1) { + sbuf_printf(sb, "\n%-4s %-4d %s", "-", + ce->refcount, ip); + } else { + sbuf_printf(sb, "\n%-4d %-4d %s", ce->clip_idx, + ce->refcount, ip); + } } - inet_ntop(AF_INET6, &ce->lip, &ip[0], sizeof(ip)); - - sbuf_printf(sb, "\n%-40s %5u", ip, ce->refcount); } - mtx_unlock(&sc->clip_table_lock); + mtx_unlock(&clip_db_lock); rc = sbuf_finish(sb); sbuf_delete(sb); @@ -389,11 +780,73 @@ sysctl_clip(SYSCTL_HANDLER_ARGS) return (rc); } +static int +sysctl_clip_db(SYSCTL_HANDLER_ARGS) +{ + struct clip_db_entry *cde; + struct sbuf *sb; + int i, rc, header = 0; + char ip[INET6_ADDRSTRLEN]; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); + if (sb == NULL) + return (ENOMEM); + + mtx_lock(&clip_db_lock); + for (i = 0; i <= 
clip_db_mask; i++) { + LIST_FOREACH(cde, &clip_db[i], link) { + MPASS(cde->tmp_ref == 0); + if (header == 0) { + sbuf_printf(sb, "%-4s %-4s %s", "Kref", "Aref", + "IP address"); + header = 1; + } + inet_ntop(AF_INET6, &cde->lip, &ip[0], sizeof(ip)); + sbuf_printf(sb, "\n%-4d %-4d %s", cde->krn_ref, + cde->adp_ref, ip); + } + } + mtx_unlock(&clip_db_lock); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_clip_db_auto(SYSCTL_HANDLER_ARGS) +{ + int rc, val; + + val = t4_clip_db_auto; + rc = sysctl_handle_int(oidp, &val, 0, req); + if (rc != 0 || req->newptr == NULL) + return (rc); + + if (val == 0 || val == 1) + t4_clip_db_auto = val; + else { + /* + * Writing a value other than 0 or 1 forces a one-time update of + * the clip_db directly in the sysctl and not in some taskqueue. + */ + t4_clip_db_task(NULL, 0); + } + + return (0); +} + void t4_clip_modload(void) { - - TIMEOUT_TASK_INIT(taskqueue_thread, &clip_task, 0, t4_clip_task, NULL); + mtx_init(&clip_db_lock, "clip_db", NULL, MTX_DEF); + clip_db = hashinit(CLIP_HASH_SIZE, M_CXGBE, &clip_db_mask); + TASK_INIT(&clip_db_task, 0, t4_clip_db_task, NULL); ifaddr_evhandler = EVENTHANDLER_REGISTER(ifaddr_event_ext, t4_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); } @@ -401,8 +854,9 @@ t4_clip_modload(void) void t4_clip_modunload(void) { - EVENTHANDLER_DEREGISTER(ifaddr_event_ext, ifaddr_evhandler); - taskqueue_cancel_timeout(taskqueue_thread, &clip_task, NULL); + taskqueue_drain(taskqueue_thread, &clip_db_task); + hashdestroy(&clip_db, M_CXGBE, clip_db_mask); + mtx_destroy(&clip_db_lock); } #endif diff --git a/sys/dev/cxgbe/t4_clip.h b/sys/dev/cxgbe/t4_clip.h index 8d9acdb86fa5..9dc3d39f3266 100644 --- a/sys/dev/cxgbe/t4_clip.h +++ b/sys/dev/cxgbe/t4_clip.h @@ -32,19 +32,18 @@ #ifndef __T4_CLIP_H #define __T4_CLIP_H -struct clip_entry { - TAILQ_ENTRY(clip_entry) link; - struct in6_addr lip; /* local IPv6 address */ - u_int refcount; -}; +#define CLIP_HASH_SIZE 32 +struct clip_entry; 
+struct in6_addr; void t4_clip_modload(void); void t4_clip_modunload(void); void t4_init_clip_table(struct adapter *); void t4_destroy_clip_table(struct adapter *); -struct clip_entry *t4_hold_lip(struct adapter *, struct in6_addr *, - struct clip_entry *); -void t4_release_lip(struct adapter *, struct clip_entry *); +struct clip_entry *t4_get_clip_entry(struct adapter *, struct in6_addr *, bool); +void t4_hold_clip_entry(struct adapter *, struct clip_entry *); +void t4_release_clip_entry(struct adapter *, struct clip_entry *); +int t4_release_clip_addr(struct adapter *, struct in6_addr *); int sysctl_clip(SYSCTL_HANDLER_ARGS); diff --git a/sys/dev/cxgbe/t4_ioctl.h b/sys/dev/cxgbe/t4_ioctl.h index ff2c5ef80a14..f3bb7d8b4aa4 100644 --- a/sys/dev/cxgbe/t4_ioctl.h +++ b/sys/dev/cxgbe/t4_ioctl.h @@ -64,6 +64,8 @@ enum { T4_LOAD_BOOTCFG, /* flash bootcfg */ T4_CUDBG_DUMP, /* debug dump of chip state */ T4_SET_FILTER_MASK, /* set filter mask (hashfilter mode) */ + T4_HOLD_CLIP_ADDR, /* add ref on an IP in the CLIP */ + T4_RELEASE_CLIP_ADDR, /* remove ref from an IP in the CLIP */ }; struct t4_reg { @@ -405,6 +407,12 @@ struct t4_offload_policy { struct offload_rule *rule; }; +/* Address/mask entry in the CLIP. FW_CLIP2_CMD is aware of the mask. 
*/ +struct t4_clip_addr { + uint8_t addr[16]; + uint8_t mask[16]; +}; + #define CHELSIO_T4_GETREG _IOWR('f', T4_GETREG, struct t4_reg) #define CHELSIO_T4_SETREG _IOW('f', T4_SETREG, struct t4_reg) #define CHELSIO_T4_REGDUMP _IOWR('f', T4_REGDUMP, struct t4_regdump) @@ -431,4 +439,6 @@ struct t4_offload_policy { #define CHELSIO_T4_CUDBG_DUMP _IOWR('f', T4_CUDBG_DUMP, struct t4_cudbg_dump) #define CHELSIO_T4_SET_OFLD_POLICY _IOW('f', T4_SET_OFLD_POLICY, struct t4_offload_policy) #define CHELSIO_T4_SET_FILTER_MASK _IOW('f', T4_SET_FILTER_MASK, uint32_t) +#define CHELSIO_T4_HOLD_CLIP_ADDR _IOW('f', T4_HOLD_CLIP_ADDR, struct t4_clip_addr) +#define CHELSIO_T4_RELEASE_CLIP_ADDR _IOW('f', T4_RELEASE_CLIP_ADDR, struct t4_clip_addr) #endif diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 3cfab1ef04e2..51fc6504e5c2 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -838,6 +838,8 @@ static int set_offload_policy(struct adapter *, struct t4_offload_policy *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); static int clear_stats(struct adapter *, u_int); +static int hold_clip_addr(struct adapter *, struct t4_clip_addr *); +static int release_clip_addr(struct adapter *, struct t4_clip_addr *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, bool); static void t4_async_event(void *, int); @@ -11910,6 +11912,35 @@ clear_stats(struct adapter *sc, u_int port_id) return (0); } +static int +hold_clip_addr(struct adapter *sc, struct t4_clip_addr *ca) +{ +#ifdef INET6 + struct in6_addr in6; + + bcopy(&ca->addr[0], &in6.s6_addr[0], sizeof(in6.s6_addr)); + if (t4_get_clip_entry(sc, &in6, true) != NULL) + return (0); + else + return (EIO); +#else + return (ENOTSUP); +#endif +} + +static int +release_clip_addr(struct adapter *sc, struct t4_clip_addr *ca) +{ +#ifdef INET6 + struct in6_addr in6; + + bcopy(&ca->addr[0], &in6.s6_addr[0], 
sizeof(in6.s6_addr)); + return (t4_release_clip_addr(sc, &in6)); +#else + return (ENOTSUP); +#endif +} + int t4_os_find_pci_capability(struct adapter *sc, int cap) { @@ -12181,6 +12212,12 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, case CHELSIO_T4_SET_OFLD_POLICY: rc = set_offload_policy(sc, (struct t4_offload_policy *)data); break; + case CHELSIO_T4_HOLD_CLIP_ADDR: + rc = hold_clip_addr(sc, (struct t4_clip_addr *)data); + break; + case CHELSIO_T4_RELEASE_CLIP_ADDR: + rc = release_clip_addr(sc, (struct t4_clip_addr *)data); + break; default: rc = ENOTTY; } diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c index c71b9694bd3b..f4aa84d6514f 100644 --- a/sys/dev/cxgbe/tom/t4_connect.c +++ b/sys/dev/cxgbe/tom/t4_connect.c @@ -300,7 +300,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh, if ((inp->inp_vflag & INP_IPV6) == 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); - toep->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL); + toep->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true); if (toep->ce == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOENT); @@ -394,7 +394,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh, if (toep->l2te) t4_l2t_release(toep->l2te); if (toep->ce) - t4_release_lip(sc, toep->ce); + t4_release_clip_entry(sc, toep->ce); free_toepcb(toep); } diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c index 0245acfe005b..8623079fe429 100644 --- a/sys/dev/cxgbe/tom/t4_listen.c +++ b/sys/dev/cxgbe/tom/t4_listen.c @@ -211,7 +211,7 @@ alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi) if (inp->inp_vflag & INP_IPV6 && !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) { - lctx->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL); + lctx->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true); if (lctx->ce == NULL) { free(lctx, M_CXGBE); return (NULL); @@ -244,7 +244,7 @@ free_lctx(struct adapter *sc, struct listen_ctx *lctx) __func__, 
lctx->stid, lctx, lctx->inp); if (lctx->ce) - t4_release_lip(sc, lctx->ce); + t4_release_clip_entry(sc, lctx->ce); free_stid(sc, lctx); free(lctx, M_CXGBE); @@ -1522,8 +1522,18 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss, /* Come up with something that syncache_expand should be ok with. */ synqe_to_protohdrs(sc, synqe, cpl, &inc, &th, &to); - if (inc.inc_flags & INC_ISIPV6) - toep->ce = t4_hold_lip(sc, &inc.inc6_laddr, lctx->ce); + if (inc.inc_flags & INC_ISIPV6) { + if (lctx->ce == NULL) { + toep->ce = t4_get_clip_entry(sc, &inc.inc6_laddr, true); + if (toep->ce == NULL) { + free_toepcb(toep); + goto reset; /* RST without a CLIP entry? */ + } + } else { + t4_hold_clip_entry(sc, lctx->ce); + toep->ce = lctx->ce; + } + } so = inp->inp_socket; KASSERT(so != NULL, ("%s: socket is NULL", __func__)); diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index 173357404ebe..97693ab74000 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -348,7 +348,7 @@ release_offload_resources(struct toepcb *toep) } if (toep->ce) - t4_release_lip(sc, toep->ce); + t4_release_clip_entry(sc, toep->ce); if (toep->params.tc_idx != -1) t4_release_cl_rl(sc, toep->vi->pi->port_id, toep->params.tc_idx);