diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index b3b214ce3c96..27655ec2fe59 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -68,6 +69,15 @@ MALLOC_DECLARE(M_CXGBE); #define CXGBE_UNIMPLEMENTED(s) \ panic("%s (%s, line %d) not implemented yet.", s, __FILE__, __LINE__) +/* + * Same as LIST_HEAD from queue.h. This is to avoid conflict with LinuxKPI's + * LIST_HEAD when building iw_cxgbe. + */ +#define CXGBE_LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + #ifndef SYSCTL_ADD_UQUAD #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD #define sysctl_handle_64 sysctl_handle_quad @@ -886,9 +896,11 @@ struct adapter { struct port_info *port[MAX_NPORTS]; uint8_t chan_map[MAX_NCHAN]; /* channel -> port */ - struct mtx clip_table_lock; - TAILQ_HEAD(, clip_entry) clip_table; + CXGBE_LIST_HEAD(, clip_entry) *clip_table; + TAILQ_HEAD(, clip_entry) clip_pending; /* these need hw update. */ + u_long clip_mask; int clip_gen; + struct timeout_task clip_task; void *tom_softc; /* (struct tom_data *) */ struct tom_tunables tt; diff --git a/sys/dev/cxgbe/crypto/t4_kern_tls.c b/sys/dev/cxgbe/crypto/t4_kern_tls.c index 957d0202fa3f..99d0d33cf128 100644 --- a/sys/dev/cxgbe/crypto/t4_kern_tls.c +++ b/sys/dev/cxgbe/crypto/t4_kern_tls.c @@ -379,7 +379,7 @@ send_ktls_act_open_req(struct adapter *sc, struct vi_info *vi, isipv6 = (inp->inp_vflag & INP_IPV6) != 0; if (isipv6) { - tlsp->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL); + tlsp->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true); if (tlsp->ce == NULL) return (ENOENT); } @@ -2333,7 +2333,7 @@ cxgbe_tls_tag_free(struct m_snd_tag *mst) if (tlsp->tid >= 0) release_tid(sc, tlsp->tid, tlsp->ctrlq); if (tlsp->ce) - t4_release_lip(sc, tlsp->ce); + t4_release_clip_entry(sc, tlsp->ce); if (tlsp->tx_key_addr >= 0) free_keyid(tlsp, tlsp->tx_key_addr); diff --git a/sys/dev/cxgbe/t4_clip.c b/sys/dev/cxgbe/t4_clip.c index ad26d212315e..18d78a9e830b 100644 --- a/sys/dev/cxgbe/t4_clip.c +++ b/sys/dev/cxgbe/t4_clip.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Copyright (c) 2012 Chelsio Communications, Inc. + * Copyright (c) 2012-2021 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * @@ -50,81 +50,233 @@ __FBSDID("$FreeBSD$"); #include "common/common.h" #include "t4_clip.h" +/* + * Code to deal with the Compressed Local IPv6 (CLIP) table in the ASIC. + * + * The driver maintains a global CLIP database (clip_db) of IPv6 addresses and a + * per-adapter CLIP table (sc->clip_table) with entries that point to an IPv6 in + * the clip_db. All access is protected by a single global lock (clip_db_lock). + * The correct lock order is clip lock before synchronized op. + * + * By default (hw.cxgbe.clip_db_auto=1) all local IPv6 addresses are added to + * the db. Addresses are also added on-demand when the driver allocates an + * entry for a filter, TOE tid, etc. krn_ref counts the number of times an + * address appears in the system. adp_ref counts the number of adapters that + * have that address in their CLIP table. If both are 0 then the entry is + * evicted from the db. Consumers of the CLIP table entry (filters, TOE tids) + * are tracked in ce->refcount. Driver ioctls let external consumers add/remove + * addresses from the CLIP table. + */ + #if defined(INET6) -static int add_lip(struct adapter *, struct in6_addr *); -static int delete_lip(struct adapter *, struct in6_addr *); -static struct clip_entry *search_lip(struct adapter *, struct in6_addr *); -static void update_clip(struct adapter *, void *); -static void t4_clip_task(void *, int); -static void update_clip_table(struct adapter *); +struct clip_db_entry { + LIST_ENTRY(clip_db_entry) link; /* clip_db hash linkage */ + struct in6_addr lip; + u_int krn_ref; /* # of times this IP6 appears in list of all IP6 */ + u_int adp_ref; /* # of adapters with this IP6 in their CLIP */ + u_int tmp_ref; /* Used only during refresh */ +}; + +struct clip_entry { + LIST_ENTRY(clip_entry) link; /* clip_table hash linkage */ + TAILQ_ENTRY(clip_entry) plink; /* clip_pending linkage */ + struct clip_db_entry *cde; + int16_t clip_idx; /* index in the hw table */ + bool pending; /* in clip_pending list */ + int refcount; +}; -static int in6_ifaddr_gen; static eventhandler_tag ifaddr_evhandler; -static struct timeout_task clip_task; +static struct mtx clip_db_lock; +static LIST_HEAD(, clip_db_entry) *clip_db; +static u_long clip_db_mask; +static int clip_db_gen; +static struct task clip_db_task; -static int -add_lip(struct adapter *sc, struct in6_addr *lip) +static int add_lip(struct adapter *, struct in6_addr *, int16_t *); +static int del_lip(struct adapter *, struct in6_addr *); +static void t4_clip_db_task(void *, int); +static void t4_clip_task(void *, int); +static void update_clip_db(void); +static int update_sw_clip_table(struct adapter *); +static int update_hw_clip_table(struct adapter *); +static void update_clip_table(struct adapter *, void *); +static int sysctl_clip_db(SYSCTL_HANDLER_ARGS); +static int sysctl_clip_db_auto(SYSCTL_HANDLER_ARGS); +static struct clip_db_entry *lookup_clip_db_entry(struct in6_addr *, bool); +static struct clip_entry *lookup_clip_entry(struct adapter *, struct in6_addr *, + bool); + +SYSCTL_PROC(_hw_cxgbe, OID_AUTO, clip_db, CTLTYPE_STRING | CTLFLAG_RD | + CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0, sysctl_clip_db, "A", + "CLIP database"); + +int t4_clip_db_auto = 1; +SYSCTL_PROC(_hw_cxgbe, OID_AUTO, clip_db_auto, CTLTYPE_INT | CTLFLAG_RWTUN | + CTLFLAG_MPSAFE, NULL, 0, sysctl_clip_db_auto, "I", + "Add local IPs to CLIP db automatically (0 = no, 1 = yes)"); + +static inline uint32_t +clip_hashfn(struct in6_addr *addr) { - struct fw_clip_cmd c; + return (fnv_32_buf(addr, sizeof(*addr), FNV1_32_INIT) & clip_db_mask); +} - ASSERT_SYNCHRONIZED_OP(sc); - mtx_assert(&sc->clip_table_lock, MA_OWNED); +static inline struct clip_db_entry * +alloc_clip_db_entry(struct in6_addr *in6) +{ + struct clip_db_entry *cde; - memset(&c, 0, sizeof(c)); - c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | - F_FW_CMD_WRITE); - c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c)); - c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; - c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; + cde = malloc(sizeof(*cde), M_CXGBE, M_NOWAIT | M_ZERO); + if (__predict_true(cde != NULL)) + memcpy(&cde->lip, in6, sizeof(cde->lip)); - return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); + return (cde); +} + +static inline struct clip_entry * +alloc_clip_entry(struct clip_db_entry *cde) +{ + struct clip_entry *ce; + + mtx_assert(&clip_db_lock, MA_OWNED); + + ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT | M_ZERO); + if (__predict_true(ce != NULL)) { + ce->cde = cde; + cde->adp_ref++; + ce->clip_idx = -1; + } + + return (ce); +} + +/* + * Look up the IP6 address in the CLIP db. If add is set then an entry for the + * IP6 will be added to the db. + */ +static struct clip_db_entry * +lookup_clip_db_entry(struct in6_addr *in6, bool add) +{ + struct clip_db_entry *cde; + const int bucket = clip_hashfn(in6); + + mtx_assert(&clip_db_lock, MA_OWNED); + + LIST_FOREACH(cde, &clip_db[bucket], link) { + if (IN6_ARE_ADDR_EQUAL(&cde->lip, in6)) + return (cde); + } + + /* Not found. Create a new entry if requested. */ + if (add) { + cde = alloc_clip_db_entry(in6); + if (cde != NULL) + LIST_INSERT_HEAD(&clip_db[bucket], cde, link); + } + + return (cde); +} + +/* + * Look up the IP6 address in the CLIP db. If add is set then an entry for the + * IP6 will be added to the db. + */ +static struct clip_entry * +lookup_clip_entry(struct adapter *sc, struct in6_addr *in6, bool add) +{ + struct clip_db_entry *cde; + struct clip_entry *ce; + const int bucket = clip_hashfn(in6); + + mtx_assert(&clip_db_lock, MA_OWNED); + + cde = lookup_clip_db_entry(in6, add); + if (cde == NULL) + return (NULL); + + LIST_FOREACH(ce, &sc->clip_table[bucket], link) { + if (ce->cde == cde) + return (ce); + } + + /* Not found. Create a new entry if requested. */ + if (add) { + ce = alloc_clip_entry(cde); + if (ce != NULL) { + LIST_INSERT_HEAD(&sc->clip_table[bucket], ce, link); + TAILQ_INSERT_TAIL(&sc->clip_pending, ce, plink); + ce->pending = true; + } + } + + return (ce); } static int -delete_lip(struct adapter *sc, struct in6_addr *lip) +add_lip(struct adapter *sc, struct in6_addr *lip, int16_t *idx) +{ + struct fw_clip_cmd c; + int rc; + + ASSERT_SYNCHRONIZED_OP(sc); + + memset(&c, 0, sizeof(c)); + c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | + F_FW_CMD_WRITE); + c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c)); + c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; + c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; + + rc = -t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c); + if (rc == 0 && idx != NULL) + *idx = G_FW_CLIP_CMD_INDEX(ntohl(c.alloc_to_len16)); + return (rc); +} + +static int +del_lip(struct adapter *sc, struct in6_addr *lip) { struct fw_clip_cmd c; ASSERT_SYNCHRONIZED_OP(sc); - mtx_assert(&sc->clip_table_lock, MA_OWNED); memset(&c, 0, sizeof(c)); c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); - c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c)); - c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; - c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; + c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c)); + c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; + c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); } - -static struct clip_entry * -search_lip(struct adapter *sc, struct in6_addr *lip) -{ - struct clip_entry *ce; - - mtx_assert(&sc->clip_table_lock, MA_OWNED); - - TAILQ_FOREACH(ce, &sc->clip_table, link) { - if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) - return (ce); - } - - return (NULL); -} #endif struct clip_entry * -t4_hold_lip(struct adapter *sc, struct in6_addr *lip, struct clip_entry *ce) +t4_get_clip_entry(struct adapter *sc, struct in6_addr *in6, bool add) { - #ifdef INET6 - mtx_lock(&sc->clip_table_lock); - if (ce == NULL) - ce = search_lip(sc, lip); - if (ce != NULL) - ce->refcount++; - mtx_unlock(&sc->clip_table_lock); + struct clip_entry *ce; + bool schedule = false; + + mtx_lock(&clip_db_lock); + ce = lookup_clip_entry(sc, in6, add); + if (ce != NULL) { + MPASS(ce->cde->adp_ref > 0); + if (++ce->refcount == 1 && ce->pending && ce->clip_idx != -1) { + /* + * Valid entry that was waiting to be deleted. It is in + * use now so take it off the pending list. + */ + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + ce->pending = false; + } + if (ce->clip_idx == -1 && update_hw_clip_table(sc) != 0) + schedule = true; + } + mtx_unlock(&clip_db_lock); + if (schedule) + taskqueue_enqueue_timeout(taskqueue_thread, &sc->clip_task, 0); return (ce); #else @@ -133,29 +285,110 @@ t4_hold_lip(struct adapter *sc, struct in6_addr *lip, struct clip_entry *ce) } void -t4_release_lip(struct adapter *sc, struct clip_entry *ce) +t4_hold_clip_entry(struct adapter *sc, struct clip_entry *ce) { +#ifdef INET6 + MPASS(ce != NULL); + MPASS(ce->cde->adp_ref > 0); + + mtx_lock(&clip_db_lock); + MPASS(ce->refcount > 0); /* Caller should already have a reference */ + ce->refcount++; + mtx_unlock(&clip_db_lock); +#endif +} #ifdef INET6 - mtx_lock(&sc->clip_table_lock); - KASSERT(search_lip(sc, &ce->lip) == ce, - ("%s: CLIP entry %p p not in CLIP table.", __func__, ce)); - KASSERT(ce->refcount > 0, - ("%s: CLIP entry %p has refcount 0", __func__, ce)); - --ce->refcount; - mtx_unlock(&sc->clip_table_lock); +static void +release_clip_entry_locked(struct adapter *sc, struct clip_entry *ce) +{ + struct clip_db_entry *cde; + + mtx_assert(&clip_db_lock, MA_OWNED); + MPASS(ce->refcount > 0); + cde = ce->cde; + MPASS(cde->adp_ref > 0); + if (--ce->refcount == 0 && cde->krn_ref == 0) { + if (ce->clip_idx == -1) { + /* Was never written to the hardware. */ + MPASS(ce->pending); + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + LIST_REMOVE(ce, link); + free(ce, M_CXGBE); + if (--cde->adp_ref == 0) { + LIST_REMOVE(cde, link); + free(cde, M_CXGBE); + } + } else { + /* + * Valid entry is now unused, add to the pending list + * for deletion. Its refcount was 1 on entry so it + * can't already be pending. + */ + MPASS(!ce->pending); + TAILQ_INSERT_HEAD(&sc->clip_pending, ce, plink); + ce->pending = true; + } + } +} #endif + +void +t4_release_clip_entry(struct adapter *sc, struct clip_entry *ce) +{ +#ifdef INET6 + MPASS(ce != NULL); + + mtx_lock(&clip_db_lock); + release_clip_entry_locked(sc, ce); + /* + * This isn't a manual release via the ioctl. No need to update the + * hw right now even if the release resulted in the entry being queued + * for deletion. + */ + mtx_unlock(&clip_db_lock); +#endif +} + +int +t4_release_clip_addr(struct adapter *sc, struct in6_addr *in6) +{ + int rc = ENOTSUP; +#ifdef INET6 + struct clip_entry *ce; + bool schedule = false; + + mtx_lock(&clip_db_lock); + ce = lookup_clip_entry(sc, in6, false); + if (ce == NULL) + rc = ENOENT; + else if (ce->refcount == 0) + rc = EIO; + else { + release_clip_entry_locked(sc, ce); + if (update_hw_clip_table(sc) != 0) + schedule = true; + rc = 0; + } + mtx_unlock(&clip_db_lock); + if (schedule) + taskqueue_enqueue_timeout(taskqueue_thread, &sc->clip_task, 0); +#endif + return (rc); } #ifdef INET6 void t4_init_clip_table(struct adapter *sc) { - - mtx_init(&sc->clip_table_lock, "CLIP table lock", NULL, MTX_DEF); - TAILQ_INIT(&sc->clip_table); + TAILQ_INIT(&sc->clip_pending); + TIMEOUT_TASK_INIT(taskqueue_thread, &sc->clip_task, 0, t4_clip_task, sc); sc->clip_gen = -1; + sc->clip_table = hashinit(CLIP_HASH_SIZE, M_CXGBE, &sc->clip_mask); + /* Both the hashes must use the same bucket for the same key. */ + if (sc->clip_table != NULL) + MPASS(sc->clip_mask == clip_db_mask); /* * Don't bother forcing an update of the clip table when the * adapter is initialized. Before an interface can be used it @@ -164,194 +397,344 @@ t4_init_clip_table(struct adapter *sc) */ } +/* + * Returns true if any additions or deletions were made to the CLIP DB. + */ static void -update_clip(struct adapter *sc, void *arg __unused) -{ - - if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4clip")) - return; - - if (mtx_initialized(&sc->clip_table_lock) && !hw_off_limits(sc)) - update_clip_table(sc); - - end_synchronized_op(sc, LOCK_HELD); -} - -static void -t4_clip_task(void *arg, int count) -{ - - t4_iterate(update_clip, NULL); -} - -static void -update_clip_table(struct adapter *sc) +update_clip_db(void) { + VNET_ITERATOR_DECL(vnet_iter); struct rm_priotracker in6_ifa_tracker; + struct in6_addr *in6, tin6; struct in6_ifaddr *ia; - struct in6_addr *lip, tlip; - TAILQ_HEAD(, clip_entry) stale; - struct clip_entry *ce, *ce_temp; - struct vi_info *vi; - int rc, gen, i, j; - uintptr_t last_vnet; - - ASSERT_SYNCHRONIZED_OP(sc); + struct clip_db_entry *cde, *cde_tmp; + int i, addel; + VNET_LIST_RLOCK(); IN6_IFADDR_RLOCK(&in6_ifa_tracker); - mtx_lock(&sc->clip_table_lock); - - gen = atomic_load_acq_int(&in6_ifaddr_gen); - if (gen == sc->clip_gen) - goto done; - - TAILQ_INIT(&stale); - TAILQ_CONCAT(&stale, &sc->clip_table, link); - - /* - * last_vnet optimizes the common cases where all if_vnet = NULL (no - * VIMAGE) or all if_vnet = vnet0. - */ - last_vnet = (uintptr_t)(-1); - for_each_port(sc, i) - for_each_vi(sc->port[i], j, vi) { - if (IS_DOOMED(vi)) - continue; - - if (last_vnet == (uintptr_t)vi->ifp->if_vnet) - continue; - - /* XXX: races with if_vmove */ - CURVNET_SET(vi->ifp->if_vnet); + mtx_lock(&clip_db_lock); + VNET_FOREACH(vnet_iter) { + CURVNET_SET_QUIET(vnet_iter); CK_STAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { - lip = &ia->ia_addr.sin6_addr; - - KASSERT(!IN6_IS_ADDR_MULTICAST(lip), - ("%s: mcast address in in6_ifaddr list", __func__)); - - if (IN6_IS_ADDR_LOOPBACK(lip)) + if (ia->ia_ifp->if_flags & IFF_LOOPBACK) + continue; + in6 = &ia->ia_addr.sin6_addr; + KASSERT(!IN6_IS_ADDR_MULTICAST(in6), + ("%s: mcast address in in6_ifaddr list", __func__)); + if (IN6_IS_ADDR_LOOPBACK(in6)) continue; - if (IN6_IS_SCOPE_EMBED(lip)) { - /* Remove the embedded scope */ - tlip = *lip; - lip = &tlip; - in6_clearscope(lip); - } - /* - * XXX: how to weed out the link local address for the - * loopback interface? It's fe80::1 usually (always?). - */ - /* - * If it's in the main list then we already know it's - * not stale. - */ - TAILQ_FOREACH(ce, &sc->clip_table, link) { - if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) - goto next; + if (IN6_IS_SCOPE_EMBED(in6)) { + tin6 = *in6; + in6 = &tin6; + in6_clearscope(in6); } - - /* - * If it's in the stale list we should move it to the - * main list. - */ - TAILQ_FOREACH(ce, &stale, link) { - if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) { - TAILQ_REMOVE(&stale, ce, link); - TAILQ_INSERT_TAIL(&sc->clip_table, ce, - link); - goto next; - } - } - - /* A new IP6 address; add it to the CLIP table */ - ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT); - memcpy(&ce->lip, lip, sizeof(ce->lip)); - ce->refcount = 0; - rc = add_lip(sc, lip); - if (rc == 0) - TAILQ_INSERT_TAIL(&sc->clip_table, ce, link); - else { - char ip[INET6_ADDRSTRLEN]; - - inet_ntop(AF_INET6, &ce->lip, &ip[0], - sizeof(ip)); - if (sc->flags & KERN_TLS_ON || - sc->active_ulds != 0) { - log(LOG_ERR, - "%s: could not add %s (%d)\n", - __func__, ip, rc); - } - free(ce, M_CXGBE); - } -next: - continue; + cde = lookup_clip_db_entry(in6, true); + if (cde == NULL) + continue; + cde->tmp_ref++; } CURVNET_RESTORE(); - last_vnet = (uintptr_t)vi->ifp->if_vnet; } - /* - * Remove stale addresses (those no longer in V_in6_ifaddrhead) that are - * no longer referenced by the driver. - */ - TAILQ_FOREACH_SAFE(ce, &stale, link, ce_temp) { - if (ce->refcount == 0) { - rc = delete_lip(sc, &ce->lip); - if (rc == 0) { - TAILQ_REMOVE(&stale, ce, link); - free(ce, M_CXGBE); - } else { - char ip[INET6_ADDRSTRLEN]; + addel = 0; + for (i = 0; i <= clip_db_mask; i++) { + LIST_FOREACH_SAFE(cde, &clip_db[i], link, cde_tmp) { + if (cde->krn_ref == 0 && cde->tmp_ref > 0) { + addel++; /* IP6 addr added. */ + } else if (cde->krn_ref > 0 && cde->tmp_ref == 0) { + if (cde->adp_ref == 0) { + LIST_REMOVE(cde, link); + free(cde, M_CXGBE); + continue; + } + addel++; /* IP6 addr deleted. */ + } + cde->krn_ref = cde->tmp_ref; + cde->tmp_ref = 0; + } + } + if (addel > 0) + clip_db_gen++; + mtx_unlock(&clip_db_lock); + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); + VNET_LIST_RUNLOCK(); - inet_ntop(AF_INET6, &ce->lip, &ip[0], - sizeof(ip)); - log(LOG_ERR, "%s: could not delete %s (%d)\n", - __func__, ip, rc); +} + +/* + * Update the CLIP db and then update the CLIP tables on all the adapters. + */ +static void +t4_clip_db_task(void *arg, int count) +{ + update_clip_db(); + t4_iterate(update_clip_table, NULL); +} + +/* + * Refresh the sw CLIP table for this adapter from the global CLIP db. Entries + * that need to be added or deleted from the hardware CLIP table are placed on a + * pending list but the hardware is not touched. The pending list is something + * reasonable even if this fails so it's ok to apply that to the hardware. + */ +static int +update_sw_clip_table(struct adapter *sc) +{ + struct clip_db_entry *cde; + struct clip_entry *ce, *ce_temp; + int i; + bool found; + + mtx_assert(&clip_db_lock, MA_OWNED); + + /* + * We are about to rebuild the pending list from scratch. Deletions are + * placed before additions because that's how we want to submit them to + * the hardware. + */ + TAILQ_INIT(&sc->clip_pending); + + /* + * Walk the sw CLIP table first. We want to reset every entry's pending + * status as we're rebuilding the pending list. + */ + for (i = 0; i <= clip_db_mask; i++) { + LIST_FOREACH_SAFE(ce, &sc->clip_table[i], link, ce_temp) { + cde = ce->cde; + MPASS(cde->adp_ref > 0); + if (ce->refcount != 0 || cde->krn_ref != 0) { + /* + * Entry should stay in the CLIP. + */ + + if (ce->clip_idx != -1) { + ce->pending = false; + } else { + /* Was never added, carry forward. */ + MPASS(ce->pending); + TAILQ_INSERT_TAIL(&sc->clip_pending, ce, + plink); + } + continue; + } + + /* + * Entry should be removed from the CLIP. + */ + + if (ce->clip_idx != -1) { + ce->pending = true; + TAILQ_INSERT_HEAD(&sc->clip_pending, ce, plink); + } else { + /* Was never added, free right now. */ + MPASS(ce->pending); + LIST_REMOVE(ce, link); + free(ce, M_CXGBE); + if (--cde->adp_ref == 0) { + LIST_REMOVE(cde, link); + free(cde, M_CXGBE); + } } } } - /* The ones that are still referenced need to stay in the CLIP table */ - TAILQ_CONCAT(&sc->clip_table, &stale, link); - sc->clip_gen = gen; + for (i = 0; i <= clip_db_mask; i++) { + LIST_FOREACH(cde, &clip_db[i], link) { + if (cde->krn_ref == 0) + continue; + + found = false; + LIST_FOREACH(ce, &sc->clip_table[i], link) { + if (ce->cde == cde) { + found = true; + break; + } + } + if (found) + continue; + ce = alloc_clip_entry(cde); + if (ce == NULL) + return (ENOMEM); + LIST_INSERT_HEAD(&sc->clip_table[i], ce, link); + TAILQ_INSERT_TAIL(&sc->clip_pending, ce, plink); + ce->pending = true; + } + } + + sc->clip_gen = clip_db_gen; + return (0); +} + +static int +update_hw_clip_table(struct adapter *sc) +{ + struct clip_db_entry *cde; + struct clip_entry *ce; + int rc; + char ip[INET6_ADDRSTRLEN]; + + mtx_assert(&clip_db_lock, MA_OWNED); + rc = begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4clip"); + if (rc != 0) + return (rc); + if (hw_off_limits(sc)) + goto done; /* with rc = 0, we don't want to reschedule. */ + while (!TAILQ_EMPTY(&sc->clip_pending)) { + ce = TAILQ_FIRST(&sc->clip_pending); + MPASS(ce->pending); + cde = ce->cde; + MPASS(cde->adp_ref > 0); + + if (ce->clip_idx == -1) { + /* + * Entry was queued for addition to the HW CLIP. + */ + + if (ce->refcount == 0 && cde->krn_ref == 0) { + /* No need to add to HW CLIP. */ + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + LIST_REMOVE(ce, link); + free(ce, M_CXGBE); + if (--cde->adp_ref == 0) { + LIST_REMOVE(cde, link); + free(cde, M_CXGBE); + } + } else { + /* Add to the HW CLIP. */ + rc = add_lip(sc, &cde->lip, &ce->clip_idx); + if (rc == FW_ENOMEM) { + /* CLIP full, no point in retrying. */ + rc = 0; + goto done; + } + if (rc != 0) { + inet_ntop(AF_INET6, &cde->lip, &ip[0], + sizeof(ip)); + CH_ERR(sc, "add_lip(%s) failed: %d\n", + ip, rc); + goto done; + } + MPASS(ce->clip_idx != -1); + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + ce->pending = false; + } + } else { + /* + * Entry was queued for deletion from the HW CLIP. + */ + + if (ce->refcount == 0 && cde->krn_ref == 0) { + /* + * Delete from the HW CLIP. Delete should never + * fail so we always log an error. But if the + * failure is that the entry wasn't found in the + * CLIP then we carry on as if it was deleted. + */ + rc = del_lip(sc, &cde->lip); + if (rc != 0) + CH_ERR(sc, "del_lip(%s) failed: %d\n", + ip, rc); + if (rc == FW_EPROTO) + rc = 0; + if (rc != 0) + goto done; + + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + LIST_REMOVE(ce, link); + free(ce, M_CXGBE); + if (--cde->adp_ref == 0) { + LIST_REMOVE(cde, link); + free(cde, M_CXGBE); + } + } else { + /* No need to delete from HW CLIP. */ + TAILQ_REMOVE(&sc->clip_pending, ce, plink); + ce->pending = false; + } + } + } done: - mtx_unlock(&sc->clip_table_lock); - IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); + end_synchronized_op(sc, LOCK_HELD); + return (rc); +} + +static void +update_clip_table(struct adapter *sc, void *arg __unused) +{ + bool reschedule; + + if (sc->clip_table == NULL) + return; + + reschedule = false; + mtx_lock(&clip_db_lock); + if (sc->clip_gen != clip_db_gen && update_sw_clip_table(sc) != 0) + reschedule = true; + if (!TAILQ_EMPTY(&sc->clip_pending) && update_hw_clip_table(sc) != 0) + reschedule = true; + mtx_unlock(&clip_db_lock); + if (reschedule) + taskqueue_enqueue_timeout(taskqueue_thread, &sc->clip_task, + -hz / 4); +} + +/* + * Update the CLIP table of the specified adapter. + */ +static void +t4_clip_task(void *sc, int count) +{ + update_clip_table(sc, NULL); } void t4_destroy_clip_table(struct adapter *sc) { struct clip_entry *ce, *ce_temp; + int i; - if (mtx_initialized(&sc->clip_table_lock)) { - mtx_lock(&sc->clip_table_lock); - TAILQ_FOREACH_SAFE(ce, &sc->clip_table, link, ce_temp) { - KASSERT(ce->refcount == 0, - ("%s: CLIP entry %p still in use (%d)", __func__, - ce, ce->refcount)); - TAILQ_REMOVE(&sc->clip_table, ce, link); + mtx_lock(&clip_db_lock); + if (sc->clip_table == NULL) + goto done; /* CLIP was never initialized. */ + for (i = 0; i <= sc->clip_mask; i++) { + LIST_FOREACH_SAFE(ce, &sc->clip_table[i], link, ce_temp) { + MPASS(ce->refcount == 0); + MPASS(ce->cde->adp_ref > 0); #if 0 - delete_lip(sc, &ce->lip); + del_lip(sc, &ce->lip); #endif + LIST_REMOVE(ce, link); + if (--ce->cde->adp_ref == 0 && ce->cde->krn_ref == 0) { + LIST_REMOVE(ce->cde, link); + free(ce->cde, M_CXGBE); + } free(ce, M_CXGBE); } - mtx_unlock(&sc->clip_table_lock); - mtx_destroy(&sc->clip_table_lock); } + hashdestroy(&sc->clip_table, M_CXGBE, sc->clip_mask); + sc->clip_table = NULL; +done: + mtx_unlock(&clip_db_lock); } static void t4_ifaddr_event(void *arg __unused, struct ifnet *ifp, struct ifaddr *ifa, int event) { + struct in6_addr *in6; + if (t4_clip_db_auto == 0) + return; /* Automatic updates not allowed. */ if (ifa->ifa_addr->sa_family != AF_INET6) return; + if (ifp->if_flags & IFF_LOOPBACK) + return; + in6 = &((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr; + if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_MULTICAST(in6)) + return; - atomic_add_rel_int(&in6_ifaddr_gen, 1); - taskqueue_enqueue_timeout(taskqueue_thread, &clip_task, -hz / 4); + taskqueue_enqueue(taskqueue_thread, &clip_db_task); } int @@ -360,7 +743,7 @@ sysctl_clip(SYSCTL_HANDLER_ARGS) struct adapter *sc = arg1; struct clip_entry *ce; struct sbuf *sb; - int rc, header = 0; + int i, rc, header = 0; char ip[INET6_ADDRSTRLEN]; rc = sysctl_wire_old_buffer(req, 0); @@ -371,17 +754,25 @@ sysctl_clip(SYSCTL_HANDLER_ARGS) if (sb == NULL) return (ENOMEM); - mtx_lock(&sc->clip_table_lock); - TAILQ_FOREACH(ce, &sc->clip_table, link) { - if (header == 0) { - sbuf_printf(sb, "%-40s %-5s", "IP address", "Users"); - header = 1; + mtx_lock(&clip_db_lock); + for (i = 0; i <= sc->clip_mask; i++) { + LIST_FOREACH(ce, &sc->clip_table[i], link) { + if (header == 0) { + sbuf_printf(sb, "%-4s %-4s %s", "Indx", "Refs", + "IP address"); + header = 1; + } + inet_ntop(AF_INET6, &ce->cde->lip, &ip[0], sizeof(ip)); + if (ce->clip_idx == -1) { + sbuf_printf(sb, "\n%-4s %-4d %s", "-", + ce->refcount, ip); + } else { + sbuf_printf(sb, "\n%-4d %-4d %s", ce->clip_idx, + ce->refcount, ip); + } } - inet_ntop(AF_INET6, &ce->lip, &ip[0], sizeof(ip)); - - sbuf_printf(sb, "\n%-40s %5u", ip, ce->refcount); } - mtx_unlock(&sc->clip_table_lock); + mtx_unlock(&clip_db_lock); rc = sbuf_finish(sb); sbuf_delete(sb); @@ -389,11 +780,73 @@ sysctl_clip(SYSCTL_HANDLER_ARGS) return (rc); } +static int +sysctl_clip_db(SYSCTL_HANDLER_ARGS) +{ + struct clip_db_entry *cde; + struct sbuf *sb; + int i, rc, header = 0; + char ip[INET6_ADDRSTRLEN]; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); + if (sb == NULL) + return (ENOMEM); + + mtx_lock(&clip_db_lock); + for (i = 0; i <= clip_db_mask; i++) { + LIST_FOREACH(cde, &clip_db[i], link) { + MPASS(cde->tmp_ref == 0); + if (header == 0) { + sbuf_printf(sb, "%-4s %-4s %s", "Kref", "Aref", + "IP address"); + header = 1; + } + inet_ntop(AF_INET6, &cde->lip, &ip[0], sizeof(ip)); + sbuf_printf(sb, "\n%-4d %-4d %s", cde->krn_ref, + cde->adp_ref, ip); + } + } + mtx_unlock(&clip_db_lock); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_clip_db_auto(SYSCTL_HANDLER_ARGS) +{ + int rc, val; + + val = t4_clip_db_auto; + rc = sysctl_handle_int(oidp, &val, 0, req); + if (rc != 0 || req->newptr == NULL) + return (rc); + + if (val == 0 || val == 1) + t4_clip_db_auto = val; + else { + /* + * Writing a value other than 0 or 1 forces a one-time update of + * the clip_db directly in the sysctl and not in some taskqueue. + */ + t4_clip_db_task(NULL, 0); + } + + return (0); +} + void t4_clip_modload(void) { - - TIMEOUT_TASK_INIT(taskqueue_thread, &clip_task, 0, t4_clip_task, NULL); + mtx_init(&clip_db_lock, "clip_db", NULL, MTX_DEF); + clip_db = hashinit(CLIP_HASH_SIZE, M_CXGBE, &clip_db_mask); + TASK_INIT(&clip_db_task, 0, t4_clip_db_task, NULL); ifaddr_evhandler = EVENTHANDLER_REGISTER(ifaddr_event_ext, t4_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); } @@ -401,8 +854,9 @@ t4_clip_modload(void) void t4_clip_modunload(void) { - EVENTHANDLER_DEREGISTER(ifaddr_event_ext, ifaddr_evhandler); - taskqueue_cancel_timeout(taskqueue_thread, &clip_task, NULL); + taskqueue_drain(taskqueue_thread, &clip_db_task); + hashdestroy(&clip_db, M_CXGBE, clip_db_mask); + mtx_destroy(&clip_db_lock); } #endif diff --git a/sys/dev/cxgbe/t4_clip.h b/sys/dev/cxgbe/t4_clip.h index 8d9acdb86fa5..9dc3d39f3266 100644 --- a/sys/dev/cxgbe/t4_clip.h +++ b/sys/dev/cxgbe/t4_clip.h @@ -32,19 +32,18 @@ #ifndef __T4_CLIP_H #define __T4_CLIP_H -struct clip_entry { - TAILQ_ENTRY(clip_entry) link; - struct in6_addr lip; /* local IPv6 address */ - u_int refcount; -}; +#define CLIP_HASH_SIZE 32 +struct clip_entry; +struct in6_addr; void t4_clip_modload(void); void t4_clip_modunload(void); void t4_init_clip_table(struct adapter *); void t4_destroy_clip_table(struct adapter *); -struct clip_entry *t4_hold_lip(struct adapter *, struct in6_addr *, - struct clip_entry *); -void t4_release_lip(struct adapter *, struct clip_entry *); +struct clip_entry *t4_get_clip_entry(struct adapter *, struct in6_addr *, bool); +void t4_hold_clip_entry(struct adapter *, struct clip_entry *); +void t4_release_clip_entry(struct adapter *, struct clip_entry *); +int t4_release_clip_addr(struct adapter *, struct in6_addr *); int sysctl_clip(SYSCTL_HANDLER_ARGS); diff --git a/sys/dev/cxgbe/t4_ioctl.h b/sys/dev/cxgbe/t4_ioctl.h index ff2c5ef80a14..f3bb7d8b4aa4 100644 --- a/sys/dev/cxgbe/t4_ioctl.h +++ b/sys/dev/cxgbe/t4_ioctl.h @@ -64,6 +64,8 @@ enum { T4_LOAD_BOOTCFG, /* flash bootcfg */ T4_CUDBG_DUMP, /* debug dump of chip state */ T4_SET_FILTER_MASK, /* set filter mask (hashfilter mode) */ + T4_HOLD_CLIP_ADDR, /* add ref on an IP in the CLIP */ + T4_RELEASE_CLIP_ADDR, /* remove ref from an IP in the CLIP */ }; struct t4_reg { @@ -405,6 +407,12 @@ struct t4_offload_policy { struct offload_rule *rule; }; +/* Address/mask entry in the CLIP. FW_CLIP2_CMD is aware of the mask. */ +struct t4_clip_addr { + uint8_t addr[16]; + uint8_t mask[16]; +}; + #define CHELSIO_T4_GETREG _IOWR('f', T4_GETREG, struct t4_reg) #define CHELSIO_T4_SETREG _IOW('f', T4_SETREG, struct t4_reg) #define CHELSIO_T4_REGDUMP _IOWR('f', T4_REGDUMP, struct t4_regdump) @@ -431,4 +439,6 @@ struct t4_offload_policy { #define CHELSIO_T4_CUDBG_DUMP _IOWR('f', T4_CUDBG_DUMP, struct t4_cudbg_dump) #define CHELSIO_T4_SET_OFLD_POLICY _IOW('f', T4_SET_OFLD_POLICY, struct t4_offload_policy) #define CHELSIO_T4_SET_FILTER_MASK _IOW('f', T4_SET_FILTER_MASK, uint32_t) +#define CHELSIO_T4_HOLD_CLIP_ADDR _IOW('f', T4_HOLD_CLIP_ADDR, struct t4_clip_addr) +#define CHELSIO_T4_RELEASE_CLIP_ADDR _IOW('f', T4_RELEASE_CLIP_ADDR, struct t4_clip_addr) #endif diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 3cfab1ef04e2..51fc6504e5c2 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -838,6 +838,8 @@ static int set_offload_policy(struct adapter *, struct t4_offload_policy *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); static int clear_stats(struct adapter *, u_int); +static int hold_clip_addr(struct adapter *, struct t4_clip_addr *); +static int release_clip_addr(struct adapter *, struct t4_clip_addr *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, bool); static void t4_async_event(void *, int); @@ -11910,6 +11912,35 @@ clear_stats(struct adapter *sc, u_int port_id) return (0); } +static int +hold_clip_addr(struct adapter *sc, struct t4_clip_addr *ca) +{ +#ifdef INET6 + struct in6_addr in6; + + bcopy(&ca->addr[0], &in6.s6_addr[0], sizeof(in6.s6_addr)); + if (t4_get_clip_entry(sc, &in6, true) != NULL) + return (0); + else + return (EIO); +#else + return (ENOTSUP); +#endif +} + +static int +release_clip_addr(struct adapter *sc, struct t4_clip_addr *ca) +{ +#ifdef INET6 + struct in6_addr in6; + + bcopy(&ca->addr[0], &in6.s6_addr[0], sizeof(in6.s6_addr)); + return (t4_release_clip_addr(sc, &in6)); +#else + return (ENOTSUP); +#endif +} + int t4_os_find_pci_capability(struct adapter *sc, int cap) { @@ -12181,6 +12212,12 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, case CHELSIO_T4_SET_OFLD_POLICY: rc = set_offload_policy(sc, (struct t4_offload_policy *)data); break; + case CHELSIO_T4_HOLD_CLIP_ADDR: + rc = hold_clip_addr(sc, (struct t4_clip_addr *)data); + break; + case CHELSIO_T4_RELEASE_CLIP_ADDR: + rc = release_clip_addr(sc, (struct t4_clip_addr *)data); + break; default: rc = ENOTTY; } diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c index c71b9694bd3b..f4aa84d6514f 100644 --- a/sys/dev/cxgbe/tom/t4_connect.c +++ b/sys/dev/cxgbe/tom/t4_connect.c @@ -300,7 +300,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh, if ((inp->inp_vflag & INP_IPV6) == 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); - toep->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL); + toep->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true); if (toep->ce == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOENT); @@ -394,7 +394,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh, if (toep->l2te) t4_l2t_release(toep->l2te); if (toep->ce) - t4_release_lip(sc, toep->ce); + t4_release_clip_entry(sc, toep->ce); free_toepcb(toep); } diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c index 0245acfe005b..8623079fe429 100644 --- a/sys/dev/cxgbe/tom/t4_listen.c +++ b/sys/dev/cxgbe/tom/t4_listen.c @@ -211,7 +211,7 @@ alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi) if (inp->inp_vflag & INP_IPV6 && !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) { - lctx->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL); + lctx->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true); if (lctx->ce == NULL) { free(lctx, M_CXGBE); return (NULL); @@ -244,7 +244,7 @@ free_lctx(struct adapter *sc, struct listen_ctx *lctx) __func__, lctx->stid, lctx, lctx->inp); if (lctx->ce) - t4_release_lip(sc, lctx->ce); + t4_release_clip_entry(sc, lctx->ce); free_stid(sc, lctx); free(lctx, M_CXGBE); @@ -1522,8 +1522,18 @@ do_pass_establish(struct sge_iq *iq, const struct rss_header *rss, /* Come up with something that syncache_expand should be ok with. */ synqe_to_protohdrs(sc, synqe, cpl, &inc, &th, &to); - if (inc.inc_flags & INC_ISIPV6) - toep->ce = t4_hold_lip(sc, &inc.inc6_laddr, lctx->ce); + if (inc.inc_flags & INC_ISIPV6) { + if (lctx->ce == NULL) { + toep->ce = t4_get_clip_entry(sc, &inc.inc6_laddr, true); + if (toep->ce == NULL) { + free_toepcb(toep); + goto reset; /* RST without a CLIP entry? */ + } + } else { + t4_hold_clip_entry(sc, lctx->ce); + toep->ce = lctx->ce; + } + } so = inp->inp_socket; KASSERT(so != NULL, ("%s: socket is NULL", __func__)); diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index 173357404ebe..97693ab74000 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -348,7 +348,7 @@ release_offload_resources(struct toepcb *toep) } if (toep->ce) - t4_release_lip(sc, toep->ce); + t4_release_clip_entry(sc, toep->ce); if (toep->params.tc_idx != -1) t4_release_cl_rl(sc, toep->vi->pi->port_id, toep->params.tc_idx);