diff --git a/contrib/ofed/libcxgb4/cq.c b/contrib/ofed/libcxgb4/cq.c index 76081bfe59c6..1b79cb5d21a1 100644 --- a/contrib/ofed/libcxgb4/cq.c +++ b/contrib/ofed/libcxgb4/cq.c @@ -437,7 +437,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, if (!*cqe_flushed && CQE_STATUS(hw_cqe)) dump_cqe(hw_cqe); - BUG_ON((*cqe_flushed == 0) && !SW_CQE(hw_cqe)); + BUG_ON((cqe_flushed == 0) && !SW_CQE(hw_cqe)); goto proc_cqe; } diff --git a/contrib/ofed/libcxgb4/dev.c b/contrib/ofed/libcxgb4/dev.c index 14af8e3e7657..1e48478f7466 100644 --- a/contrib/ofed/libcxgb4/dev.c +++ b/contrib/ofed/libcxgb4/dev.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "libcxgb4.h" #include "cxgb4-abi.h" @@ -194,6 +195,17 @@ static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev, rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *)); if (!rhp->cqid2ptr) goto err_unmap; + + /* Disable userspace WC if architecture/adapter does not + * support WC. + * Note: To forcefully disable WC in kernel driver use the + * loader tunable "hw.cxl.write_combine=0" + */ + if (t5_en_wc && !context->status_page->wc_supported) { + fprintf(stderr, "iw_cxgb4 driver doesn't support Write " + "Combine, so regular DB writes will be used\n"); + t5_en_wc = 0; + } } return &context->ibv_ctx; @@ -400,11 +412,44 @@ int c4iw_abi_version = 1; static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path, int abi_version) { - char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[32], *cp; + char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp; + char dev_str[IBV_SYSFS_PATH_MAX]; struct c4iw_dev *dev; unsigned vendor, device, fw_maj, fw_min; int i; + char devnum; + char ib_param[16]; +#ifndef __linux__ + if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev", + ibdev, sizeof ibdev) < 0) + return NULL; + + devnum = atoi(&ibdev[5]); + + if (ibdev[0] == 't' && ibdev[1] >= '4' && ibdev[1] <= '6' && + strstr(&ibdev[2], "nex") && devnum >= 0) { + snprintf(dev_str, sizeof(dev_str), "/dev/t%cnex/%d", ibdev[1], + devnum); + } else + return NULL; + + if (ibv_read_sysfs_file(dev_str, "\%pnpinfo", value, sizeof value) < 0) + return NULL; + else { + if (strstr(value, "vendor=")) { + strncpy(ib_param, strstr(value, "vendor=") + + strlen("vendor="), 6); + sscanf(ib_param, "%i", &vendor); + } + + if (strstr(value, "device=")) { + strncpy(ib_param, strstr(value, "device=") + + strlen("device="), 6); + sscanf(ib_param, "%i", &device); + } + } +#else if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", value, sizeof value) < 0) return NULL; @@ -414,6 +459,7 @@ static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path, value, sizeof value) < 0) return NULL; sscanf(value, "%i", &device); +#endif for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i) if (vendor == hca_table[i].vendor && @@ -425,6 +471,11 @@ static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path, found: c4iw_abi_version = abi_version; +#ifndef __linux__ + if (ibv_read_sysfs_file(dev_str, "firmware_version", + value, sizeof value) < 0) + return NULL; +#else /* * Verify that the firmware major number matches. Major number * mismatches are fatal. Minor number mismatches are tolerated. 
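The non-Linux probe path added above reads the nexus device's "%pnpinfo" string and pulls the PCI vendor and device IDs out of it with strstr()/sscanf(), instead of the Linux "device/vendor" and "device/device" sysfs attributes. A minimal standalone sketch of that extraction, mirroring the hunk above (the pnpinfo contents in main() are made up for illustration):

#include <stdio.h>
#include <string.h>

/*
 * Illustrative only: mirrors the strstr() + sscanf() extraction that
 * cxgb4_driver_init() performs on the "%pnpinfo" string above.
 * Returns 0 on success, -1 if either field is missing or malformed.
 */
static int
parse_pnpinfo(const char *value, int *vendor, int *device)
{
	char buf[8];
	const char *p;

	if ((p = strstr(value, "vendor=")) == NULL)
		return (-1);
	strncpy(buf, p + strlen("vendor="), 6);
	buf[6] = '\0';
	if (sscanf(buf, "%i", vendor) != 1)
		return (-1);

	if ((p = strstr(value, "device=")) == NULL)
		return (-1);
	strncpy(buf, p + strlen("device="), 6);
	buf[6] = '\0';
	if (sscanf(buf, "%i", device) != 1)
		return (-1);

	return (0);
}

int
main(void)
{
	int vendor, device;
	/* Hypothetical pnpinfo contents; only the two fields matter here. */
	const char *pnpinfo = "vendor=0x1425 device=0x5410 subvendor=0x1425";

	if (parse_pnpinfo(pnpinfo, &vendor, &device) == 0)
		printf("vendor 0x%x device 0x%x\n",
		    (unsigned)vendor, (unsigned)device);
	return (0);
}

The explicit NUL termination of the copied field before sscanf() is the only deliberate difference from the hunk above.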
@@ -438,6 +489,7 @@ found: ibv_get_sysfs_path(), ibdev); if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0) return NULL; +#endif cp = strtok(value+1, "."); sscanf(cp, "%i", &fw_maj); diff --git a/contrib/ofed/libcxgb4/qp.c b/contrib/ofed/libcxgb4/qp.c index e4f281edbb17..bd2c38528643 100644 --- a/contrib/ofed/libcxgb4/qp.c +++ b/contrib/ofed/libcxgb4/qp.c @@ -44,10 +44,13 @@ struct c4iw_stats c4iw_stats; static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16) { - u64 *src, *dst; + void *src, *dst; + uintptr_t end; + int total, len; - src = (u64 *)wqe; - dst = (u64 *)((u8 *)wq->sq.queue + wq->sq.wq_pidx * T4_EQ_ENTRY_SIZE); + src = &wqe->flits[0]; + dst = &wq->sq.queue->flits[wq->sq.wq_pidx * + (T4_EQ_ENTRY_SIZE / sizeof(__be64))]; if (t4_sq_onchip(wq)) { len16 = align(len16, 4); @@ -57,17 +60,18 @@ static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16) * happens */ mmio_wc_start(); } - while (len16) { - *dst++ = *src++; - if (dst == (u64 *)&wq->sq.queue[wq->sq.size]) - dst = (u64 *)wq->sq.queue; - *dst++ = *src++; - if (dst == (u64 *)&wq->sq.queue[wq->sq.size]) - dst = (u64 *)wq->sq.queue; - len16--; - /* NOTE len16 cannot be large enough to write to the - same sq.queue memory twice in this loop */ + /* NOTE len16 cannot be large enough to write to the + same sq.queue memory twice in this loop */ + total = len16 * 16; + end = (uintptr_t)&wq->sq.queue[wq->sq.size]; + if (__predict_true((uintptr_t)dst + total <= end)) { + /* Won't wrap around. */ + memcpy(dst, src, total); + } else { + len = end - (uintptr_t)dst; + memcpy(dst, src, len); + memcpy(wq->sq.queue, src + len, total - len); } if (t4_sq_onchip(wq)) @@ -76,18 +80,23 @@ static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16) static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16) { - u64 *src, *dst; + void *src, *dst; + uintptr_t end; + int total, len; - src = (u64 *)wqe; - dst = (u64 *)((u8 *)wq->rq.queue + wq->rq.wq_pidx * T4_EQ_ENTRY_SIZE); - while (len16) { - *dst++ = *src++; - if (dst >= (u64 *)&wq->rq.queue[wq->rq.size]) - dst = (u64 *)wq->rq.queue; - *dst++ = *src++; - if (dst >= (u64 *)&wq->rq.queue[wq->rq.size]) - dst = (u64 *)wq->rq.queue; - len16--; + src = &wqe->flits[0]; + dst = &wq->rq.queue->flits[wq->rq.wq_pidx * + (T4_EQ_ENTRY_SIZE / sizeof(__be64))]; + + total = len16 * 16; + end = (uintptr_t)&wq->rq.queue[wq->rq.size]; + if (__predict_true((uintptr_t)dst + total <= end)) { + /* Won't wrap around. 
*/ + memcpy(dst, src, total); + } else { + len = end - (uintptr_t)dst; + memcpy(dst, src, len); + memcpy(wq->rq.queue, src + len, total - len); } } diff --git a/contrib/ofed/libcxgb4/t4.h b/contrib/ofed/libcxgb4/t4.h index fe2ad6a519a0..05984e87d569 100644 --- a/contrib/ofed/libcxgb4/t4.h +++ b/contrib/ofed/libcxgb4/t4.h @@ -87,7 +87,7 @@ #define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1) #define T4_MAX_NUM_STAG (1<<15) #define T4_MAX_MR_SIZE (~0ULL - 1) -#define T4_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ +#define T4_PAGESIZE_MASK 0xffffffff000 /* 4KB-8TB */ #define T4_STAG_UNSET 0xffffffff #define T4_FW_MAJ 0 @@ -723,7 +723,7 @@ static inline void t4_reset_cq_in_error(struct t4_cq *cq) struct t4_dev_status_page { u8 db_off; - u8 pad1; + u8 wc_supported; u16 pad2; u32 pad3; u64 qp_start; diff --git a/contrib/ofed/libcxgb4/verbs.c b/contrib/ofed/libcxgb4/verbs.c index 251a337d2b41..04d765dff3f6 100644 --- a/contrib/ofed/libcxgb4/verbs.c +++ b/contrib/ofed/libcxgb4/verbs.c @@ -468,7 +468,7 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, } qhp->wq.sq.queue = mmap(NULL, qhp->wq.sq.memsize, - PROT_WRITE, MAP_SHARED, + PROT_READ|PROT_WRITE, MAP_SHARED, pd->context->cmd_fd, resp.sq_key); if (qhp->wq.sq.queue == MAP_FAILED) goto err4; @@ -490,7 +490,7 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, qhp->wq.rq.udb += 2; } qhp->wq.rq.queue = mmap(NULL, qhp->wq.rq.memsize, - PROT_WRITE, MAP_SHARED, + PROT_READ|PROT_WRITE, MAP_SHARED, pd->context->cmd_fd, resp.rq_key); if (qhp->wq.rq.queue == MAP_FAILED) goto err6; diff --git a/sys/compat/linuxkpi/common/include/linux/dma-mapping.h b/sys/compat/linuxkpi/common/include/linux/dma-mapping.h index 1b486ec80908..24508f683578 100644 --- a/sys/compat/linuxkpi/common/include/linux/dma-mapping.h +++ b/sys/compat/linuxkpi/common/include/linux/dma-mapping.h @@ -127,7 +127,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, size_t align; void *mem; - if (dev->dma_mask) + if (dev != NULL && dev->dma_mask) high = *dev->dma_mask; else if (flag & GFP_DMA32) high = BUS_SPACE_MAXADDR_32BIT; diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index e4a14a339ff2..a297e08738e6 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -801,6 +801,7 @@ struct adapter { void *tom_softc; /* (struct tom_data *) */ struct tom_tunables tt; + struct iw_tunables iwt; void *iwarp_softc; /* (struct c4iw_dev *) */ void *iscsi_ulp_softc; /* (struct cxgbei_data *) */ void *ccr_softc; /* (struct ccr_softc *) */ diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h index c34f57ad9c18..ce86048cac0f 100644 --- a/sys/dev/cxgbe/common/common.h +++ b/sys/dev/cxgbe/common/common.h @@ -68,6 +68,8 @@ enum { FEC_RESERVED = 1 << 2, }; +enum t4_bar2_qtype { T4_BAR2_QTYPE_EGRESS, T4_BAR2_QTYPE_INGRESS }; + struct port_stats { u64 tx_octets; /* total # of octets in good frames */ u64 tx_frames; /* all good frames */ @@ -843,5 +845,8 @@ int t4vf_get_sge_params(struct adapter *adapter); int t4vf_get_rss_glb_config(struct adapter *adapter); int t4vf_get_vfres(struct adapter *adapter); int t4vf_prep_adapter(struct adapter *adapter); +int t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qid, + enum t4_bar2_qtype qtype, int user, u64 *pbar2_qoffset, + unsigned int *pbar2_qid); #endif /* __CHELSIO_COMMON_H */ diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c index bb7491af4c91..878d2dfa7ddc 100644 --- a/sys/dev/cxgbe/common/t4_hw.c +++ b/sys/dev/cxgbe/common/t4_hw.c @@ -8080,6 +8080,98 @@ int 
t4_shutdown_adapter(struct adapter *adapter) return 0; } +/** + * t4_bar2_sge_qregs - return BAR2 SGE Queue register information + * @adapter: the adapter + * @qid: the Queue ID + * @qtype: the Ingress or Egress type for @qid + * @user: true if this request is for a user mode queue + * @pbar2_qoffset: BAR2 Queue Offset + * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues + * + * Returns the BAR2 SGE Queue Registers information associated with the + * indicated Absolute Queue ID. These are passed back in return value + * pointers. @qtype should be T4_BAR2_QTYPE_EGRESS for Egress Queue + * and T4_BAR2_QTYPE_INGRESS for Ingress Queues. + * + * This may return an error which indicates that BAR2 SGE Queue + * registers aren't available. If an error is not returned, then the + * following values are returned: + * + * *@pbar2_qoffset: the BAR2 Offset of the @qid Registers + * *@pbar2_qid: the BAR2 SGE Queue ID or 0 of @qid + * + * If the returned BAR2 Queue ID is 0, then BAR2 SGE registers which + * require the "Inferred Queue ID" ability may be used. E.g. the + * Write Combining Doorbell Buffer. If the BAR2 Queue ID is not 0, + * then these "Inferred Queue ID" register may not be used. + */ +int t4_bar2_sge_qregs(struct adapter *adapter, + unsigned int qid, + enum t4_bar2_qtype qtype, + int user, + u64 *pbar2_qoffset, + unsigned int *pbar2_qid) +{ + unsigned int page_shift, page_size, qpp_shift, qpp_mask; + u64 bar2_page_offset, bar2_qoffset; + unsigned int bar2_qid, bar2_qid_offset, bar2_qinferred; + + /* T4 doesn't support BAR2 SGE Queue registers for kernel + * mode queues. + */ + if (!user && is_t4(adapter)) + return -EINVAL; + + /* Get our SGE Page Size parameters. + */ + page_shift = adapter->params.sge.page_shift; + page_size = 1 << page_shift; + + /* Get the right Queues per Page parameters for our Queue. + */ + qpp_shift = (qtype == T4_BAR2_QTYPE_EGRESS + ? adapter->params.sge.eq_s_qpp + : adapter->params.sge.iq_s_qpp); + qpp_mask = (1 << qpp_shift) - 1; + + /* Calculate the basics of the BAR2 SGE Queue register area: + * o The BAR2 page the Queue registers will be in. + * o The BAR2 Queue ID. + * o The BAR2 Queue ID Offset into the BAR2 page. + */ + bar2_page_offset = ((u64)(qid >> qpp_shift) << page_shift); + bar2_qid = qid & qpp_mask; + bar2_qid_offset = bar2_qid * SGE_UDB_SIZE; + + /* If the BAR2 Queue ID Offset is less than the Page Size, then the + * hardware will infer the Absolute Queue ID simply from the writes to + * the BAR2 Queue ID Offset within the BAR2 Page (and we need to use a + * BAR2 Queue ID of 0 for those writes). Otherwise, we'll simply + * write to the first BAR2 SGE Queue Area within the BAR2 Page with + * the BAR2 Queue ID and the hardware will infer the Absolute Queue ID + * from the BAR2 Page and BAR2 Queue ID. + * + * One important censequence of this is that some BAR2 SGE registers + * have a "Queue ID" field and we can write the BAR2 SGE Queue ID + * there. But other registers synthesize the SGE Queue ID purely + * from the writes to the registers -- the Write Combined Doorbell + * Buffer is a good example. These BAR2 SGE Registers are only + * available for those BAR2 SGE Register areas where the SGE Absolute + * Queue ID can be inferred from simple writes. 
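+ *
+ * As a worked example (the numbers are purely illustrative, assuming
+ * 4KB BAR2 pages and the usual 128-byte SGE_UDB_SIZE doorbell window):
+ * with page_shift = 12 and qpp_shift = 3 (8 Queues per Page), Queue
+ * ID 21 gives
+ *
+ *	bar2_page_offset = (21 >> 3) << 12 = 0x2000
+ *	bar2_qid         = 21 & 7          = 5
+ *	bar2_qid_offset  = 5 * 128         = 0x280
+ *
+ * Since 0x280 is less than the 4KB page size the Queue ID can be
+ * inferred, so the caller gets back *pbar2_qoffset = 0x2280 and
+ * *pbar2_qid = 0.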
+ */ + bar2_qoffset = bar2_page_offset; + bar2_qinferred = (bar2_qid_offset < page_size); + if (bar2_qinferred) { + bar2_qoffset += bar2_qid_offset; + bar2_qid = 0; + } + + *pbar2_qoffset = bar2_qoffset; + *pbar2_qid = bar2_qid; + return 0; +} + /** * t4_init_devlog_params - initialize adapter->params.devlog * @adap: the adapter diff --git a/sys/dev/cxgbe/iw_cxgbe/cm.c b/sys/dev/cxgbe/iw_cxgbe/cm.c index 841c34f0c830..e5ec3c672c6b 100644 --- a/sys/dev/cxgbe/iw_cxgbe/cm.c +++ b/sys/dev/cxgbe/iw_cxgbe/cm.c @@ -46,8 +46,11 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include +#include +#include #include #include #include @@ -78,6 +81,8 @@ static struct work_struct c4iw_task; static struct workqueue_struct *c4iw_taskq; static LIST_HEAD(err_cqe_list); static spinlock_t err_cqe_lock; +static LIST_HEAD(listen_port_list); +static DEFINE_MUTEX(listen_port_mutex); static void process_req(struct work_struct *ctx); static void start_ep_timer(struct c4iw_ep *ep); @@ -85,12 +90,7 @@ static int stop_ep_timer(struct c4iw_ep *ep); static int set_tcpinfo(struct c4iw_ep *ep); static void process_timeout(struct c4iw_ep *ep); static void process_err_cqes(void); -static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc); -static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); -static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); static void *alloc_ep(int size, gfp_t flags); -static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, struct nhop4_extended *pnh4); static void close_socket(struct socket *so); static int send_mpa_req(struct c4iw_ep *ep); static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen); @@ -120,6 +120,15 @@ static int process_terminate(struct c4iw_ep *ep); static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m); static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events); +static struct listen_port_info * +add_ep_to_listenlist(struct c4iw_listen_ep *lep); +static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep); +static struct c4iw_listen_ep * +find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so); +static int get_ifnet_from_raddr(struct sockaddr_storage *raddr, + struct ifnet **ifp); +static void process_newconn(struct c4iw_listen_ep *master_lep, + struct socket *new_so); #define START_EP_TIMER(ep) \ do { \ CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \ @@ -134,6 +143,34 @@ static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events); stop_ep_timer(ep); \ }) +#define GET_LOCAL_ADDR(pladdr, so) \ + do { \ + struct sockaddr_storage *__a = NULL; \ + struct inpcb *__inp = sotoinpcb(so); \ + KASSERT(__inp != NULL, \ + ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \ + if (__inp->inp_vflag & INP_IPV4) \ + in_getsockaddr(so, (struct sockaddr **)&__a); \ + else \ + in6_getsockaddr(so, (struct sockaddr **)&__a); \ + *(pladdr) = *__a; \ + free(__a, M_SONAME); \ + } while (0) + +#define GET_REMOTE_ADDR(praddr, so) \ + do { \ + struct sockaddr_storage *__a = NULL; \ + struct inpcb *__inp = sotoinpcb(so); \ + KASSERT(__inp != NULL, \ + ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \ + if (__inp->inp_vflag & INP_IPV4) \ + in_getpeeraddr(so, (struct sockaddr **)&__a); \ + else \ + in6_getpeeraddr(so, (struct sockaddr **)&__a); \ + *(praddr) = *__a; \ + free(__a, M_SONAME); \ + } while (0) + #ifdef KTR static char *states[] = { "idle", @@ -152,7 +189,6 @@ static char *states[] 
= { }; #endif - static void deref_cm_id(struct c4iw_ep_common *epc) { epc->cm_id->rem_ref(epc->cm_id); @@ -179,13 +215,184 @@ static void ref_qp(struct c4iw_ep *ep) set_bit(QP_REFED, &ep->com.history); c4iw_qp_add_ref(&ep->com.qp->ibqp); } +/* allocated per TCP port while listening */ +struct listen_port_info { + uint16_t port_num; /* TCP port address */ + struct list_head list; /* belongs to listen_port_list */ + struct list_head lep_list; /* per port lep list */ + uint32_t refcnt; /* number of lep's listening */ +}; + +/* + * Following two lists are used to manage INADDR_ANY listeners: + * 1)listen_port_list + * 2)lep_list + * + * Below is the INADDR_ANY listener lists overview on a system with a two port + * adapter: + * |------------------| + * |listen_port_list | + * |------------------| + * | + * | |-----------| |-----------| + * | | port_num:X| | port_num:X| + * |--------------|-list------|-------|-list------|-------.... + * | lep_list----| | lep_list----| + * | refcnt | | | refcnt | | + * | | | | | | + * | | | | | | + * |-----------| | |-----------| | + * | | + * | | + * | | + * | | lep1 lep2 + * | | |----------------| |----------------| + * | |----| listen_ep_list |----| listen_ep_list | + * | |----------------| |----------------| + * | + * | + * | lep1 lep2 + * | |----------------| |----------------| + * |---| listen_ep_list |----| listen_ep_list | + * |----------------| |----------------| + * + * Because of two port adapter, the number of lep's are two(lep1 & lep2) for + * each TCP port number. + * + * Here 'lep1' is always marked as Master lep, because solisten() is always + * called through first lep. + * + */ +static struct listen_port_info * +add_ep_to_listenlist(struct c4iw_listen_ep *lep) +{ + uint16_t port; + struct listen_port_info *port_info = NULL; + struct sockaddr_storage *laddr = &lep->com.local_addr; + + port = (laddr->ss_family == AF_INET) ? + ((struct sockaddr_in *)laddr)->sin_port : + ((struct sockaddr_in6 *)laddr)->sin6_port; + + mutex_lock(&listen_port_mutex); + + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) + goto found_port; + + port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK); + port_info->port_num = port; + port_info->refcnt = 0; + + list_add_tail(&port_info->list, &listen_port_list); + INIT_LIST_HEAD(&port_info->lep_list); + +found_port: + port_info->refcnt++; + list_add_tail(&lep->listen_ep_list, &port_info->lep_list); + mutex_unlock(&listen_port_mutex); + return port_info; +} + +static int +rem_ep_from_listenlist(struct c4iw_listen_ep *lep) +{ + uint16_t port; + struct listen_port_info *port_info = NULL; + struct sockaddr_storage *laddr = &lep->com.local_addr; + int refcnt = 0; + + port = (laddr->ss_family == AF_INET) ? + ((struct sockaddr_in *)laddr)->sin_port : + ((struct sockaddr_in6 *)laddr)->sin6_port; + + mutex_lock(&listen_port_mutex); + + /* get the port_info structure based on the lep's port address */ + list_for_each_entry(port_info, &listen_port_list, list) { + if (port_info->port_num == port) { + port_info->refcnt--; + refcnt = port_info->refcnt; + /* remove the current lep from the listen list */ + list_del(&lep->listen_ep_list); + if (port_info->refcnt == 0) { + /* Remove this entry from the list as there + * are no more listeners for this port_num. + */ + list_del(&port_info->list); + kfree(port_info); + } + break; + } + } + mutex_unlock(&listen_port_mutex); + return refcnt; +} + +/* + * Find the lep that belongs to the ifnet on which the SYN frame was received. 
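+ *
+ * For example (interface names illustrative only): with a wildcard
+ * listen, lep1 hangs off the adapter behind t5nex0 and lep2 off the
+ * one behind t5nex1; a connection whose remote address routes out a
+ * t5nex1 port is matched to lep2, even though solisten() was issued
+ * only through the master lep.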
+ */ +struct c4iw_listen_ep * +find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so) +{ + struct adapter *adap = NULL; + struct c4iw_listen_ep *lep = NULL; + struct sockaddr_storage remote = { 0 }; + struct ifnet *new_conn_ifp = NULL; + struct listen_port_info *port_info = NULL; + int err = 0, i = 0, + found_portinfo = 0, found_lep = 0; + uint16_t port; + + /* STEP 1: get 'ifnet' based on socket's remote address */ + GET_REMOTE_ADDR(&remote, so); + + err = get_ifnet_from_raddr(&remote, &new_conn_ifp); + if (err) { + CTR4(KTR_IW_CXGBE, "%s: Failed to get ifnet, sock %p, " + "master_lep %p err %d", + __func__, so, master_lep, err); + return (NULL); + } + + /* STEP 2: Find 'port_info' with listener local port address. */ + port = (master_lep->com.local_addr.ss_family == AF_INET) ? + ((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port : + ((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port; + + + mutex_lock(&listen_port_mutex); + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) { + found_portinfo =1; + break; + } + if (!found_portinfo) + goto out; + + /* STEP 3: Traverse through list of lep's that are bound to the current + * TCP port address and find the lep that belongs to the ifnet on which + * the SYN frame was received. + */ + list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) { + adap = lep->com.dev->rdev.adap; + for_each_port(adap, i) { + if (new_conn_ifp == adap->port[i]->vi[0].ifp) { + found_lep =1; + goto out; + } + } + } +out: + mutex_unlock(&listen_port_mutex); + return found_lep ? lep : (NULL); +} static void process_timeout(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int abort = 1; - mutex_lock(&ep->com.mutex); CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__, ep, ep->hwtid, ep->com.state); set_bit(TIMEDOUT, &ep->com.history); @@ -221,7 +428,6 @@ static void process_timeout(struct c4iw_ep *ep) , __func__, ep, ep->hwtid, ep->com.state); abort = 0; } - mutex_unlock(&ep->com.mutex); if (abort) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); c4iw_put_ep(&ep->com); @@ -273,14 +479,16 @@ process_req(struct work_struct *ctx) ep_events = epc->ep_events; epc->ep_events = 0; spin_unlock_irqrestore(&req_lock, flag); - CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, events 0x%x", __func__, - epc->so, epc, ep_events); + mutex_lock(&epc->mutex); + CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x", + __func__, epc->so, epc, states[epc->state], ep_events); if (ep_events & C4IW_EVENT_TERM) process_terminate((struct c4iw_ep *)epc); if (ep_events & C4IW_EVENT_TIMEOUT) process_timeout((struct c4iw_ep *)epc); if (ep_events & C4IW_EVENT_SOCKET) process_socket_event((struct c4iw_ep *)epc); + mutex_unlock(&epc->mutex); c4iw_put_ep(epc); process_err_cqes(); spin_lock_irqsave(&req_lock, flag); @@ -321,55 +529,67 @@ done: return (rc); } - static int -find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, struct nhop4_extended *pnh4) +get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp) { - struct in_addr addr; - int err; + int err = 0; - CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip, - peer_ip, ntohs(local_port), ntohs(peer_port)); + if (raddr->ss_family == AF_INET) { + struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr; + struct nhop4_extended nh4 = {0}; - addr.s_addr = peer_ip; - err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4); + err = 
fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr, + NHR_REF, 0, &nh4); + *ifp = nh4.nh_ifp; + if (err) + fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); + } else { + struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr; + struct nhop6_extended nh6 = {0}; + struct in6_addr addr6; + uint32_t scopeid; - CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err); + memset(&addr6, 0, sizeof(addr6)); + in6_splitscope((struct in6_addr *)&raddr6->sin6_addr, + &addr6, &scopeid); + err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid, + NHR_REF, 0, &nh6); + *ifp = nh6.nh_ifp; + if (err) + fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6); + } + + CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err); return err; } static void close_socket(struct socket *so) { - uninit_iwarp_socket(so); - sodisconnect(so); + soclose(so); } static void process_peer_close(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int disconnect = 1; int release = 0; CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); - mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: - CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING", + CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD", __func__, ep); - __state_set(&ep->com, CLOSING); - break; - + /* Fallthrough */ case MPA_REQ_SENT: - CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING", + CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD", __func__, ep); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; connect_reply_upcall(ep, -ECONNABORTED); disconnect = 0; @@ -388,21 +608,20 @@ process_peer_close(struct c4iw_ep *ep) */ CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING", __func__, ep); - __state_set(&ep->com, CLOSING); - c4iw_get_ep(&ep->com); + ep->com.state = CLOSING; break; case MPA_REP_SENT: CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING", __func__, ep); - __state_set(&ep->com, CLOSING); + ep->com.state = CLOSING; break; case FPDU_MODE: CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING", __func__, ep); START_EP_TIMER(ep); - __state_set(&ep->com, CLOSING); + ep->com.state = CLOSING; attrs.next_state = C4IW_QP_STATE_CLOSING; c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); @@ -418,7 +637,7 @@ process_peer_close(struct c4iw_ep *ep) case CLOSING: CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND", __func__, ep); - __state_set(&ep->com, MORIBUND); + ep->com.state = MORIBUND; disconnect = 0; break; @@ -433,7 +652,7 @@ process_peer_close(struct c4iw_ep *ep) } close_socket(ep->com.so); close_complete_upcall(ep, 0); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; release = 1; disconnect = 0; break; @@ -450,7 +669,6 @@ process_peer_close(struct c4iw_ep *ep) break; } - mutex_unlock(&ep->com.mutex); if (disconnect) { @@ -469,11 +687,10 @@ process_peer_close(struct c4iw_ep *ep) static void process_conn_error(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int ret; int state; - mutex_lock(&ep->com.mutex); state = ep->com.state; CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s", __func__, ep, ep->com.so, ep->com.so->so_error, @@ -483,6 +700,7 @@ process_conn_error(struct c4iw_ep *ep) case MPA_REQ_WAIT: STOP_EP_TIMER(ep); + c4iw_put_ep(&ep->parent_ep->com); break; case MPA_REQ_SENT: @@ -496,13 +714,6 @@ process_conn_error(struct c4iw_ep *ep) break; case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. 
- */ - c4iw_get_ep(&ep->com); break; case MORIBUND: @@ -531,7 +742,6 @@ process_conn_error(struct c4iw_ep *ep) case DEAD: CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!", __func__, ep->com.so->so_error); - mutex_unlock(&ep->com.mutex); return; default: @@ -541,10 +751,9 @@ process_conn_error(struct c4iw_ep *ep) if (state != ABORTING) { close_socket(ep->com.so); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; c4iw_put_ep(&ep->com); } - mutex_unlock(&ep->com.mutex); CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep); return; } @@ -552,14 +761,13 @@ process_conn_error(struct c4iw_ep *ep) static void process_close_complete(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int release = 0; CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); /* The cm_id may be null if we failed to connect */ - mutex_lock(&ep->com.mutex); set_bit(CLOSE_CON_RPL, &ep->com.history); switch (ep->com.state) { @@ -567,7 +775,7 @@ process_close_complete(struct c4iw_ep *ep) case CLOSING: CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND", __func__, ep); - __state_set(&ep->com, MORIBUND); + ep->com.state = MORIBUND; break; case MORIBUND: @@ -588,7 +796,7 @@ process_close_complete(struct c4iw_ep *ep) close_socket(ep->com.so); close_complete_upcall(ep, 0); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; release = 1; break; @@ -605,12 +813,11 @@ process_close_complete(struct c4iw_ep *ep) panic("%s:pcc6 %p unknown ep state", __func__, ep); break; } - mutex_unlock(&ep->com.mutex); if (release) { CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep); - c4iw_put_ep(&ep->com); + release_ep_resources(ep); } CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep); return; @@ -639,49 +846,56 @@ setiwsockopt(struct socket *so) static void init_iwarp_socket(struct socket *so, void *arg) { - - SOCKBUF_LOCK(&so->so_rcv); - soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); - so->so_state |= SS_NBIO; - SOCKBUF_UNLOCK(&so->so_rcv); + if (SOLISTENING(so)) { + SOLISTEN_LOCK(so); + solisten_upcall_set(so, c4iw_so_upcall, arg); + so->so_state |= SS_NBIO; + SOLISTEN_UNLOCK(so); + } else { + SOCKBUF_LOCK(&so->so_rcv); + soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); + so->so_state |= SS_NBIO; + SOCKBUF_UNLOCK(&so->so_rcv); + } } static void uninit_iwarp_socket(struct socket *so) { - - SOCKBUF_LOCK(&so->so_rcv); - soupcall_clear(so, SO_RCV); - SOCKBUF_UNLOCK(&so->so_rcv); + if (SOLISTENING(so)) { + SOLISTEN_LOCK(so); + solisten_upcall_set(so, NULL, NULL); + SOLISTEN_UNLOCK(so); + } else { + SOCKBUF_LOCK(&so->so_rcv); + soupcall_clear(so, SO_RCV); + SOCKBUF_UNLOCK(&so->so_rcv); + } } static void process_data(struct c4iw_ep *ep) { - struct sockaddr_in *local, *remote; int disconnect = 0; CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__, ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv)); - switch (state_read(&ep->com)) { + switch (ep->com.state) { case MPA_REQ_SENT: disconnect = process_mpa_reply(ep); break; case MPA_REQ_WAIT: - in_getsockaddr(ep->com.so, (struct sockaddr **)&local); - in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); - ep->com.local_addr = *local; - ep->com.remote_addr = *remote; - free(local, M_SONAME); - free(remote, M_SONAME); disconnect = process_mpa_request(ep); + if (disconnect) + /* Refered in process_newconn() */ + c4iw_put_ep(&ep->parent_ep->com); break; default: if (sbused(&ep->com.so->so_rcv)) log(LOG_ERR, "%s: Unexpected streaming data. 
ep %p, " "state %d, so %p, so_state 0x%x, sbused %u\n", - __func__, ep, state_read(&ep->com), ep->com.so, + __func__, ep, ep->com.state, ep->com.so, ep->com.so->so_state, sbused(&ep->com.so->so_rcv)); break; } @@ -705,58 +919,122 @@ process_connected(struct c4iw_ep *ep) return; err: close_socket(so); - state_set(&ep->com, DEAD); + ep->com.state = DEAD; c4iw_put_ep(&ep->com); return; } -void -process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so) +static inline int c4iw_zero_addr(struct sockaddr *addr) { - struct c4iw_ep *child_ep; - struct sockaddr_in *local; - struct sockaddr_in *remote; - struct c4iw_ep *parent_ep = parent_cm_id->provider_data; + struct in6_addr *ip6; + + if (addr->sa_family == AF_INET) + return IN_ZERONET( + ntohl(((struct sockaddr_in *)addr)->sin_addr.s_addr)); + else { + ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; + return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | + ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; + } +} + +static inline int c4iw_loopback_addr(struct sockaddr *addr) +{ + if (addr->sa_family == AF_INET) + return IN_LOOPBACK( + ntohl(((struct sockaddr_in *) addr)->sin_addr.s_addr)); + else + return IN6_IS_ADDR_LOOPBACK( + &((struct sockaddr_in6 *) addr)->sin6_addr); +} + +static inline int c4iw_any_addr(struct sockaddr *addr) +{ + return c4iw_zero_addr(addr) || c4iw_loopback_addr(addr); +} + +static void +process_newconn(struct c4iw_listen_ep *master_lep, struct socket *new_so) +{ + struct c4iw_listen_ep *real_lep = NULL; + struct c4iw_ep *new_ep = NULL; + struct sockaddr_in *remote = NULL; int ret = 0; - MPASS(child_so != NULL); + MPASS(new_so != NULL); - child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); + if (c4iw_any_addr((struct sockaddr *)&master_lep->com.local_addr)) { + /* Here we need to find the 'real_lep' that belongs to the + * incomming socket's network interface, such that the newly + * created 'ep' can be attached to the real 'lep'. + */ + real_lep = find_real_listen_ep(master_lep, new_so); + if (real_lep == NULL) { + CTR2(KTR_IW_CXGBE, "%s: Could not find the real listen " + "ep for sock: %p", __func__, new_so); + log(LOG_ERR,"%s: Could not find the real listen ep for " + "sock: %p\n", __func__, new_so); + /* FIXME: properly free the 'new_so' in failure case. + * Use of soabort() and soclose() are not legal + * here(before soaccept()). 
+ */ + return; + } + } else /* for Non-Wildcard address, master_lep is always the real_lep */ + real_lep = master_lep; - CTR5(KTR_IW_CXGBE, - "%s: parent so %p, parent ep %p, child so %p, child ep %p", - __func__, parent_ep->com.so, parent_ep, child_so, child_ep); + new_ep = alloc_ep(sizeof(*new_ep), GFP_KERNEL); - in_getsockaddr(child_so, (struct sockaddr **)&local); - in_getpeeraddr(child_so, (struct sockaddr **)&remote); + CTR6(KTR_IW_CXGBE, "%s: master_lep %p, real_lep: %p, new ep %p, " + "listening so %p, new so %p", __func__, master_lep, real_lep, + new_ep, master_lep->com.so, new_so); - child_ep->com.local_addr = *local; - child_ep->com.remote_addr = *remote; - child_ep->com.dev = parent_ep->com.dev; - child_ep->com.so = child_so; - child_ep->com.cm_id = NULL; - child_ep->com.thread = parent_ep->com.thread; - child_ep->parent_ep = parent_ep; + new_ep->com.dev = real_lep->com.dev; + new_ep->com.so = new_so; + new_ep->com.cm_id = NULL; + new_ep->com.thread = real_lep->com.thread; + new_ep->parent_ep = real_lep; - free(local, M_SONAME); + GET_LOCAL_ADDR(&new_ep->com.local_addr, new_so); + GET_REMOTE_ADDR(&new_ep->com.remote_addr, new_so); + c4iw_get_ep(&real_lep->com); + init_timer(&new_ep->timer); + new_ep->com.state = MPA_REQ_WAIT; + START_EP_TIMER(new_ep); + + setiwsockopt(new_so); + ret = soaccept(new_so, (struct sockaddr **)&remote); + if (ret != 0) { + CTR4(KTR_IW_CXGBE, + "%s:listen sock:%p, new sock:%p, ret:%d\n", + __func__, master_lep->com.so, new_so, ret); + if (remote != NULL) + free(remote, M_SONAME); + uninit_iwarp_socket(new_so); + soclose(new_so); + c4iw_put_ep(&new_ep->com); + c4iw_put_ep(&real_lep->com); + return; + } free(remote, M_SONAME); - setiwsockopt(child_so); - init_iwarp_socket(child_so, &child_ep->com); - c4iw_get_ep(&parent_ep->com); - init_timer(&child_ep->timer); - state_set(&child_ep->com, MPA_REQ_WAIT); - START_EP_TIMER(child_ep); + /* MPA request might have been queued up on the socket already, so we + * initialize the socket/upcall_handler under lock to prevent processing + * MPA request on another thread(via process_req()) simultaniously. + */ + c4iw_get_ep(&new_ep->com); /* Dereferenced at the end below, this is to + avoid freeing of ep before ep unlock. */ + mutex_lock(&new_ep->com.mutex); + init_iwarp_socket(new_so, &new_ep->com); - /* maybe the request has already been queued up on the socket... */ - ret = process_mpa_request(child_ep); - if (ret == 2) + ret = process_mpa_request(new_ep); + if (ret) { /* ABORT */ - c4iw_ep_disconnect(child_ep, 1, GFP_KERNEL); - else if (ret == 1) - /* CLOSE */ - c4iw_ep_disconnect(child_ep, 0, GFP_KERNEL); - + c4iw_ep_disconnect(new_ep, 1, GFP_KERNEL); + c4iw_put_ep(&real_lep->com); + } + mutex_unlock(&new_ep->com.mutex); + c4iw_put_ep(&new_ep->com); return; } @@ -790,6 +1068,12 @@ c4iw_so_upcall(struct socket *so, void *arg, int waitflag) ep->com.entry.tqe_prev); MPASS(ep->com.so == so); + /* + * Wake up any threads waiting in rdma_init()/rdma_fini(), + * with locks held. 
+ */ + if (so->so_error) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); add_ep_to_req_list(ep, C4IW_EVENT_SOCKET); return (SU_OK); @@ -820,9 +1104,15 @@ terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) static void process_socket_event(struct c4iw_ep *ep) { - int state = state_read(&ep->com); + int state = ep->com.state; struct socket *so = ep->com.so; + if (ep->com.state == DEAD) { + CTR3(KTR_IW_CXGBE, "%s: Pending socket event discarded " + "ep %p ep_state %s", __func__, ep, states[state]); + return; + } + CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, " "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state, so->so_error, so->so_rcv.sb_state, ep, states[state]); @@ -833,10 +1123,29 @@ process_socket_event(struct c4iw_ep *ep) } if (state == LISTEN) { - /* socket listening events are handled at IWCM */ - CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__, - ep->com.state, ep); - BUG(); + struct c4iw_listen_ep *lep = (struct c4iw_listen_ep *)ep; + struct socket *listen_so = so, *new_so = NULL; + int error = 0; + + SOLISTEN_LOCK(listen_so); + do { + error = solisten_dequeue(listen_so, &new_so, + SOCK_NONBLOCK); + if (error) { + CTR4(KTR_IW_CXGBE, "%s: lep %p listen_so %p " + "error %d", __func__, lep, listen_so, + error); + return; + } + process_newconn(lep, new_so); + + /* solisten_dequeue() unlocks while return, so aquire + * lock again for sol_qlen and also for next iteration. + */ + SOLISTEN_LOCK(listen_so); + } while (listen_so->sol_qlen); + SOLISTEN_UNLOCK(listen_so); + return; } @@ -955,34 +1264,6 @@ stop_ep_timer(struct c4iw_ep *ep) return 1; } -static enum -c4iw_ep_state state_read(struct c4iw_ep_common *epc) -{ - enum c4iw_ep_state state; - - mutex_lock(&epc->mutex); - state = epc->state; - mutex_unlock(&epc->mutex); - - return (state); -} - -static void -__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) -{ - - epc->state = new; -} - -static void -state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) -{ - - mutex_lock(&epc->mutex); - __state_set(epc, new); - mutex_unlock(&epc->mutex); -} - static void * alloc_ep(int size, gfp_t gfp) { @@ -1061,8 +1342,8 @@ send_mpa_req(struct c4iw_ep *ep) } if (mpa_rev_to_use == 2) { - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); @@ -1114,7 +1395,7 @@ send_mpa_req(struct c4iw_ep *ep) } START_EP_TIMER(ep); - state_set(&ep->com, MPA_REQ_SENT); + ep->com.state = MPA_REQ_SENT; ep->mpa_attr.initiator = 1; CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err); return 0; @@ -1157,8 +1438,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons(((u16)ep->ird) | (peer2peer ? 
MPA_V2_PEER2PEER_MODEL : 0)); @@ -1173,7 +1454,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) if (ep->plen) memcpy(mpa->private_data + - sizeof(struct mpa_v2_conn_params), pdata, plen); + sizeof(struct mpa_v2_conn_params), pdata, plen); CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep, mpa_v2_params.ird, mpa_v2_params.ord, ep->plen); } else @@ -1277,7 +1558,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) free(mpa, M_CXGBE); - state_set(&ep->com, MPA_REP_SENT); + ep->com.state = MPA_REP_SENT; ep->snd_seq += mpalen; err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); @@ -1334,17 +1615,17 @@ send_abort(struct c4iw_ep *ep) } uninit_iwarp_socket(so); - sodisconnect(so); + soclose(so); set_bit(ABORT_CONN, &ep->com.history); /* * TBD: iw_cxgbe driver should receive ABORT reply for every ABORT * request it has sent. But the current TOE driver is not propagating * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a work- - * around de-refer 'ep' (which was refered before sending ABORT request) - * here instead of doing it in abort_rpl() handler of iw_cxgbe driver. + * around de-refererece 'ep' here instead of doing it in abort_rpl() + * handler(not yet implemented) of iw_cxgbe driver. */ - c4iw_put_ep(&ep->com); + release_ep_resources(ep); return (0); } @@ -1403,6 +1684,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep); /* this means MPA_v2 is used */ + event.ord = ep->ird; + event.ird = ep->ord; event.private_data_len = ep->plen - sizeof(struct mpa_v2_conn_params); event.private_data = ep->mpa_pkt + @@ -1412,6 +1695,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep); /* this means MPA_v1 is used */ + event.ord = c4iw_max_read_depth; + event.ird = c4iw_max_read_depth; event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); @@ -1453,7 +1738,6 @@ static int connect_request_upcall(struct c4iw_ep *ep) event.local_addr = ep->com.local_addr; event.remote_addr = ep->com.remote_addr; event.provider_data = ep; - event.so = ep->com.so; if (!ep->tried_with_mpa_v1) { /* this means MPA_v2 is used */ @@ -1475,11 +1759,18 @@ static int connect_request_upcall(struct c4iw_ep *ep) c4iw_get_ep(&ep->com); ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, &event); - if(ret) + if(ret) { + CTR3(KTR_IW_CXGBE, "%s: ep %p, Failure while notifying event to" + " IWCM, err:%d", __func__, ep, ret); c4iw_put_ep(&ep->com); + } else + /* Dereference parent_ep only in success case. + * In case of failure, parent_ep is dereferenced by the caller + * of process_mpa_request(). 
+ */ + c4iw_put_ep(&ep->parent_ep->com); set_bit(CONNREQ_UPCALL, &ep->com.history); - c4iw_put_ep(&ep->parent_ep->com); return ret; } @@ -1490,8 +1781,8 @@ static void established_upcall(struct c4iw_ep *ep) CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; - event.ird = ep->ird; - event.ord = ep->ord; + event.ird = ep->ord; + event.ord = ep->ird; if (ep->com.cm_id) { @@ -1503,6 +1794,8 @@ static void established_upcall(struct c4iw_ep *ep) } +#define RELAXED_IRD_NEGOTIATION 1 + /* * process_mpa_reply - process streaming mode MPA reply * @@ -1522,7 +1815,7 @@ static int process_mpa_reply(struct c4iw_ep *ep) u16 plen; u16 resp_ird, resp_ord; u8 rtr_mismatch = 0, insuff_ird = 0; - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; enum c4iw_qp_attr_mask mask; int err; struct mbuf *top, *m; @@ -1666,7 +1959,7 @@ static int process_mpa_reply(struct c4iw_ep *ep) * start reply message including private data. And * the MPA header is valid. */ - state_set(&ep->com, FPDU_MODE); + ep->com.state = FPDU_MODE; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; @@ -1694,13 +1987,25 @@ static int process_mpa_reply(struct c4iw_ep *ep) * not required since ird/ord stuff has been taken * care of in c4iw_accept_cr */ - if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) { - - CTR2(KTR_IW_CXGBE, "%s:pmre %p", __func__, ep); + if (ep->ird < resp_ord) { + if (RELAXED_IRD_NEGOTIATION && resp_ord <= + ep->com.dev->rdev.adap->params.max_ordird_qp) + ep->ird = resp_ord; + else + insuff_ird = 1; + } else if (ep->ird > resp_ord) { + ep->ird = resp_ord; + } + if (ep->ord > resp_ird) { + if (RELAXED_IRD_NEGOTIATION) + ep->ord = resp_ird; + else + insuff_ird = 1; + } + if (insuff_ird) { err = -ENOMEM; ep->ird = resp_ord; ep->ord = resp_ird; - insuff_ird = 1; } if (ntohs(mpa_v2_params->ird) & @@ -1799,8 +2104,9 @@ static int process_mpa_reply(struct c4iw_ep *ep) attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_NOMATCH_RTR; attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); err = -ENOMEM; disconnect = 1; goto out; @@ -1820,8 +2126,9 @@ static int process_mpa_reply(struct c4iw_ep *ep) attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_INSUFF_IRD; attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); err = -ENOMEM; disconnect = 1; goto out; @@ -1853,12 +2160,13 @@ static int process_mpa_request(struct c4iw_ep *ep) { struct mpa_message *mpa; + struct mpa_v2_conn_params *mpa_v2_params; u16 plen; int flags = MSG_DONTWAIT; int rc; struct iovec iov; struct uio uio; - enum c4iw_ep_state state = state_read(&ep->com); + enum c4iw_ep_state state = ep->com.state; CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]); @@ -1947,24 +2255,29 @@ process_mpa_request(struct c4iw_ep *ep) ep->mpa_attr.enhanced_rdma_conn = mpa->flags & MPA_ENHANCED_RDMA_CONN ? 
1 : 0; if (ep->mpa_attr.enhanced_rdma_conn) { - struct mpa_v2_conn_params *mpa_v2_params; - u16 ird, ord; - - mpa_v2_params = (void *)&ep->mpa_pkt[sizeof(*mpa)]; - ird = ntohs(mpa_v2_params->ird); - ord = ntohs(mpa_v2_params->ord); - - ep->ird = ird & MPA_V2_IRD_ORD_MASK; - ep->ord = ord & MPA_V2_IRD_ORD_MASK; - if (ird & MPA_V2_PEER2PEER_MODEL && peer2peer) { - if (ord & MPA_V2_RDMA_WRITE_RTR) { - ep->mpa_attr.p2p_type = - FW_RI_INIT_P2PTYPE_RDMA_WRITE; - } else if (ord & MPA_V2_RDMA_READ_RTR) { - ep->mpa_attr.p2p_type = - FW_RI_INIT_P2PTYPE_READ_REQ; + mpa_v2_params = (struct mpa_v2_conn_params *) + (ep->mpa_pkt + sizeof(*mpa)); + ep->ird = ntohs(mpa_v2_params->ird) & + MPA_V2_IRD_ORD_MASK; + ep->ird = min_t(u32, ep->ird, + cur_max_read_depth(ep->com.dev)); + ep->ord = ntohs(mpa_v2_params->ord) & + MPA_V2_IRD_ORD_MASK; + ep->ord = min_t(u32, ep->ord, + cur_max_read_depth(ep->com.dev)); + CTR3(KTR_IW_CXGBE, "%s initiator ird %u ord %u\n", + __func__, ep->ird, ep->ord); + if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) + if (peer2peer) { + if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_WRITE_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_RDMA_WRITE; + else if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_READ_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_READ_REQ; } - } } } else if (mpa->revision == 1 && peer2peer) ep->mpa_attr.p2p_type = p2p_type; @@ -1977,22 +2290,15 @@ process_mpa_request(struct c4iw_ep *ep) ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - state_set(&ep->com, MPA_REQ_RCVD); + ep->com.state = MPA_REQ_RCVD; STOP_EP_TIMER(ep); /* drive upcall */ - mutex_lock(&ep->parent_ep->com.mutex); - if (ep->parent_ep->com.state != DEAD) { + if (ep->parent_ep->com.state != DEAD) if (connect_request_upcall(ep)) - goto err_unlock_parent; - } else - goto err_unlock_parent; - mutex_unlock(&ep->parent_ep->com.mutex); + goto err_out; return 0; -err_unlock_parent: - mutex_unlock(&ep->parent_ep->com.mutex); - goto err_out; err_stop_timer: STOP_EP_TIMER(ep); err_out: @@ -2008,13 +2314,16 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { int err; struct c4iw_ep *ep = to_ep(cm_id); - CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep); int abort = 0; - if ((state_read(&ep->com) == DEAD) || - (state_read(&ep->com) != MPA_REQ_RCVD)) { + mutex_lock(&ep->com.mutex); + CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep); + + if ((ep->com.state == DEAD) || + (ep->com.state != MPA_REQ_RCVD)) { CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return -ECONNRESET; } @@ -2030,8 +2339,9 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep); abort = send_mpa_reject(ep, pdata, pdata_len); } - stop_ep_timer(ep); + STOP_EP_TIMER(ep); err = c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); CTR3(KTR_IW_CXGBE, "%s:crc4 %p, err: %d", __func__, ep, err); return 0; @@ -2040,23 +2350,24 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err; - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; enum c4iw_qp_attr_mask mask; struct c4iw_ep *ep = to_ep(cm_id); struct c4iw_dev *h = to_c4iw_dev(cm_id->device); struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); int abort = 0; + mutex_lock(&ep->com.mutex); 
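+	/*
+	 * Note: the old state_read()/state_set() helpers are gone; the
+	 * per-ep mutex taken above is instead held across the whole
+	 * accept path, so ep->com.state is read and written directly
+	 * below and cannot change underneath us until the unlock.
+	 */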
CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep); - if (state_read(&ep->com) == DEAD) { + if ((ep->com.state == DEAD) || + (ep->com.state != MPA_REQ_RCVD)) { CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep); err = -ECONNRESET; goto err_out; } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); BUG_ON(!qp); set_bit(ULP_ACCEPT, &ep->com.history); @@ -2074,47 +2385,42 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep); if (conn_param->ord > ep->ird) { - - CTR2(KTR_IW_CXGBE, "%s:cac4 %p", __func__, ep); - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - send_mpa_reject(ep, conn_param->private_data, - conn_param->private_data_len); - err = -ENOMEM; - goto err_abort; - } - - if (conn_param->ird > ep->ord) { - - CTR2(KTR_IW_CXGBE, "%s:cac5 %p", __func__, ep); - - if (!ep->ord) { - - CTR2(KTR_IW_CXGBE, "%s:cac6 %p", __func__, ep); - conn_param->ird = 1; - } - else { - CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep); + if (RELAXED_IRD_NEGOTIATION) { + conn_param->ord = ep->ird; + } else { + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + send_mpa_reject(ep, conn_param->private_data, + conn_param->private_data_len); + err = -ENOMEM; + goto err_abort; + } + } + if (conn_param->ird < ep->ord) { + if (RELAXED_IRD_NEGOTIATION && + ep->ord <= h->rdev.adap->params.max_ordird_qp) { + conn_param->ird = ep->ord; + } else { err = -ENOMEM; goto err_abort; } } - } ep->ird = conn_param->ird; ep->ord = conn_param->ord; - if (ep->mpa_attr.version != 2) { - - CTR2(KTR_IW_CXGBE, "%s:cac8 %p", __func__, ep); - - if (peer2peer && ep->ird == 0) { - - CTR2(KTR_IW_CXGBE, "%s:cac9 %p", __func__, ep); + if (ep->mpa_attr.version == 1) { + if (peer2peer && ep->ird == 0) + ep->ird = 1; + } else { + if (peer2peer && + (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) && + (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0) ep->ird = 1; - } } + CTR4(KTR_IW_CXGBE, "%s %d ird %d ord %d\n", __func__, __LINE__, + ep->ird, ep->ord); ep->com.cm_id = cm_id; ref_cm_id(&ep->com); @@ -2137,23 +2443,21 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) C4IW_QP_ATTR_MAX_ORD; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); - if (err) { - - CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep); + CTR3(KTR_IW_CXGBE, "%s:caca %p, err: %d", __func__, ep, err); goto err_defef_cm_id; } + err = send_mpa_reply(ep, conn_param->private_data, conn_param->private_data_len); - if (err) { - - CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep); + CTR3(KTR_IW_CXGBE, "%s:cacb %p, err: %d", __func__, ep, err); goto err_defef_cm_id; } - state_set(&ep->com, FPDU_MODE); + ep->com.state = FPDU_MODE; established_upcall(ep); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep); return 0; @@ -2164,22 +2468,59 @@ err_abort: err_out: if (abort) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep); return err; } +static int +c4iw_sock_create(struct sockaddr_storage *laddr, struct socket **so) +{ + int ret; + int size; + struct socket *sock = NULL; + ret = sock_create_kern(laddr->ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); + if (ret) { + CTR2(KTR_IW_CXGBE, "%s:Failed to create TCP socket. err %d", + __func__, ret); + return ret; + } + + ret = sobind(sock, (struct sockaddr *)laddr, curthread); + if (ret) { + CTR2(KTR_IW_CXGBE, "%s:Failed to bind socket. 
err %p", + __func__, ret); + sock_release(sock); + return ret; + } + + size = laddr->ss_family == AF_INET6 ? + sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in); + ret = sock_getname(sock, (struct sockaddr *)laddr, &size, 0); + if (ret) { + CTR2(KTR_IW_CXGBE, "%s:sock_getname failed. err %p", + __func__, ret); + sock_release(sock); + return ret; + } + + *so = sock; + return 0; +} int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err = 0; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_ep *ep = NULL; - struct nhop4_extended nh4; + struct ifnet *nh_ifp; /* Logical egress interface */ CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id); + if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { @@ -2188,6 +2529,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto out; } ep = alloc_ep(sizeof(*ep), GFP_KERNEL); + init_timer(&ep->timer); ep->plen = conn_param->private_data_len; @@ -2215,97 +2557,161 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep); err = -EINVAL; - goto fail2; + goto fail; } ref_qp(ep); ep->com.thread = curthread; - ep->com.so = cm_id->so; - - /* find a route */ - err = find_route( - cm_id->local_addr.sin_addr.s_addr, - cm_id->remote_addr.sin_addr.s_addr, - cm_id->local_addr.sin_port, - cm_id->remote_addr.sin_port, 0, &nh4); + err = get_ifnet_from_raddr(&cm_id->remote_addr, &nh_ifp); if (err) { CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep); printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); - err = -EHOSTUNREACH; - goto fail2; + err = EHOSTUNREACH; + return err; } - if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE) || - TOEDEV(nh4.nh_ifp) == NULL) { + if (!(nh_ifp->if_capenable & IFCAP_TOE) || + TOEDEV(nh_ifp) == NULL) { err = -ENOPROTOOPT; - goto fail3; + goto fail; } - fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); - - setiwsockopt(cm_id->so); - state_set(&ep->com, CONNECTING); + ep->com.state = CONNECTING; ep->tos = 0; ep->com.local_addr = cm_id->local_addr; ep->com.remote_addr = cm_id->remote_addr; + + err = c4iw_sock_create(&cm_id->local_addr, &ep->com.so); + if (err) + goto fail; + + setiwsockopt(ep->com.so); err = -soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, ep->com.thread); - if (!err) { - init_iwarp_socket(cm_id->so, &ep->com); + init_iwarp_socket(ep->com.so, &ep->com); goto out; - } else { - goto fail2; - } + } else + goto fail_free_so; -fail3: - fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); -fail2: +fail_free_so: + sock_release(ep->com.so); +fail: deref_cm_id(&ep->com); c4iw_put_ep(&ep->com); - ep = NULL; /* CTR shouldn't display already-freed ep. */ + ep = NULL; out: - CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep); + CTR2(KTR_IW_CXGBE, "%s:ccE ret:%d", __func__, err); return err; } /* - * iwcm->create_listen_ep. Returns -errno on failure. + * iwcm->create_listen. Returns -errno on failure. 
*/ int -c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog) +c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) { struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); - struct c4iw_listen_ep *ep; - struct socket *so = cm_id->so; + struct c4iw_listen_ep *lep = NULL; + struct listen_port_info *port_info = NULL; + int rc = 0; - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - ep->com.cm_id = cm_id; - ref_cm_id(&ep->com); - ep->com.dev = dev; - ep->backlog = backlog; - ep->com.local_addr = cm_id->local_addr; - ep->com.thread = curthread; - state_set(&ep->com, LISTEN); - ep->com.so = so; + CTR3(KTR_IW_CXGBE, "%s: cm_id %p, backlog %s", __func__, cm_id, + backlog); + lep = alloc_ep(sizeof(*lep), GFP_KERNEL); + lep->com.cm_id = cm_id; + ref_cm_id(&lep->com); + lep->com.dev = dev; + lep->backlog = backlog; + lep->com.local_addr = cm_id->local_addr; + lep->com.thread = curthread; + cm_id->provider_data = lep; + lep->com.state = LISTEN; - cm_id->provider_data = ep; - return (0); + /* In case of INDADDR_ANY, ibcore creates cmid for each device and + * invokes iw_cxgbe listener callbacks assuming that iw_cxgbe creates + * HW listeners for each device seperately. But toecore expects single + * solisten() call with INADDR_ANY address to create HW listeners on + * all devices for a given port number. So iw_cxgbe driver calls + * solisten() only once for INADDR_ANY(usually done at first time + * listener callback from ibcore). And all the subsequent INADDR_ANY + * listener callbacks from ibcore(for the same port address) do not + * invoke solisten() as first listener callback has already created + * listeners for all other devices(via solisten). + */ + if (c4iw_any_addr((struct sockaddr *)&lep->com.local_addr)) { + port_info = add_ep_to_listenlist(lep); + /* skip solisten() if refcnt > 1, as the listeners were + * alredy created by 'Master lep' + */ + if (port_info->refcnt > 1) { + /* As there will be only one listener socket for a TCP + * port, copy Master lep's socket pointer to other lep's + * that are belonging to same TCP port. + */ + struct c4iw_listen_ep *head_lep = + container_of(port_info->lep_list.next, + struct c4iw_listen_ep, listen_ep_list); + lep->com.so = head_lep->com.so; + goto out; + } + } + rc = c4iw_sock_create(&cm_id->local_addr, &lep->com.so); + if (rc) { + CTR2(KTR_IW_CXGBE, "%s:Failed to create socket. err %d", + __func__, rc); + goto fail; + } + + rc = solisten(lep->com.so, backlog, curthread); + if (rc) { + CTR3(KTR_IW_CXGBE, "%s:Failed to listen on sock:%p. 
err %d", + __func__, lep->com.so, rc); + goto fail_free_so; + } + init_iwarp_socket(lep->com.so, &lep->com); +out: + return 0; + +fail_free_so: + sock_release(lep->com.so); +fail: + if (port_info) + rem_ep_from_listenlist(lep); + deref_cm_id(&lep->com); + c4iw_put_ep(&lep->com); + return rc; } -void -c4iw_destroy_listen_ep(struct iw_cm_id *cm_id) +int +c4iw_destroy_listen(struct iw_cm_id *cm_id) { - struct c4iw_listen_ep *ep = to_listen_ep(cm_id); + struct c4iw_listen_ep *lep = to_listen_ep(cm_id); - CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, state %s", __func__, cm_id, - cm_id->so, states[ep->com.state]); + mutex_lock(&lep->com.mutex); + CTR3(KTR_IW_CXGBE, "%s: cm_id %p, state %s", __func__, cm_id, + states[lep->com.state]); - state_set(&ep->com, DEAD); - deref_cm_id(&ep->com); - c4iw_put_ep(&ep->com); + lep->com.state = DEAD; + if (c4iw_any_addr((struct sockaddr *)&lep->com.local_addr)) { + /* if no refcount then close listen socket */ + if (!rem_ep_from_listenlist(lep)) + close_socket(lep->com.so); + } else + close_socket(lep->com.so); + deref_cm_id(&lep->com); + mutex_unlock(&lep->com.mutex); + c4iw_put_ep(&lep->com); + return 0; +} - return; +int __c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) +{ + int ret; + mutex_lock(&ep->com.mutex); + ret = c4iw_ep_disconnect(ep, abrupt, gfp); + mutex_unlock(&ep->com.mutex); + return ret; } int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) @@ -2315,7 +2721,6 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) int fatal = 0; struct c4iw_rdev *rdev; - mutex_lock(&ep->com.mutex); CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep); @@ -2326,11 +2731,17 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep); fatal = 1; close_complete_upcall(ep, -ECONNRESET); + send_abort(ep); ep->com.state = DEAD; } CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep, states[ep->com.state]); + /* + * Ref the ep here in case we have fatal errors causing the + * ep to be released and freed. 
+ */ + c4iw_get_ep(&ep->com); switch (ep->com.state) { case MPA_REQ_WAIT: @@ -2374,7 +2785,6 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) break; } - mutex_unlock(&ep->com.mutex); if (close) { @@ -2394,7 +2804,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) set_bit(EP_DISC_CLOSE, &ep->com.history); if (!ep->parent_ep) - __state_set(&ep->com, MORIBUND); + ep->com.state = MORIBUND; sodisconnect(ep->com.so); } @@ -2407,7 +2817,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) close_complete_upcall(ep, -EIO); } if (ep->com.qp) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; attrs.next_state = C4IW_QP_STATE_ERROR; ret = c4iw_modify_qp(ep->com.dev, ep->com.qp, @@ -2422,6 +2832,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) ep->com.state = DEAD; CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep); } + c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep); return ret; } @@ -2499,7 +2910,7 @@ static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl) static int process_terminate(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; CTR2(KTR_IW_CXGBE, "%s:tB %p %d", __func__, ep); diff --git a/sys/dev/cxgbe/iw_cxgbe/cq.c b/sys/dev/cxgbe/iw_cxgbe/cq.c index 54eb026a19d5..317b64ae9199 100644 --- a/sys/dev/cxgbe/iw_cxgbe/cq.c +++ b/sys/dev/cxgbe/iw_cxgbe/cq.c @@ -53,6 +53,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, struct c4iw_dev_ucontext *uctx) { struct adapter *sc = rdev->adap; + struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev); struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; @@ -80,10 +81,12 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, t4_wrq_tx(sc, wr); - c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__); kfree(cq->sw_queue); - contigfree(cq->queue, cq->memsize, M_DEVBUF); + dma_free_coherent(rhp->ibdev.dma_device, + cq->memsize, cq->queue, + dma_unmap_addr(cq, mapping)); c4iw_put_cqid(rdev, cq->cqid, uctx); return 0; } @@ -93,6 +96,7 @@ create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, struct c4iw_dev_ucontext *uctx) { struct adapter *sc = rdev->adap; + struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev); struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; @@ -100,6 +104,7 @@ create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, struct c4iw_wr_wait wr_wait; int ret; struct wrqe *wr; + u64 cq_bar2_qoffset = 0; cq->cqid = c4iw_get_cqid(rdev, uctx); if (!cq->cqid) { @@ -114,17 +119,13 @@ create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, goto err2; } } - - cq->queue = contigmalloc(cq->memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, - PAGE_SIZE, 0); - if (cq->queue) - cq->dma_addr = vtophys(cq->queue); - else { + cq->queue = dma_alloc_coherent(rhp->ibdev.dma_device, cq->memsize, + &cq->dma_addr, GFP_KERNEL); + if (!cq->queue) { ret = -ENOMEM; - goto err3; + goto err3; } - - pci_unmap_addr_set(cq, mapping, cq->dma_addr); + dma_unmap_addr_set(cq, mapping, cq->dma_addr); memset(cq->queue, 0, cq->memsize); /* build fw_ri_res_wr */ @@ -166,26 +167,30 @@ create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, t4_wrq_tx(sc, wr); CTR2(KTR_IW_CXGBE, "%s wait_event wr_wait %p", __func__, &wr_wait); - ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__); if (ret) goto err4; cq->gen = 1; - cq->gts = (void *)((unsigned 
long)rman_get_virtual(sc->regs_res) + - sc->sge_gts_reg); cq->rdev = rdev; - if (user) { - cq->ugts = (u64)((char*)rman_get_virtual(sc->udbs_res) + - (cq->cqid << rdev->cqshift)); - cq->ugts &= PAGE_MASK; - CTR5(KTR_IW_CXGBE, - "%s: UGTS %p cqid %x cqshift %d page_mask %x", __func__, - cq->ugts, cq->cqid, rdev->cqshift, PAGE_MASK); - } + /* Determine the BAR2 queue offset and qid. */ + t4_bar2_sge_qregs(rdev->adap, cq->cqid, T4_BAR2_QTYPE_INGRESS, user, + &cq_bar2_qoffset, &cq->bar2_qid); + + /* If user mapping then compute the page-aligned physical + * address for mapping. + */ + if (user) + cq->bar2_pa = (rdev->bar2_pa + cq_bar2_qoffset) & PAGE_MASK; + else + cq->bar2_va = (void __iomem *)((u64)rdev->bar2_kva + + cq_bar2_qoffset); + return 0; err4: - contigfree(cq->queue, cq->memsize, M_DEVBUF); + dma_free_coherent(rhp->ibdev.dma_device, cq->memsize, cq->queue, + dma_unmap_addr(cq, mapping)); err3: kfree(cq->sw_queue); err2: @@ -245,43 +250,188 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq, t4_swcq_produce(cq); } -int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count) +static void advance_oldest_read(struct t4_wq *wq); + +int c4iw_flush_sq(struct c4iw_qp *qhp) { int flushed = 0; - struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count]; - int in_use = wq->sq.in_use - count; + struct t4_wq *wq = &qhp->wq; + struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq); + struct t4_cq *cq = &chp->cq; + int idx; + struct t4_swsqe *swsqe; - BUG_ON(in_use < 0); - while (in_use--) { - swsqe->signaled = 0; + if (wq->sq.flush_cidx == -1) + wq->sq.flush_cidx = wq->sq.cidx; + idx = wq->sq.flush_cidx; + BUG_ON(idx >= wq->sq.size); + while (idx != wq->sq.pidx) { + swsqe = &wq->sq.sw_sq[idx]; + BUG_ON(swsqe->flushed); + swsqe->flushed = 1; insert_sq_cqe(wq, cq, swsqe); - swsqe++; - if (swsqe == (wq->sq.sw_sq + wq->sq.size)) - swsqe = wq->sq.sw_sq; + if (wq->sq.oldest_read == swsqe) { + BUG_ON(swsqe->opcode != FW_RI_READ_REQ); + advance_oldest_read(wq); + } flushed++; + if (++idx == wq->sq.size) + idx = 0; } + wq->sq.flush_cidx += flushed; + if (wq->sq.flush_cidx >= wq->sq.size) + wq->sq.flush_cidx -= wq->sq.size; return flushed; } +static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) +{ + struct t4_swsqe *swsqe; + int cidx; + + if (wq->sq.flush_cidx == -1) + wq->sq.flush_cidx = wq->sq.cidx; + cidx = wq->sq.flush_cidx; + BUG_ON(cidx > wq->sq.size); + + while (cidx != wq->sq.pidx) { + swsqe = &wq->sq.sw_sq[cidx]; + if (!swsqe->signaled) { + if (++cidx == wq->sq.size) + cidx = 0; + } else if (swsqe->complete) { + + BUG_ON(swsqe->flushed); + + /* + * Insert this completed cqe into the swcq. 
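c4iw_flush_sq() above is essentially a bounded walk of a circular array from flush_cidx up to (but not including) pidx, wrapping at the ring size, followed by a modular advance of flush_cidx. The toy standalone program below shows just that index arithmetic; the ring size and indices are made up for the example.

/* Toy illustration of the circular walk used by c4iw_flush_sq(). */
#include <stdio.h>

#define RING_SIZE 8

int main(void)
{
    int cidx = 6, pidx = 3;     /* pending entries wrap past the end */
    int flushed = 0;

    for (int idx = cidx; idx != pidx; ) {
        printf("flush entry %d\n", idx);
        flushed++;
        if (++idx == RING_SIZE)
            idx = 0;
    }
    cidx += flushed;
    if (cidx >= RING_SIZE)
        cidx -= RING_SIZE;      /* same modular update as flush_cidx */
    printf("flushed %d, new cidx %d\n", flushed, cidx);
    return 0;
}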
+ */ + CTR3(KTR_IW_CXGBE, + "%s moving cqe into swcq sq idx %u cq idx %u\n", + __func__, cidx, cq->sw_pidx); + swsqe->cqe.header |= htonl(V_CQE_SWCQE(1)); + cq->sw_queue[cq->sw_pidx] = swsqe->cqe; + t4_swcq_produce(cq); + swsqe->flushed = 1; + if (++cidx == wq->sq.size) + cidx = 0; + wq->sq.flush_cidx = cidx; + } else + break; + } +} + +static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe, + struct t4_cqe *read_cqe) +{ + read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx; + read_cqe->len = htonl(wq->sq.oldest_read->read_len); + read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) | + V_CQE_SWCQE(SW_CQE(hw_cqe)) | + V_CQE_OPCODE(FW_RI_READ_REQ) | + V_CQE_TYPE(1)); + read_cqe->bits_type_ts = hw_cqe->bits_type_ts; +} + +static void advance_oldest_read(struct t4_wq *wq) +{ + + u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1; + + if (rptr == wq->sq.size) + rptr = 0; + while (rptr != wq->sq.pidx) { + wq->sq.oldest_read = &wq->sq.sw_sq[rptr]; + + if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ) + return; + if (++rptr == wq->sq.size) + rptr = 0; + } + wq->sq.oldest_read = NULL; +} + /* * Move all CQEs from the HWCQ into the SWCQ. + * Deal with out-of-order and/or completions that complete + * prior unsignalled WRs. */ -void c4iw_flush_hw_cq(struct t4_cq *cq) +void c4iw_flush_hw_cq(struct c4iw_cq *chp) { - struct t4_cqe *cqe = NULL, *swcqe; + struct t4_cqe *hw_cqe, *swcqe, read_cqe; + struct c4iw_qp *qhp; + struct t4_swsqe *swsqe; int ret; - CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, cq, cq->cqid); - ret = t4_next_hw_cqe(cq, &cqe); + CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, &chp->cq, + chp->cq.cqid); + ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); + + /* + * This logic is similar to poll_cq(), but not quite the same + * unfortunately. Need to move pertinent HW CQEs to the SW CQ but + * also do any translation magic that poll_cq() normally does. + */ while (!ret) { - CTR3(KTR_IW_CXGBE, "%s flushing hwcq cidx 0x%x swcq pidx 0x%x", - __func__, cq->cidx, cq->sw_pidx); - swcqe = &cq->sw_queue[cq->sw_pidx]; - *swcqe = *cqe; - swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); - t4_swcq_produce(cq); - t4_hwcq_consume(cq); - ret = t4_next_hw_cqe(cq, &cqe); + qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe)); + + /* + * drop CQEs with no associated QP + */ + if (qhp == NULL) + goto next_cqe; + + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) + goto next_cqe; + + if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) { + + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) + goto next_cqe; + + /* drop peer2peer RTR reads. + */ + if (CQE_WRID_STAG(hw_cqe) == 1) + goto next_cqe; + + /* + * Eat completions for unsignaled read WRs. + */ + if (!qhp->wq.sq.oldest_read->signaled) { + advance_oldest_read(&qhp->wq); + goto next_cqe; + } + + /* + * Don't write to the HWCQ, create a new read req CQE + * in local memory and move it into the swcq. + */ + create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe); + hw_cqe = &read_cqe; + advance_oldest_read(&qhp->wq); + } + + /* if its a SQ completion, then do the magic to move all the + * unsignaled and now in-order completions into the swcq. 
+ */ + if (SQ_TYPE(hw_cqe)) { + swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)]; + swsqe->cqe = *hw_cqe; + swsqe->complete = 1; + flush_completed_wrs(&qhp->wq, &chp->cq); + } else { + swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx]; + *swcqe = *hw_cqe; + swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); + t4_swcq_produce(&chp->cq); + } +next_cqe: + t4_hwcq_consume(&chp->cq); + ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); } } @@ -301,25 +451,6 @@ static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) return 1; } -void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count) -{ - struct t4_cqe *cqe; - u32 ptr; - - *count = 0; - ptr = cq->sw_cidx; - while (ptr != cq->sw_pidx) { - cqe = &cq->sw_queue[ptr]; - if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && - wq->sq.oldest_read)) && - (CQE_QPID(cqe) == wq->sq.qid)) - (*count)++; - if (++ptr == cq->size) - ptr = 0; - } - CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count); -} - void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) { struct t4_cqe *cqe; @@ -339,71 +470,6 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count); } -static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) -{ - struct t4_swsqe *swsqe; - u16 ptr = wq->sq.cidx; - int count = wq->sq.in_use; - int unsignaled = 0; - - swsqe = &wq->sq.sw_sq[ptr]; - while (count--) - if (!swsqe->signaled) { - if (++ptr == wq->sq.size) - ptr = 0; - swsqe = &wq->sq.sw_sq[ptr]; - unsignaled++; - } else if (swsqe->complete) { - - /* - * Insert this completed cqe into the swcq. - */ - CTR3(KTR_IW_CXGBE, - "%s moving cqe into swcq sq idx %u cq idx %u", - __func__, ptr, cq->sw_pidx); - swsqe->cqe.header |= htonl(V_CQE_SWCQE(1)); - cq->sw_queue[cq->sw_pidx] = swsqe->cqe; - t4_swcq_produce(cq); - swsqe->signaled = 0; - wq->sq.in_use -= unsignaled; - break; - } else - break; -} - -static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe, - struct t4_cqe *read_cqe) -{ - read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx; - read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len); - read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) | - V_CQE_SWCQE(SW_CQE(hw_cqe)) | - V_CQE_OPCODE(FW_RI_READ_REQ) | - V_CQE_TYPE(1)); - read_cqe->bits_type_ts = hw_cqe->bits_type_ts; -} - -/* - * Return a ptr to the next read wr in the SWSQ or NULL. - */ -static void advance_oldest_read(struct t4_wq *wq) -{ - - u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1; - - if (rptr == wq->sq.size) - rptr = 0; - while (rptr != wq->sq.pidx) { - wq->sq.oldest_read = &wq->sq.sw_sq[rptr]; - - if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ) - return; - if (++rptr == wq->sq.size) - rptr = 0; - } - wq->sq.oldest_read = NULL; -} - /* * poll_cq * @@ -449,6 +515,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, goto skip_cqe; } + /* + * skip hw cqe's if the wq is flushed. + */ + if (wq->flushed && !SW_CQE(hw_cqe)) { + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * skip TERMINATE cqes... + */ + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { + ret = -EAGAIN; + goto skip_cqe; + } + /* * Special cqe for drain WR completions... 
*/ @@ -467,18 +549,37 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) { - /* - * If this is an unsolicited read response, then the read + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) { + if (CQE_STATUS(hw_cqe)) + t4_set_wq_in_error(wq); + ret = -EAGAIN; + goto skip_cqe; + } + + /* If this is an unsolicited read response, then the read * was generated by the kernel driver as part of peer-2-peer * connection setup. So ignore the completion. */ - if (!wq->sq.oldest_read) { + if (CQE_WRID_STAG(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) t4_set_wq_in_error(wq); ret = -EAGAIN; goto skip_cqe; } + /* + * Eat completions for unsignaled read WRs. + */ + if (!wq->sq.oldest_read->signaled) { + advance_oldest_read(wq); + ret = -EAGAIN; + goto skip_cqe; + } + /* * Don't write to the HWCQ, so create a new read req CQE * in local memory. @@ -489,14 +590,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, } if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { - *cqe_flushed = t4_wq_in_error(wq); + *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); t4_set_wq_in_error(wq); - goto proc_cqe; - } - - if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { - ret = -EAGAIN; - goto skip_cqe; } /* @@ -556,9 +651,26 @@ proc_cqe: * completion. */ if (SQ_TYPE(hw_cqe)) { - wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe); - CTR2(KTR_IW_CXGBE, "%s completing sq idx %u", - __func__, wq->sq.cidx); + int idx = CQE_WRID_SQ_IDX(hw_cqe); + BUG_ON(idx >= wq->sq.size); + + /* + * Account for any unsignaled completions completed by + * this signaled completion. In this case, cidx points + * to the first unsignaled one, and idx points to the + * signaled one. So adjust in_use based on this delta. + * if this is not completing any unsigned wrs, then the + * delta will be 0. Handle wrapping also! 
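The in_use adjustment described in the comment above is the modular distance from cidx to the signaled index: when idx has wrapped past the end of the ring (idx < cidx) the distance is size + idx - cidx, otherwise it is simply idx - cidx. A quick standalone check of that arithmetic, with arbitrarily chosen values:

/* Quick check of the wrap-aware distance used for the in_use update. */
#include <stdio.h>

static int ring_distance(int cidx, int idx, int size)
{
    return idx < cidx ? size + idx - cidx : idx - cidx;
}

int main(void)
{
    /* size 16: from cidx 14 to idx 2 means 4 entries were completed */
    printf("%d\n", ring_distance(14, 2, 16));   /* prints 4 */
    printf("%d\n", ring_distance(3, 9, 16));    /* prints 6 */
    return 0;
}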
+ */ + if (idx < wq->sq.cidx) + wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx; + else + wq->sq.in_use -= idx - wq->sq.cidx; + BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size); + + wq->sq.cidx = (uint16_t)idx; + CTR2(KTR_IW_CXGBE, "%s completing sq idx %u\n", + __func__, wq->sq.cidx); *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; t4_sq_consume(wq); } else { @@ -567,6 +679,7 @@ proc_cqe: *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; BUG_ON(t4_rq_empty(wq)); t4_rq_consume(wq); + goto skip_cqe; } flush_wq: @@ -645,6 +758,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; + c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); } } else { switch (CQE_OPCODE(&cqe)) { @@ -664,15 +778,16 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) case FW_RI_SEND_WITH_SE: wc->opcode = IB_WC_SEND; break; - case FW_RI_BIND_MW: - wc->opcode = IB_WC_BIND_MW; - break; - case FW_RI_LOCAL_INV: wc->opcode = IB_WC_LOCAL_INV; break; case FW_RI_FAST_REGISTER: - wc->opcode = IB_WC_FAST_REG_MR; + wc->opcode = IB_WC_REG_MR; + + /* Invalidate the MR if the fastreg failed */ + if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS) + c4iw_invalidate_mr(qhp->rhp, + CQE_WRID_FR_STAG(&cqe)); break; case C4IW_DRAIN_OPCODE: wc->opcode = IB_WC_SEND; @@ -787,9 +902,11 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) } struct ib_cq * -c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, +c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_context, struct ib_udata *udata) { + int entries = attr->cqe; + int vector = attr->comp_vector; struct c4iw_dev *rhp; struct c4iw_cq *chp; struct c4iw_create_cq_resp uresp; @@ -797,9 +914,10 @@ c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, int ret; size_t memsize, hwentries; struct c4iw_mm_entry *mm, *mm2; - int entries = attr->cqe; CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries); + if (attr->flags) + return ERR_PTR(-EINVAL); rhp = to_c4iw_dev(ibdev); @@ -807,6 +925,7 @@ c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, if (!chp) return ERR_PTR(-ENOMEM); + if (ib_context) ucontext = to_c4iw_ucontext(ib_context); @@ -822,9 +941,9 @@ c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, entries = roundup(entries, 16); /* - * Make actual HW queue 2x to avoid cidx_inc overflows. + * Make actual HW queue 2x to avoid cdix_inc overflows. */ - hwentries = entries * 2; + hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size); /* * Make HW queue at least 64 entries so GTS updates aren't too @@ -838,16 +957,11 @@ c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, /* * memsize must be a multiple of the page size if its a user cq. */ - if (ucontext) { + if (ucontext) memsize = roundup(memsize, PAGE_SIZE); - hwentries = memsize / sizeof *chp->cq.queue; - while (hwentries > T4_MAX_IQ_SIZE) { - memsize -= PAGE_SIZE; - hwentries = memsize / sizeof *chp->cq.queue; - } - } chp->cq.size = hwentries; chp->cq.memsize = memsize; + chp->cq.vector = vector; ret = create_cq(&rhp->rdev, &chp->cq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); @@ -866,6 +980,7 @@ c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, goto err2; if (ucontext) { + ret = -ENOMEM; mm = kmalloc(sizeof *mm, GFP_KERNEL); if (!mm) goto err3; @@ -895,7 +1010,7 @@ c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, insert_mmap(ucontext, mm); mm2->key = uresp.gts_key; - mm2->addr = chp->cq.ugts; + mm2->addr = chp->cq.bar2_pa; mm2->len = PAGE_SIZE; insert_mmap(ucontext, mm2); } @@ -926,16 +1041,16 @@ int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; - int ret; + int ret = 0; unsigned long flag; chp = to_c4iw_cq(ibcq); spin_lock_irqsave(&chp->lock, flag); - ret = t4_arm_cq(&chp->cq, - (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + t4_arm_cq(&chp->cq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + if (flags & IB_CQ_REPORT_MISSED_EVENTS) + ret = t4_cq_notempty(&chp->cq); spin_unlock_irqrestore(&chp->lock, flag); - if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - ret = 0; return ret; } #endif diff --git a/sys/dev/cxgbe/iw_cxgbe/device.c b/sys/dev/cxgbe/iw_cxgbe/device.c index 3c51abc2f674..98833da61100 100644 --- a/sys/dev/cxgbe/iw_cxgbe/device.c +++ b/sys/dev/cxgbe/iw_cxgbe/device.c @@ -66,7 +66,7 @@ c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, kfree(entry); } - list_for_each_safe(pos, nxt, &uctx->qpids) { + list_for_each_safe(pos, nxt, &uctx->cqids) { entry = list_entry(pos, struct c4iw_qid_list, entry); list_del_init(&entry->entry); kfree(entry); @@ -89,22 +89,54 @@ c4iw_rdev_open(struct c4iw_rdev *rdev) struct adapter *sc = rdev->adap; struct sge_params *sp = &sc->params.sge; int rc; + unsigned short ucq_density = 1 << sp->iq_s_qpp; /* # of user CQs/page */ + unsigned short udb_density = 1 << sp->eq_s_qpp; /* # of user DB/page */ + c4iw_init_dev_ucontext(rdev, &rdev->uctx); - /* XXX: we can probably make this work */ - if (sp->eq_s_qpp > PAGE_SHIFT || sp->iq_s_qpp > PAGE_SHIFT) { - device_printf(sc->dev, - "doorbell density too high (eq %d, iq %d, pg %d).\n", - sp->eq_s_qpp, sp->eq_s_qpp, PAGE_SHIFT); + /* + * This implementation assumes udb_density == ucq_density! Eventually + * we might need to support this but for now fail the open. Also the + * cqid and qpid range must match for now. 
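For reference, the doorbell-density math used in c4iw_rdev_open() nearby (udb_density, ucq_density, qpmask, qpshift) is a straight power-of-two derivation from the "queues per page" field: with 4 KB pages (PAGE_SHIFT 12) and eq_s_qpp 3 there are 8 user doorbells per page, the mask is 0x7 and the shift is 9, i.e. 512 bytes per doorbell region. The short check below uses example constants only, not values read from any adapter.

/* Sanity check of the density/mask/shift derivation (illustrative). */
#include <stdio.h>

#define MY_PAGE_SHIFT 12        /* assume 4 KB pages for the example */

int main(void)
{
    unsigned eq_s_qpp = 3;                      /* log2(queues per page) */
    unsigned udb_density = 1u << eq_s_qpp;      /* 8 doorbells per page  */
    unsigned qpmask = udb_density - 1;          /* 0x7                   */
    unsigned qpshift = MY_PAGE_SHIFT - eq_s_qpp;/* 512 bytes per doorbell */

    printf("density %u mask 0x%x shift %u bytes/db %u\n",
        udb_density, qpmask, qpshift, 1u << qpshift);
    return 0;
}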
+ */ + if (udb_density != ucq_density) { + device_printf(sc->dev, "unsupported udb/ucq densities %u/%u\n", + udb_density, ucq_density); + rc = -EINVAL; + goto err1; + } + if (sc->vres.qp.start != sc->vres.cq.start || + sc->vres.qp.size != sc->vres.cq.size) { + device_printf(sc->dev, "%s: unsupported qp and cq id ranges " + "qp start %u size %u cq start %u size %u\n", __func__, + sc->vres.qp.start, sc->vres.qp.size, sc->vres.cq.start, + sc->vres.cq.size); rc = -EINVAL; goto err1; } rdev->qpshift = PAGE_SHIFT - sp->eq_s_qpp; - rdev->qpmask = (1 << sp->eq_s_qpp) - 1; + rdev->qpmask = udb_density - 1; rdev->cqshift = PAGE_SHIFT - sp->iq_s_qpp; - rdev->cqmask = (1 << sp->iq_s_qpp) - 1; + rdev->cqmask = ucq_density - 1; + CTR5(KTR_IW_CXGBE, "%s dev %s stag start 0x%0x size 0x%0x num stags %d", + __func__, device_get_nameunit(sc->dev), sc->vres.stag.start, + sc->vres.stag.size, c4iw_num_stags(rdev)); + CTR5(KTR_IW_CXGBE, "%s pbl start 0x%0x size 0x%0x" + " rq start 0x%0x size 0x%0x", __func__, + sc->vres.pbl.start, sc->vres.pbl.size, + sc->vres.rq.start, sc->vres.rq.size); + CTR5(KTR_IW_CXGBE, "%s:qp qid start %u size %u cq qid start %u size %u", + __func__, sc->vres.qp.start, sc->vres.qp.size, + sc->vres.cq.start, sc->vres.cq.size); + /*TODO + CTR5(KTR_IW_CXGBE, "%s udb %pR db_reg %p gts_reg %p" + "qpmask 0x%x cqmask 0x%x", __func__, + db_reg,gts_reg,rdev->qpmask, rdev->cqmask); + */ + + if (c4iw_num_stags(rdev) == 0) { rc = -EINVAL; @@ -132,8 +164,34 @@ c4iw_rdev_open(struct c4iw_rdev *rdev) device_printf(sc->dev, "error %d initializing rqt pool\n", rc); goto err3; } + rdev->status_page = (struct t4_dev_status_page *) + __get_free_page(GFP_KERNEL); + if (!rdev->status_page) { + rc = -ENOMEM; + goto err4; + } + rdev->status_page->qp_start = sc->vres.qp.start; + rdev->status_page->qp_size = sc->vres.qp.size; + rdev->status_page->cq_start = sc->vres.cq.start; + rdev->status_page->cq_size = sc->vres.cq.size; + /* T5 and above devices don't need Doorbell recovery logic, + * so db_off is always set to '0'. + */ + rdev->status_page->db_off = 0; + + rdev->status_page->wc_supported = rdev->adap->iwt.wc_en; + + rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free"); + if (!rdev->free_workq) { + rc = -ENOMEM; + goto err5; + } return (0); +err5: + free_page((unsigned long)rdev->status_page); +err4: + c4iw_rqtpool_destroy(rdev); err3: c4iw_pblpool_destroy(rdev); err2: @@ -144,6 +202,7 @@ err1: static void c4iw_rdev_close(struct c4iw_rdev *rdev) { + free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); c4iw_rqtpool_destroy(rdev); c4iw_destroy_resource(&rdev->resource); @@ -173,6 +232,34 @@ c4iw_alloc(struct adapter *sc) } iwsc->rdev.adap = sc; + /* init various hw-queue params based on lld info */ + CTR3(KTR_IW_CXGBE, "%s: Ing. 
padding boundary is %d, " + "egrsstatuspagesize = %d", __func__, + sc->params.sge.pad_boundary, + sc->params.sge.spg_len); + + iwsc->rdev.hw_queue.t4_eq_status_entries = + sc->params.sge.spg_len / EQ_ESIZE; + iwsc->rdev.hw_queue.t4_max_eq_size = 65520; + iwsc->rdev.hw_queue.t4_max_iq_size = 65520; + iwsc->rdev.hw_queue.t4_max_rq_size = 8192 - + iwsc->rdev.hw_queue.t4_eq_status_entries - 1; + iwsc->rdev.hw_queue.t4_max_sq_size = + iwsc->rdev.hw_queue.t4_max_eq_size - + iwsc->rdev.hw_queue.t4_eq_status_entries - 1; + iwsc->rdev.hw_queue.t4_max_qp_depth = + iwsc->rdev.hw_queue.t4_max_rq_size; + iwsc->rdev.hw_queue.t4_max_cq_depth = + iwsc->rdev.hw_queue.t4_max_iq_size - 2; + iwsc->rdev.hw_queue.t4_stat_len = iwsc->rdev.adap->params.sge.spg_len; + + /* As T5 and above devices support BAR2 kernel doorbells & WC, we map + * all of BAR2, for both User and Kernel Doorbells-GTS. + */ + iwsc->rdev.bar2_kva = (void __iomem *)((u64)iwsc->rdev.adap->udbs_base); + iwsc->rdev.bar2_pa = vtophys(iwsc->rdev.adap->udbs_base); + iwsc->rdev.bar2_len = rman_get_size(iwsc->rdev.adap->udbs_res); + rc = c4iw_rdev_open(&iwsc->rdev); if (rc != 0) { device_printf(sc->dev, "Unable to open CXIO rdev (%d)\n", rc); @@ -185,6 +272,7 @@ c4iw_alloc(struct adapter *sc) idr_init(&iwsc->mmidr); spin_lock_init(&iwsc->lock); mutex_init(&iwsc->rdev.stats.lock); + iwsc->avail_ird = iwsc->rdev.adap->params.max_ird_adapter; return (iwsc); } @@ -208,6 +296,12 @@ c4iw_activate(struct adapter *sc) ASSERT_SYNCHRONIZED_OP(sc); + if (is_t4(sc)) { + device_printf(sc->dev, "No iWARP support for T4 devices, " + "please install T5 or above devices.\n"); + return (ENOSYS); + } + if (uld_active(sc, ULD_IWARP)) { KASSERT(0, ("%s: RDMA already eanbled on sc %p", __func__, sc)); return (0); @@ -330,7 +424,7 @@ c4iw_modevent(module_t mod, int cmd, void *arg) case MOD_LOAD: rc = c4iw_mod_load(); if (rc == 0) - printf("iw_cxgbe: Chelsio T4/T5/T6 RDMA driver loaded.\n"); + printf("iw_cxgbe: Chelsio T5/T6 RDMA driver loaded.\n"); break; case MOD_UNLOAD: diff --git a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h index f16540c5df73..2f649f5f8c50 100644 --- a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h +++ b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h @@ -115,6 +115,7 @@ struct c4iw_dev_ucontext { enum c4iw_rdev_flags { T4_FATAL_ERROR = (1<<0), + T4_STATUS_PAGE_DISABLED = (1<<1), }; struct c4iw_stat { @@ -133,6 +134,17 @@ struct c4iw_stats { struct c4iw_stat rqt; }; +struct c4iw_hw_queue { + int t4_eq_status_entries; + int t4_max_eq_size; + int t4_max_iq_size; + int t4_max_rq_size; + int t4_max_sq_size; + int t4_max_qp_depth; + int t4_max_cq_depth; + int t4_stat_len; +}; + struct c4iw_rdev { struct adapter *adap; struct c4iw_resource resource; @@ -145,6 +157,12 @@ struct c4iw_rdev { vmem_t *pbl_arena; u32 flags; struct c4iw_stats stats; + struct c4iw_hw_queue hw_queue; + struct t4_dev_status_page *status_page; + unsigned long bar2_pa; + void __iomem *bar2_kva; + unsigned int bar2_len; + struct workqueue_struct *free_workq; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -178,7 +196,7 @@ static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret) static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, struct c4iw_wr_wait *wr_waitp, - u32 hwtid, u32 qpid, const char *func) + u32 hwtid, u32 qpid, struct socket *so, const char *func) { struct adapter *sc = rdev->adap; unsigned to = C4IW_WR_TO; @@ -193,6 +211,17 @@ c4iw_wait_for_reply(struct c4iw_rdev *rdev, struct c4iw_wr_wait *wr_waitp, getmicrotime(&t1); do { + /* If 
waiting for reply in rdma_init()/rdma_fini() threads, then + * check if there are any connection errors. + */ + if (so && so->so_error) { + wr_waitp->ret = -ECONNRESET; + CTR5(KTR_IW_CXGBE, "%s - Connection ERROR %u for sock %p" + "tid %u qpid %u", func, + so->so_error, so, hwtid, qpid); + break; + } + ret = wait_for_completion_timeout(&wr_waitp->completion, to); if (!ret) { getmicrotime(&t2); @@ -233,6 +262,7 @@ struct c4iw_dev { struct idr mmidr; spinlock_t lock; struct dentry *debugfs_root; + u32 avail_ird; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -313,6 +343,13 @@ static inline void remove_handle_nolock(struct c4iw_dev *rhp, _remove_handle(rhp, idr, id, 0); } +extern int c4iw_max_read_depth; + +static inline int cur_max_read_depth(struct c4iw_dev *dev) +{ + return min(dev->rdev.adap->params.max_ordird_qp, c4iw_max_read_depth); +} + struct c4iw_pd { struct ib_pd ibpd; u32 pdid; @@ -348,6 +385,10 @@ struct c4iw_mr { struct c4iw_dev *rhp; u64 kva; struct tpt_attributes attr; + u64 *mpl; + dma_addr_t mpl_addr; + u32 max_mpl_len; + u32 mpl_len; }; static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) @@ -367,20 +408,6 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) return container_of(ibmw, struct c4iw_mw, ibmw); } -struct c4iw_fr_page_list { - struct ib_fast_reg_page_list ibpl; - DECLARE_PCI_UNMAP_ADDR(mapping); - dma_addr_t dma_addr; - struct c4iw_dev *dev; - int size; -}; - -static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( - struct ib_fast_reg_page_list *ibpl) -{ - return container_of(ibpl, struct c4iw_fr_page_list, ibpl); -} - struct c4iw_cq { struct ib_cq ibcq; struct c4iw_dev *rhp; @@ -432,6 +459,7 @@ struct c4iw_qp_attributes { u8 ecode; u16 sq_db_inc; u16 rq_db_inc; + u8 send_term; }; struct c4iw_qp { @@ -442,10 +470,12 @@ struct c4iw_qp { struct t4_wq wq; spinlock_t lock; struct mutex mutex; - atomic_t refcnt; + struct kref kref; wait_queue_head_t wait; struct timer_list timer; int sq_sig_all; + struct work_struct free_work; + struct c4iw_ucontext *ucontext; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) @@ -459,6 +489,7 @@ struct c4iw_ucontext { u32 key; spinlock_t mmap_lock; struct list_head mmaps; + struct kref kref; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) @@ -466,6 +497,17 @@ static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) return container_of(c, struct c4iw_ucontext, ibucontext); } +void _c4iw_free_ucontext(struct kref *kref); + +static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext) +{ + kref_put(&ucontext->kref, _c4iw_free_ucontext); +} +static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext) +{ + kref_get(&ucontext->kref); +} + struct c4iw_mm_entry { struct list_head entry; u64 addr; @@ -734,7 +776,8 @@ enum c4iw_ep_flags { RELEASE_RESOURCES = 2, CLOSE_SENT = 3, TIMEOUT = 4, - QP_REFERENCED = 5 + QP_REFERENCED = 5, + STOP_MPA_TIMER = 7, }; enum c4iw_ep_history { @@ -776,8 +819,8 @@ struct c4iw_ep_common { enum c4iw_ep_state state; struct kref kref; struct mutex mutex; - struct sockaddr_in local_addr; - struct sockaddr_in remote_addr; + struct sockaddr_storage local_addr; + struct sockaddr_storage remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; unsigned long history; @@ -792,11 +835,13 @@ struct c4iw_listen_ep { struct c4iw_ep_common com; unsigned int stid; int backlog; + struct list_head listen_ep_list; /* list of all listener ep's bound + to one port address */ }; struct c4iw_ep { 
struct c4iw_ep_common com; - struct c4iw_ep *parent_ep; + struct c4iw_listen_ep *parent_ep; struct timer_list timer; unsigned int atid; u32 hwtid; @@ -852,6 +897,8 @@ typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct mbuf *m); int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t); +void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid, + struct c4iw_dev_ucontext *uctx); u32 c4iw_get_resource(struct c4iw_id_table *id_table); void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); @@ -875,41 +922,30 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); -int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, - struct ib_mw_bind *mw_bind); int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog); -void c4iw_destroy_listen_ep(struct iw_cm_id *cm_id); +int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); +int c4iw_destroy_listen(struct iw_cm_id *cm_id); int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); void c4iw_qp_rem_ref(struct ib_qp *qp); -void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list); -struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl( - struct ib_device *device, - int page_list_len); -struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth); +struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg); +int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 - virt, int acc, struct ib_udata *udata, int mr_id); + virt, int acc, struct ib_udata *udata); struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); -struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, - int acc, - u64 *iova_start); -int c4iw_reregister_phys_mem(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, - int acc, u64 *iova_start); int c4iw_dereg_mr(struct ib_mr *ib_mr); +void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); int c4iw_destroy_cq(struct ib_cq *ib_cq); -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_context, - struct ib_udata *udata); +struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_context, + struct ib_udata *udata); int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_destroy_qp(struct ib_qp *ib_qp); @@ -926,12 +962,13 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size); u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); int c4iw_ofld_send(struct c4iw_rdev *rdev, struct mbuf *m); 
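The c4iw_get_ucontext()/c4iw_put_ucontext() helpers declared earlier in this header follow the usual kref pattern: the last put invokes a release callback that tears the object down. Below is a compact userspace analogue of that pattern, using C11 atomics in place of the kernel's kref and invented names (ucontext_like, ctx_get, ctx_put), purely for illustration.

/* Userspace analogue of the get/put-with-release pattern (illustrative). */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ucontext_like {
    atomic_int ref;
    /* per-context state would live here */
};

static struct ucontext_like *ctx_alloc(void)
{
    struct ucontext_like *c = calloc(1, sizeof(*c));

    if (c != NULL)
        atomic_store(&c->ref, 1);   /* creator holds the first reference */
    return c;
}

static void ctx_get(struct ucontext_like *c)
{
    atomic_fetch_add(&c->ref, 1);
}

static void ctx_put(struct ucontext_like *c)
{
    if (atomic_fetch_sub(&c->ref, 1) == 1) {
        /* last reference dropped: release backing state */
        printf("releasing context %p\n", (void *)c);
        free(c);
    }
}

int main(void)
{
    struct ucontext_like *c = ctx_alloc();

    ctx_get(c);     /* e.g. a QP taking a reference on its ucontext */
    ctx_put(c);     /* QP destroyed */
    ctx_put(c);     /* last put: context released here */
    return 0;
}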
-void c4iw_flush_hw_cq(struct t4_cq *cq); +void c4iw_flush_hw_cq(struct c4iw_cq *cq); void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count); int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp); +int __c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp); int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count); -int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count); +int c4iw_flush_sq(struct c4iw_qp *qhp); int c4iw_ev_handler(struct sge_iq *, const struct rsp_ctrl *); u16 c4iw_rqes_posted(struct c4iw_qp *qhp); int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe); @@ -942,9 +979,9 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, struct c4iw_dev_ucontext *uctx); void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); -void process_newconn(struct iw_cm_id *parent_cm_id, - struct socket *child_so); - +void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, + enum t4_bar2_qtype qtype, + unsigned int *pbar2_qid, u64 *pbar2_pa); extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; extern int c4iw_max_read_depth; diff --git a/sys/dev/cxgbe/iw_cxgbe/mem.c b/sys/dev/cxgbe/iw_cxgbe/mem.c index 27bd902a9640..7a62e82bf679 100644 --- a/sys/dev/cxgbe/iw_cxgbe/mem.c +++ b/sys/dev/cxgbe/iw_cxgbe/mem.c @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include #include "iw_cxgbe.h" +int use_dsgl = 1; #define T4_ULPTX_MIN_IO 32 #define C4IW_MAX_INLINE_SIZE 96 @@ -50,9 +51,7 @@ static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length) { - return ((is_t4(dev->rdev.adap) || - is_t5(dev->rdev.adap)) && - length >= 8*1024*1024*1024ULL); + return (is_t5(dev->rdev.adap) && length >= 8*1024*1024*1024ULL); } static int @@ -68,10 +67,8 @@ write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) u32 cmd; cmd = cpu_to_be32(V_ULPTX_CMD(ULP_TX_MEM_WRITE)); - if (is_t4(sc)) - cmd |= cpu_to_be32(F_ULP_MEMIO_ORDER); - else - cmd |= cpu_to_be32(F_T5_ULP_MEMIO_IMM); + + cmd |= cpu_to_be32(F_T5_ULP_MEMIO_IMM); addr &= 0x7FFFFFF; CTR3(KTR_IW_CXGBE, "%s addr 0x%x len %u", __func__, addr, len); @@ -124,7 +121,7 @@ write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) len -= C4IW_MAX_INLINE_SIZE; } - ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__); return ret; } @@ -277,32 +274,6 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, return ret; } -static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, - struct c4iw_mr *mhp, int shift, int npages) -{ - u32 stag; - int ret; - - if (npages > mhp->attr.pbl_size) - return -ENOMEM; - - stag = mhp->attr.stag; - ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid, - FW_RI_STAG_NSMR, mhp->attr.perms, - mhp->attr.mw_bind_enable, mhp->attr.zbva, - mhp->attr.va_fbo, mhp->attr.len, shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr); - if (ret) - return ret; - - ret = finish_mem_reg(mhp, stag); - if (ret) - dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - - return ret; -} - static int alloc_pbl(struct c4iw_mr *mhp, int npages) { mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev, @@ -316,223 +287,6 @@ static int alloc_pbl(struct c4iw_mr *mhp, int npages) return 0; } -static int build_phys_page_list(struct ib_phys_buf 
*buffer_list, - int num_phys_buf, u64 *iova_start, - u64 *total_size, int *npages, - int *shift, __be64 **page_list) -{ - u64 mask; - int i, j, n; - - mask = 0; - *total_size = 0; - for (i = 0; i < num_phys_buf; ++i) { - if (i != 0 && buffer_list[i].addr & ~PAGE_MASK) - return -EINVAL; - if (i != 0 && i != num_phys_buf - 1 && - (buffer_list[i].size & ~PAGE_MASK)) - return -EINVAL; - *total_size += buffer_list[i].size; - if (i > 0) - mask |= buffer_list[i].addr; - else - mask |= buffer_list[i].addr & PAGE_MASK; - if (i != num_phys_buf - 1) - mask |= buffer_list[i].addr + buffer_list[i].size; - else - mask |= (buffer_list[i].addr + buffer_list[i].size + - PAGE_SIZE - 1) & PAGE_MASK; - } - - /* Find largest page shift we can use to cover buffers */ - for (*shift = PAGE_SHIFT; *shift < PAGE_SHIFT + M_FW_RI_TPTE_PS; - ++(*shift)) - if ((1ULL << *shift) & mask) - break; - - buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); - buffer_list[0].addr &= ~0ull << *shift; - - *npages = 0; - for (i = 0; i < num_phys_buf; ++i) - *npages += (buffer_list[i].size + - (1ULL << *shift) - 1) >> *shift; - - if (!*npages) - return -EINVAL; - - *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL); - if (!*page_list) - return -ENOMEM; - - n = 0; - for (i = 0; i < num_phys_buf; ++i) - for (j = 0; - j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift; - ++j) - (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr + - ((u64) j << *shift)); - - CTR6(KTR_IW_CXGBE, - "%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d", __func__, - (unsigned long long)*iova_start, (unsigned long long)mask, *shift, - (unsigned long long)*total_size, *npages); - - return 0; - -} - -int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask, - struct ib_pd *pd, struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start) -{ - - struct c4iw_mr mh, *mhp; - struct c4iw_pd *php; - struct c4iw_dev *rhp; - __be64 *page_list = NULL; - int shift = 0; - u64 total_size = 0; - int npages = 0; - int ret; - - CTR3(KTR_IW_CXGBE, "%s ib_mr %p ib_pd %p", __func__, mr, pd); - - /* There can be no memory windows */ - if (atomic_read(&mr->usecnt)) - return -EINVAL; - - mhp = to_c4iw_mr(mr); - rhp = mhp->rhp; - php = to_c4iw_pd(mr->pd); - - /* make sure we are on the same adapter */ - if (rhp != php->rhp) - return -EINVAL; - - memcpy(&mh, mhp, sizeof *mhp); - - if (mr_rereg_mask & IB_MR_REREG_PD) - php = to_c4iw_pd(pd); - if (mr_rereg_mask & IB_MR_REREG_ACCESS) { - mh.attr.perms = c4iw_ib_to_tpt_access(acc); - mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) == - IB_ACCESS_MW_BIND; - } - if (mr_rereg_mask & IB_MR_REREG_TRANS) { - ret = build_phys_page_list(buffer_list, num_phys_buf, - iova_start, - &total_size, &npages, - &shift, &page_list); - if (ret) - return ret; - } - if (mr_exceeds_hw_limits(rhp, total_size)) { - kfree(page_list); - return -EINVAL; - } - ret = reregister_mem(rhp, php, &mh, shift, npages); - kfree(page_list); - if (ret) - return ret; - if (mr_rereg_mask & IB_MR_REREG_PD) - mhp->attr.pdid = php->pdid; - if (mr_rereg_mask & IB_MR_REREG_ACCESS) - mhp->attr.perms = c4iw_ib_to_tpt_access(acc); - if (mr_rereg_mask & IB_MR_REREG_TRANS) { - mhp->attr.zbva = 0; - mhp->attr.va_fbo = *iova_start; - mhp->attr.page_size = shift - 12; - mhp->attr.len = total_size; - mhp->attr.pbl_size = npages; - } - - return 0; -} - -struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, int acc, u64 *iova_start) -{ - __be64 *page_list; - int shift; - u64 
total_size; - int npages; - struct c4iw_dev *rhp; - struct c4iw_pd *php; - struct c4iw_mr *mhp; - int ret; - - CTR2(KTR_IW_CXGBE, "%s ib_pd %p", __func__, pd); - php = to_c4iw_pd(pd); - rhp = php->rhp; - - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - /* First check that we have enough alignment */ - if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { - ret = -EINVAL; - goto err; - } - - if (num_phys_buf > 1 && - ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) { - ret = -EINVAL; - goto err; - } - - ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, - &total_size, &npages, &shift, - &page_list); - if (ret) - goto err; - - if (mr_exceeds_hw_limits(rhp, total_size)) { - kfree(page_list); - ret = -EINVAL; - goto err; - } - ret = alloc_pbl(mhp, npages); - if (ret) { - kfree(page_list); - goto err; - } - - ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr, - npages); - kfree(page_list); - if (ret) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - - mhp->attr.perms = c4iw_ib_to_tpt_access(acc); - mhp->attr.va_fbo = *iova_start; - mhp->attr.page_size = shift - 12; - - mhp->attr.len = total_size; - mhp->attr.pbl_size = npages; - ret = register_mem(rhp, php, mhp, shift); - if (ret) - goto err_pbl; - - return &mhp->ibmr; - -err_pbl: - c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, - mhp->attr.pbl_size << 3); - -err: - kfree(mhp); - return ERR_PTR(ret); - -} - struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) { struct c4iw_dev *rhp; @@ -556,12 +310,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) mhp->attr.zbva = 0; mhp->attr.va_fbo = 0; mhp->attr.page_size = 0; - mhp->attr.len = ~0UL; + mhp->attr.len = ~0ULL; mhp->attr.pbl_size = 0; ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, FW_RI_STAG_NSMR, mhp->attr.perms, - mhp->attr.mw_bind_enable, 0, 0, ~0UL, 0, 0, 0); + mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0); if (ret) goto err1; @@ -578,7 +332,7 @@ err1: } struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int acc, struct ib_udata *udata, int mr_id) + u64 virt, int acc, struct ib_udata *udata) { __be64 *pages; int shift, n, len; @@ -680,7 +434,8 @@ err: return ERR_PTR(err); } -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -689,6 +444,9 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) u32 stag = 0; int ret; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + php = to_c4iw_pd(pd); rhp = php->rhp; mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); @@ -732,7 +490,9 @@ int c4iw_dealloc_mw(struct ib_mw *mw) return 0; } -struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) +struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -740,28 +500,43 @@ struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) u32 mmid; u32 stag = 0; int ret = 0; + int length = roundup(max_num_sg * sizeof(u64), 32); php = to_c4iw_pd(pd); rhp = php->rhp; + + if (mr_type != IB_MR_TYPE_MEM_REG || + max_num_sg > t4_max_fr_depth( + rhp->rdev.adap->params.ulptx_memwrite_dsgl && use_dsgl)) + return ERR_PTR(-EINVAL); + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) { ret = -ENOMEM; goto err; } + mhp->mpl = 
dma_alloc_coherent(rhp->ibdev.dma_device, + length, &mhp->mpl_addr, GFP_KERNEL); + if (!mhp->mpl) { + ret = -ENOMEM; + goto err_mpl; + } + mhp->max_mpl_len = length; + mhp->rhp = rhp; - ret = alloc_pbl(mhp, pbl_depth); + ret = alloc_pbl(mhp, max_num_sg); if (ret) goto err1; - mhp->attr.pbl_size = pbl_depth; + mhp->attr.pbl_size = max_num_sg; ret = allocate_stag(&rhp->rdev, &stag, php->pdid, - mhp->attr.pbl_size, mhp->attr.pbl_addr); + mhp->attr.pbl_size, mhp->attr.pbl_addr); if (ret) goto err2; mhp->attr.pdid = php->pdid; mhp->attr.type = FW_RI_STAG_NSMR; mhp->attr.stag = stag; - mhp->attr.state = 1; + mhp->attr.state = 0; mmid = (stag) >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { @@ -769,8 +544,7 @@ struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) goto err3; } - CTR4(KTR_IW_CXGBE, "%s mmid 0x%x mhp %p stag 0x%x", __func__, mmid, mhp, - stag); + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); return &(mhp->ibmr); err3: dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, @@ -779,42 +553,36 @@ err2: c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); err1: + dma_free_coherent(rhp->ibdev.dma_device, + mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); +err_mpl: kfree(mhp); err: return ERR_PTR(ret); } - -struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, - int page_list_len) +static int c4iw_set_page(struct ib_mr *ibmr, u64 addr) { - struct c4iw_fr_page_list *c4pl; - struct c4iw_dev *dev = to_c4iw_dev(device); - bus_addr_t dma_addr; - int size = sizeof *c4pl + page_list_len * sizeof(u64); + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); - c4pl = contigmalloc(size, - M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); - if (c4pl) - dma_addr = vtophys(c4pl); - else - return ERR_PTR(-ENOMEM); + if (unlikely(mhp->mpl_len == mhp->max_mpl_len)) + return -ENOMEM; - pci_unmap_addr_set(c4pl, mapping, dma_addr); - c4pl->dma_addr = dma_addr; - c4pl->dev = dev; - c4pl->size = size; - c4pl->ibpl.page_list = (u64 *)(c4pl + 1); - c4pl->ibpl.max_page_list_len = page_list_len; + mhp->mpl[mhp->mpl_len++] = addr; - return &c4pl->ibpl; + return 0; } -void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) +int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) { - struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); - contigfree(c4pl, c4pl->size, M_DEVBUF); + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); + + mhp->mpl_len = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page); } + int c4iw_dereg_mr(struct ib_mr *ib_mr) { struct c4iw_dev *rhp; @@ -822,9 +590,6 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) u32 mmid; CTR2(KTR_IW_CXGBE, "%s ib_mr %p", __func__, ib_mr); - /* There can be no memory windows */ - if (atomic_read(&ib_mr->usecnt)) - return -EINVAL; mhp = to_c4iw_mr(ib_mr); rhp = mhp->rhp; @@ -843,4 +608,16 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) kfree(mhp); return 0; } + +void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey) +{ + struct c4iw_mr *mhp; + unsigned long flags; + + spin_lock_irqsave(&rhp->lock, flags); + mhp = get_mhp(rhp, rkey >> 8); + if (mhp) + mhp->attr.state = 0; + spin_unlock_irqrestore(&rhp->lock, flags); +} #endif diff --git a/sys/dev/cxgbe/iw_cxgbe/provider.c b/sys/dev/cxgbe/iw_cxgbe/provider.c index 0cc698a7e38f..06cb6c6eca5d 100644 --- a/sys/dev/cxgbe/iw_cxgbe/provider.c +++ b/sys/dev/cxgbe/iw_cxgbe/provider.c @@ -44,7 +44,7 @@ __FBSDID("$FreeBSD$"); #include "iw_cxgbe.h" #include 
"user.h" - +extern int use_dsgl; static int fastreg_support = 1; module_param(fastreg_support, int, 0644); MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default = 1)"); @@ -78,24 +78,40 @@ static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, struct ib_wc *in_wc, - struct ib_grh *in_grh, struct ib_mad *in_mad, - struct ib_mad *out_mad) + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) + { return -ENOSYS; } -static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +void _c4iw_free_ucontext(struct kref *kref) { - struct c4iw_dev *rhp = to_c4iw_dev(context->device); - struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + struct c4iw_ucontext *ucontext; + struct c4iw_dev *rhp; struct c4iw_mm_entry *mm, *tmp; - CTR2(KTR_IW_CXGBE, "%s context %p", __func__, context); + ucontext = container_of(kref, struct c4iw_ucontext, kref); + rhp = to_c4iw_dev(ucontext->ibucontext.device); + + CTR2(KTR_IW_CXGBE, "%s ucontext %p", __func__, ucontext); list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) kfree(mm); c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx); kfree(ucontext); +} + +static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +{ + struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + + CTR2(KTR_IW_CXGBE, "%s context %p", __func__, context); + c4iw_put_ucontext(ucontext); return 0; } @@ -104,23 +120,60 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, { struct c4iw_ucontext *context; struct c4iw_dev *rhp = to_c4iw_dev(ibdev); + static int warned; + struct c4iw_alloc_ucontext_resp uresp; + int ret = 0; + struct c4iw_mm_entry *mm = NULL; - CTR2(KTR_IW_CXGBE, "%s ibdev %p", __func__, ibdev); + PDBG("%s ibdev %p\n", __func__, ibdev); context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); + if (!context) { + ret = -ENOMEM; + goto err; + } + c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); - return &context->ibucontext; -} + kref_init(&context->kref); -#ifdef DOT5 -static inline pgprot_t t4_pgprot_wc(pgprot_t prot) -{ - return pgprot_writecombine(prot); + if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) { + if (!warned++) + log(LOG_ERR, "%s Warning - downlevel libcxgb4 " + "(non-fatal), device status page disabled.\n", + __func__); + rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED; + } else { + + mm = kmalloc(sizeof *mm, GFP_KERNEL); + if (!mm) + goto err_free; + + uresp.status_page_size = PAGE_SIZE; + + spin_lock(&context->mmap_lock); + uresp.status_page_key = context->key; + context->key += PAGE_SIZE; + spin_unlock(&context->mmap_lock); + + ret = ib_copy_to_udata(udata, &uresp, + sizeof(uresp) - sizeof(uresp.reserved)); + if (ret) + goto err_mm; + + mm->key = uresp.status_page_key; + mm->addr = vtophys(rhp->rdev.status_page); + mm->len = PAGE_SIZE; + insert_mmap(context, mm); + } + return &context->ibucontext; +err_mm: + kfree(mm); +err_free: + kfree(context); +err: + return ERR_PTR(ret); } -#endif static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { @@ -130,12 +183,10 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) int ret = 0; struct c4iw_mm_entry *mm; struct c4iw_ucontext 
*ucontext; - u64 addr, paddr; + u64 addr = 0; - u64 va_regs_res = 0, va_udbs_res = 0; - u64 len_regs_res = 0, len_udbs_res = 0; - - CTR3(KTR_IW_CXGBE, "%s:1 ctx %p vma %p", __func__, context, vma); + CTR4(KTR_IW_CXGBE, "%s:1 ctx %p vma %p, vm_start %u", __func__, + context, vma, vma->vm_start); CTR4(KTR_IW_CXGBE, "%s:1a pgoff 0x%lx key 0x%x len %d", __func__, vma->vm_pgoff, key, len); @@ -158,59 +209,16 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) addr = mm->addr; kfree(mm); - va_regs_res = (u64)rman_get_virtual(rdev->adap->regs_res); - len_regs_res = (u64)rman_get_size(rdev->adap->regs_res); - va_udbs_res = (u64)rman_get_virtual(rdev->adap->udbs_res); - len_udbs_res = (u64)rman_get_size(rdev->adap->udbs_res); + /* user DB-GTS registers if addr in udbs_res range, + * else WQ or CQ memory. + * */ + if (rdev->adap->iwt.wc_en && addr >= rdev->bar2_pa && + addr < rdev->bar2_pa + rdev->bar2_len) + vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); - CTR6(KTR_IW_CXGBE, - "%s:4 addr %p, masync region %p:%p, udb region %p:%p", __func__, - addr, va_regs_res, va_regs_res+len_regs_res, va_udbs_res, - va_udbs_res+len_udbs_res); - - if (addr >= va_regs_res && addr < va_regs_res + len_regs_res) { - CTR4(KTR_IW_CXGBE, "%s:5 MA_SYNC addr %p region %p, reglen %u", - __func__, addr, va_regs_res, len_regs_res); - /* - * MA_SYNC register... - */ - paddr = vtophys(addr); - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - ret = io_remap_pfn_range(vma, vma->vm_start, - paddr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } else { - - if (addr >= va_udbs_res && addr < va_udbs_res + len_udbs_res) { - /* - * Map user DB or OCQP memory... - */ - paddr = vtophys(addr); - CTR4(KTR_IW_CXGBE, - "%s:6 USER DB-GTS addr %p region %p, reglen %u", - __func__, addr, va_udbs_res, len_udbs_res); -#ifdef DOT5 - if (!is_t4(rdev->lldi.adapter_type) && map_udb_as_wc) - vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); - else -#endif - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - ret = io_remap_pfn_range(vma, vma->vm_start, - paddr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } else { - /* - * Map WQ or CQ contig dma memory... 
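The reworked c4iw_mmap() above replaces the old three-way address classification with a single range test: if the looked-up address falls inside the BAR2 window (and write combining is enabled) the mapping gets a write-combining page protection, otherwise the default protection is kept. The test itself is just an interval check, sketched below with made-up addresses.

/* Interval test in the spirit of the BAR2 check above (illustrative). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool in_bar2_window(uint64_t addr, uint64_t bar2_pa, uint64_t bar2_len)
{
    return addr >= bar2_pa && addr < bar2_pa + bar2_len;
}

int main(void)
{
    uint64_t bar2_pa = 0xd0000000ULL, bar2_len = 0x20000ULL;  /* made up */

    printf("%d %d\n",
        in_bar2_window(0xd0001000ULL, bar2_pa, bar2_len),   /* 1: use WC  */
        in_bar2_window(0xc0000000ULL, bar2_pa, bar2_len));  /* 0: default */
    return 0;
}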
- */ - CTR4(KTR_IW_CXGBE, - "%s:7 WQ/CQ addr %p vm_start %u vma %p", __func__, - addr, vma->vm_start, vma); - ret = io_remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } - } - CTR4(KTR_IW_CXGBE, "%s:8 ctx %p vma %p ret %u", __func__, context, vma, + ret = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + CTR4(KTR_IW_CXGBE, "%s:4 ctx %p vma %p ret %u", __func__, context, vma, ret); return ret; } @@ -303,14 +311,17 @@ c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) } static int -c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct c4iw_dev *dev = to_c4iw_dev(ibdev); struct adapter *sc = dev->rdev.adap; - const int spg_ndesc = sc->params.sge.spg_len / EQ_ESIZE; CTR3(KTR_IW_CXGBE, "%s ibdev %p, props %p", __func__, ibdev, props); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, sc->port[0]->vi[0].hw_addr, ETHER_ADDR_LEN); @@ -322,7 +333,7 @@ c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props) props->vendor_part_id = pci_get_device(sc->dev); props->max_mr_size = T4_MAX_MR_SIZE; props->max_qp = sc->vres.qp.size / 2; - props->max_qp_wr = T4_MAX_QP_DEPTH(spg_ndesc); + props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; props->max_sge = T4_MAX_RECV_SGE; props->max_sge_rd = 1; props->max_res_rd_atom = sc->params.max_ird_adapter; @@ -330,11 +341,12 @@ c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props) c4iw_max_read_depth); props->max_qp_init_rd_atom = props->max_qp_rd_atom; props->max_cq = sc->vres.qp.size; - props->max_cqe = T4_MAX_CQ_DEPTH; + props->max_cqe = dev->rdev.hw_queue.t4_max_cq_depth; props->max_mr = c4iw_num_stags(&dev->rdev); props->max_pd = T4_MAX_NUM_PD; props->local_ca_ack_delay = 0; - props->max_fast_reg_page_list_len = T4_MAX_FR_DEPTH; + props->max_fast_reg_page_list_len = + t4_max_fr_depth(sc->params.ulptx_memwrite_dsgl && use_dsgl); return (0); } @@ -405,6 +417,22 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = c4iw_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} /* * Returns -errno on error. 
@@ -472,16 +500,12 @@ c4iw_register_device(struct c4iw_dev *dev) ibdev->resize_cq = c4iw_resize_cq; ibdev->poll_cq = c4iw_poll_cq; ibdev->get_dma_mr = c4iw_get_dma_mr; - ibdev->reg_phys_mr = c4iw_register_phys_mem; - ibdev->rereg_phys_mr = c4iw_reregister_phys_mem; ibdev->reg_user_mr = c4iw_reg_user_mr; ibdev->dereg_mr = c4iw_dereg_mr; ibdev->alloc_mw = c4iw_alloc_mw; - ibdev->bind_mw = c4iw_bind_mw; ibdev->dealloc_mw = c4iw_dealloc_mw; - ibdev->alloc_fast_reg_mr = c4iw_alloc_fast_reg_mr; - ibdev->alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl; - ibdev->free_fast_reg_page_list = c4iw_free_fastreg_pbl; + ibdev->alloc_mr = c4iw_alloc_mr; + ibdev->map_mr_sg = c4iw_map_mr_sg; ibdev->attach_mcast = c4iw_multicast_attach; ibdev->detach_mcast = c4iw_multicast_detach; ibdev->process_mad = c4iw_process_mad; @@ -498,9 +522,8 @@ c4iw_register_device(struct c4iw_dev *dev) iwcm->connect = c4iw_connect; iwcm->accept = c4iw_accept_cr; iwcm->reject = c4iw_reject_cr; - iwcm->create_listen_ep = c4iw_create_listen_ep; - iwcm->destroy_listen_ep = c4iw_destroy_listen_ep; - iwcm->newconn = process_newconn; + iwcm->create_listen = c4iw_create_listen; + iwcm->destroy_listen = c4iw_destroy_listen; iwcm->add_ref = c4iw_qp_add_ref; iwcm->rem_ref = c4iw_qp_rem_ref; iwcm->get_qp = c4iw_get_qp; diff --git a/sys/dev/cxgbe/iw_cxgbe/qp.c b/sys/dev/cxgbe/iw_cxgbe/qp.c index 233687691fca..9eada7ecb103 100644 --- a/sys/dev/cxgbe/iw_cxgbe/qp.c +++ b/sys/dev/cxgbe/iw_cxgbe/qp.c @@ -63,9 +63,34 @@ struct cpl_set_tcb_rpl; #include "iw_cxgbe.h" #include "user.h" - +extern int use_dsgl; static int creds(struct toepcb *toep, struct inpcb *inp, size_t wrsize); +static int max_fr_immd = T4_MAX_FR_IMMD;//SYSCTL parameter later... +static int alloc_ird(struct c4iw_dev *dev, u32 ird) +{ + int ret = 0; + + spin_lock_irq(&dev->lock); + if (ird <= dev->avail_ird) + dev->avail_ird -= ird; + else + ret = -ENOMEM; + spin_unlock_irq(&dev->lock); + + if (ret) + log(LOG_WARNING, "%s: device IRD resources exhausted\n", + device_get_nameunit(dev->rdev.adap->dev)); + + return ret; +} + +static void free_ird(struct c4iw_dev *dev, int ird) +{ + spin_lock_irq(&dev->lock); + dev->avail_ird += ird; + spin_unlock_irq(&dev->lock); +} static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) { @@ -75,43 +100,20 @@ static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) spin_unlock_irqrestore(&qhp->lock, flag); } -static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) -{ - - contigfree(sq->queue, sq->memsize, M_DEVBUF); -} - -static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) -{ - - dealloc_host_sq(rdev, sq); -} - -static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) -{ - sq->queue = contigmalloc(sq->memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, - 4096, 0); - - if (sq->queue) - sq->dma_addr = vtophys(sq->queue); - else - return -ENOMEM; - sq->phys_addr = vtophys(sq->queue); - pci_unmap_addr_set(sq, mapping, sq->dma_addr); - CTR4(KTR_IW_CXGBE, "%s sq %p dma_addr %p phys_addr %p", __func__, - sq->queue, sq->dma_addr, sq->phys_addr); - return 0; -} - static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct c4iw_dev_ucontext *uctx) { + struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev); /* * uP clears EQ contexts when the connection exits rdma mode, * so no need to post a RESET WR for these EQs. 
*/ - contigfree(wq->rq.queue, wq->rq.memsize, M_DEVBUF); - dealloc_sq(rdev, &wq->sq); + dma_free_coherent(rhp->ibdev.dma_device, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); + dma_free_coherent(rhp->ibdev.dma_device, + wq->sq.memsize, wq->sq.queue, + dma_unmap_addr(&wq->sq, mapping)); c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); kfree(wq->rq.sw_rq); kfree(wq->sq.sw_sq); @@ -125,73 +127,107 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct c4iw_dev_ucontext *uctx) { struct adapter *sc = rdev->adap; + struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev); int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; struct c4iw_wr_wait wr_wait; - int ret; + int ret = 0; int eqsize; struct wrqe *wr; - const int spg_ndesc = sc->params.sge.spg_len / EQ_ESIZE; + u64 sq_bar2_qoffset = 0, rq_bar2_qoffset = 0; wq->sq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->sq.qid) return -ENOMEM; wq->rq.qid = c4iw_get_qpid(rdev, uctx); - if (!wq->rq.qid) - goto err1; + if (!wq->rq.qid) { + ret = -ENOMEM; + goto free_sq_qid; + } if (!user) { wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq, GFP_KERNEL); - if (!wq->sq.sw_sq) - goto err2; + if (!wq->sq.sw_sq) { + ret = -ENOMEM; + goto free_rq_qid; + } wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq, GFP_KERNEL); - if (!wq->rq.sw_rq) - goto err3; + if (!wq->rq.sw_rq) { + ret = -ENOMEM; + goto free_sw_sq; + } } - /* RQT must be a power of 2. */ - wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); + /* + * RQT must be a power of 2 and at least 16 deep. + */ + wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); - if (!wq->rq.rqt_hwaddr) - goto err4; - - if (alloc_host_sq(rdev, &wq->sq)) - goto err5; - - memset(wq->sq.queue, 0, wq->sq.memsize); - pci_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); - - wq->rq.queue = contigmalloc(wq->rq.memsize, - M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); - if (wq->rq.queue) - wq->rq.dma_addr = vtophys(wq->rq.queue); - else - goto err6; - CTR5(KTR_IW_CXGBE, - "%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx", __func__, - wq->sq.queue, (unsigned long long)vtophys(wq->sq.queue), - wq->rq.queue, (unsigned long long)vtophys(wq->rq.queue)); - memset(wq->rq.queue, 0, wq->rq.memsize); - pci_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); - - wq->db = (void *)((unsigned long)rman_get_virtual(sc->regs_res) + - sc->sge_kdoorbell_reg); - wq->gts = (void *)((unsigned long)rman_get_virtual(rdev->adap->regs_res) - + sc->sge_gts_reg); - if (user) { - wq->sq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) + - (wq->sq.qid << rdev->qpshift)); - wq->sq.udb &= PAGE_MASK; - wq->rq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) + - (wq->rq.qid << rdev->qpshift)); - wq->rq.udb &= PAGE_MASK; + if (!wq->rq.rqt_hwaddr) { + ret = -ENOMEM; + goto free_sw_rq; } + + /*QP memory, allocate DMAable memory for Send & Receive Queues */ + wq->sq.queue = dma_alloc_coherent(rhp->ibdev.dma_device, wq->sq.memsize, + &(wq->sq.dma_addr), GFP_KERNEL); + if (!wq->sq.queue) { + ret = -ENOMEM; + goto free_hwaddr; + } + wq->sq.phys_addr = vtophys(wq->sq.queue); + dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); + memset(wq->sq.queue, 0, wq->sq.memsize); + + wq->rq.queue = dma_alloc_coherent(rhp->ibdev.dma_device, + wq->rq.memsize, &(wq->rq.dma_addr), GFP_KERNEL); + if (!wq->rq.queue) { + ret = -ENOMEM; + goto free_sq_dma; + } + wq->rq.phys_addr = 
vtophys(wq->rq.queue); + dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); + memset(wq->rq.queue, 0, wq->rq.memsize); + + CTR5(KTR_IW_CXGBE, + "%s QP sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx", + __func__, + wq->sq.queue, (unsigned long long)wq->sq.phys_addr, + wq->rq.queue, (unsigned long long)wq->rq.phys_addr); + + /* Doorbell/WC regions, determine the BAR2 queue offset and qid. */ + t4_bar2_sge_qregs(rdev->adap, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, user, + &sq_bar2_qoffset, &wq->sq.bar2_qid); + t4_bar2_sge_qregs(rdev->adap, wq->rq.qid, T4_BAR2_QTYPE_EGRESS, user, + &rq_bar2_qoffset, &wq->rq.bar2_qid); + + if (user) { + /* Compute BAR2 DB/WC physical address(page-aligned) for + * Userspace mapping. + */ + wq->sq.bar2_pa = (rdev->bar2_pa + sq_bar2_qoffset) & PAGE_MASK; + wq->rq.bar2_pa = (rdev->bar2_pa + rq_bar2_qoffset) & PAGE_MASK; + CTR3(KTR_IW_CXGBE, + "%s BAR2 DB/WC sq base pa 0x%llx rq base pa 0x%llx", + __func__, (unsigned long long)wq->sq.bar2_pa, + (unsigned long long)wq->rq.bar2_pa); + } else { + /* Compute BAR2 DB/WC virtual address to access in kernel. */ + wq->sq.bar2_va = (void __iomem *)((u64)rdev->bar2_kva + + sq_bar2_qoffset); + wq->rq.bar2_va = (void __iomem *)((u64)rdev->bar2_kva + + rq_bar2_qoffset); + CTR3(KTR_IW_CXGBE, "%s BAR2 DB/WC sq base va %p rq base va %p", + __func__, (unsigned long long)wq->sq.bar2_va, + (unsigned long long)wq->rq.bar2_va); + } + wq->rdev = rdev; wq->rq.msn = 1; @@ -199,8 +235,10 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, wr_len = sizeof *res_wr + 2 * sizeof *res; wr = alloc_wrqe(wr_len, &sc->sge.mgmtq); - if (wr == NULL) - return (0); + if (wr == NULL) { + ret = -ENOMEM; + goto free_rq_dma; + } res_wr = wrtod(wr); memset(res_wr, 0, wr_len); @@ -215,7 +253,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* eqsize is the number of 64B entries plus the status page size. */ - eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + spg_ndesc; + eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ @@ -226,7 +265,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | - V_FW_RI_RES_WR_FBMAX(2) | + V_FW_RI_RES_WR_FBMAX(3) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); @@ -237,7 +276,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* eqsize is the number of 64B entries plus the status page size. 
*/ - eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + spg_ndesc; + eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ @@ -247,7 +287,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | - V_FW_RI_RES_WR_FBMAX(2) | + V_FW_RI_RES_WR_FBMAX(3) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); @@ -257,31 +297,37 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, c4iw_init_wr_wait(&wr_wait); t4_wrq_tx(sc, wr); - ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, + NULL, __func__); if (ret) - goto err7; + goto free_rq_dma; - CTR6(KTR_IW_CXGBE, + CTR5(KTR_IW_CXGBE, "%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx", - __func__, wq->sq.qid, wq->rq.qid, wq->db, - (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); + __func__, wq->sq.qid, wq->rq.qid, + (unsigned long long)wq->sq.bar2_va, + (unsigned long long)wq->rq.bar2_va); return 0; -err7: - contigfree(wq->rq.queue, wq->rq.memsize, M_DEVBUF); -err6: - dealloc_sq(rdev, &wq->sq); -err5: +free_rq_dma: + dma_free_coherent(rhp->ibdev.dma_device, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); +free_sq_dma: + dma_free_coherent(rhp->ibdev.dma_device, + wq->sq.memsize, wq->sq.queue, + dma_unmap_addr(&wq->sq, mapping)); +free_hwaddr: c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); -err4: +free_sw_rq: kfree(wq->rq.sw_rq); -err3: +free_sw_sq: kfree(wq->sq.sw_sq); -err2: +free_rq_qid: c4iw_put_qpid(rdev, wq->rq.qid, uctx); -err1: +free_sq_qid: c4iw_put_qpid(rdev, wq->sq.qid, uctx); - return -ENOMEM; + return ret; } static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, @@ -386,6 +432,8 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, default: return -EINVAL; } + wqe->send.r3 = 0; + wqe->send.r4 = 0; plen = 0; if (wr->num_sge) { @@ -429,8 +477,8 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; wqe->write.immd_data = 0; - wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); - wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { ret = build_immd(sq, wqe->write.u.immd_src, wr, @@ -466,11 +514,12 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { if (wr->num_sge > 1) return -EINVAL; - if (wr->num_sge) { - wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey); - wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr + if (wr->num_sge && wr->sg_list[0].length) { + wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr >> 32)); - wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr); + wqe->read.to_src_lo = + cpu_to_be32((u32)rdma_wr(wr)->remote_addr); wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr @@ -506,54 +555,6 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; 
} -static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) -{ - - struct fw_ri_immd *imdp; - __be64 *p; - int i; - int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); - int rem; - - if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) - return -EINVAL; - - wqe->fr.qpbinde_to_dcacpu = 0; - wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12; - wqe->fr.addr_type = FW_RI_VA_BASED_TO; - wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags); - wqe->fr.len_hi = 0; - wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length); - wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey); - wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); - wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & - 0xffffffff); - WARN_ON(pbllen > T4_MAX_FR_IMMD); - imdp = (struct fw_ri_immd *)(&wqe->fr + 1); - imdp->op = FW_RI_DATA_IMMD; - imdp->r1 = 0; - imdp->r2 = 0; - imdp->immdlen = cpu_to_be32(pbllen); - p = (__be64 *)(imdp + 1); - rem = pbllen; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); - rem -= sizeof *p; - if (++p == (__be64 *)&sq->queue[sq->size]) - p = (__be64 *)sq->queue; - } - BUG_ON(rem < 0); - while (rem) { - *p = 0; - rem -= sizeof *p; - if (++p == (__be64 *)&sq->queue[sq->size]) - p = (__be64 *)sq->queue; - } - *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16); - return 0; -} - static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { @@ -563,17 +564,45 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, return 0; } +static void free_qp_work(struct work_struct *work) +{ + struct c4iw_ucontext *ucontext; + struct c4iw_qp *qhp; + struct c4iw_dev *rhp; + + qhp = container_of(work, struct c4iw_qp, free_work); + ucontext = qhp->ucontext; + rhp = qhp->rhp; + + CTR3(KTR_IW_CXGBE, "%s qhp %p ucontext %p\n", __func__, + qhp, ucontext); + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); + + if (ucontext) + c4iw_put_ucontext(ucontext); + kfree(qhp); +} + +static void queue_qp_free(struct kref *kref) +{ + struct c4iw_qp *qhp; + + qhp = container_of(kref, struct c4iw_qp, kref); + CTR2(KTR_IW_CXGBE, "%s qhp %p", __func__, qhp); + queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work); +} + void c4iw_qp_add_ref(struct ib_qp *qp) { CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, qp); - atomic_inc(&(to_c4iw_qp(qp)->refcnt)); + kref_get(&to_c4iw_qp(qp)->kref); } void c4iw_qp_rem_ref(struct ib_qp *qp) { CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, qp); - if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt))) - wake_up(&(to_c4iw_qp(qp)->wait)); + kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free); } static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) @@ -636,6 +665,104 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); } +static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, + struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16) +{ + __be64 *p = (__be64 *)fr->pbl; + + fr->r2 = cpu_to_be32(0); + fr->stag = cpu_to_be32(mhp->ibmr.rkey); + + fr->tpte.valid_to_pdid = cpu_to_be32(F_FW_RI_TPTE_VALID | + V_FW_RI_TPTE_STAGKEY((mhp->ibmr.rkey & M_FW_RI_TPTE_STAGKEY)) | + V_FW_RI_TPTE_STAGSTATE(1) | + V_FW_RI_TPTE_STAGTYPE(FW_RI_STAG_NSMR) | + V_FW_RI_TPTE_PDID(mhp->attr.pdid)); + fr->tpte.locread_to_qpid = cpu_to_be32( + V_FW_RI_TPTE_PERM(c4iw_ib_to_tpt_access(wr->access)) | + V_FW_RI_TPTE_ADDRTYPE(FW_RI_VA_BASED_TO) | + V_FW_RI_TPTE_PS(ilog2(wr->mr->page_size) - 12)); + fr->tpte.nosnoop_pbladdr = cpu_to_be32(V_FW_RI_TPTE_PBLADDR( + PBL_OFF(&mhp->rhp->rdev, mhp->attr.pbl_addr)>>3)); + fr->tpte.dca_mwbcnt_pstag = cpu_to_be32(0); + fr->tpte.len_hi = cpu_to_be32(0); + fr->tpte.len_lo = cpu_to_be32(mhp->ibmr.length); + fr->tpte.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + fr->tpte.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff); + + p[0] = cpu_to_be64((u64)mhp->mpl[0]); + p[1] = cpu_to_be64((u64)mhp->mpl[1]); + + *len16 = DIV_ROUND_UP(sizeof(*fr), 16); +} + +static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, + struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16, + bool dsgl_supported) +{ + struct fw_ri_immd *imdp; + __be64 *p; + int i; + int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32); + int rem; + + if (mhp->mpl_len > t4_max_fr_depth(use_dsgl && dsgl_supported)) + return -EINVAL; + + wqe->fr.qpbinde_to_dcacpu = 0; + wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12; + wqe->fr.addr_type = FW_RI_VA_BASED_TO; + wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access); + wqe->fr.len_hi = 0; + wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length); + wqe->fr.stag = cpu_to_be32(wr->key); + wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & + 0xffffffff); + + if (dsgl_supported && use_dsgl && (pbllen > max_fr_immd)) { + struct fw_ri_dsgl *sglp; + + for (i = 0; i < mhp->mpl_len; i++) + mhp->mpl[i] = + (__force u64)cpu_to_be64((u64)mhp->mpl[i]); + + sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); + sglp->op = FW_RI_DATA_DSGL; + sglp->r1 = 0; + sglp->nsge = cpu_to_be16(1); + sglp->addr0 = cpu_to_be64(mhp->mpl_addr); + sglp->len0 = cpu_to_be32(pbllen); + + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); + } else { + imdp = (struct fw_ri_immd *)(&wqe->fr + 1); + imdp->op = FW_RI_DATA_IMMD; + imdp->r1 = 0; + imdp->r2 = 0; + imdp->immdlen = cpu_to_be32(pbllen); + p = (__be64 *)(imdp + 1); + rem = 
pbllen; + for (i = 0; i < mhp->mpl_len; i++) { + *p = cpu_to_be64((u64)mhp->mpl[i]); + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + BUG_ON(rem < 0); + while (rem) { + *p = 0; + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp) + + pbllen, 16); + } + + return 0; +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -644,13 +771,15 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, enum fw_wr_opcodes fw_opcode = 0; enum fw_ri_wr_flags fw_flags; struct c4iw_qp *qhp; - union t4_wr *wqe; + union t4_wr *wqe = NULL; u32 num_wrs; struct t4_swsqe *swsqe; unsigned long flag; u16 idx = 0; + struct c4iw_rdev *rdev; qhp = to_c4iw_qp(ibqp); + rdev = &qhp->rhp->rdev; spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); @@ -660,6 +789,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_wrs = t4_sq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -ENOMEM; } while (wr) { @@ -698,10 +828,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ_WITH_INV: fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; - if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { + c4iw_invalidate_mr(qhp->rhp, + wr->sg_list[0].lkey); fw_flags = FW_RI_RDMA_READ_INVALIDATE; - else + } else { fw_flags = 0; + } err = build_rdma_read(wqe, wr, &len16); if (err) break; @@ -709,17 +842,33 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!qhp->wq.sq.oldest_read) qhp->wq.sq.oldest_read = swsqe; break; - case IB_WR_FAST_REG_MR: - fw_opcode = FW_RI_FR_NSMR_WR; + case IB_WR_REG_MR: { + struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); + swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16); + if (rdev->adap->params.fr_nsmr_tpte_wr_support && + !mhp->attr.state && mhp->mpl_len <= 2) { + fw_opcode = FW_RI_FR_NSMR_TPTE_WR; + build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), + mhp, &len16); + } else { + fw_opcode = FW_RI_FR_NSMR_WR; + err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), + mhp, &len16, + rdev->adap->params.ulptx_memwrite_dsgl); + if (err) + break; + } + mhp->attr.state = 1; break; + } case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) fw_flags |= FW_RI_LOCAL_FENCE_FLAG; fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; err = build_inv_stag(wqe, wr, &len16); + c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); break; default: CTR2(KTR_IW_CXGBE, "%s post of type =%d TBD!", __func__, @@ -734,6 +883,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, swsqe->complete = 0; swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) || qhp->sq_sig_all; + swsqe->flushed = 0; swsqe->wr_id = wr->wr_id; init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); @@ -748,7 +898,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } - t4_ring_sq_db(&qhp->wq, idx); + t4_ring_sq_db(&qhp->wq, idx, wqe, rdev->adap->iwt.wc_en); spin_unlock_irqrestore(&qhp->lock, flag); return err; } @@ -758,7 +908,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, { int err = 0; struct c4iw_qp *qhp; - union t4_recv_wr *wqe; + union t4_recv_wr *wqe = NULL; u32 num_wrs; u8 len16 = 0; 
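	/*
	 * Note: wqe starts out NULL here (and in c4iw_post_send above)
	 * because the pointer is now handed to the doorbell helpers;
	 * t4_ring_rq_db()/t4_ring_sq_db() only attempt the write-combined
	 * PIO copy when given a non-NULL coalesced WR, so paths that bail
	 * out before building a WQE never pass them an uninitialized
	 * pointer.
	 */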
unsigned long flag; @@ -774,6 +924,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, num_wrs = t4_rq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -ENOMEM; } while (wr) { @@ -811,16 +962,11 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, num_wrs--; } - t4_ring_rq_db(&qhp->wq, idx); + t4_ring_rq_db(&qhp->wq, idx, wqe, qhp->rhp->rdev.adap->iwt.wc_en); spin_unlock_irqrestore(&qhp->lock, flag); return err; } -int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) -{ - return -ENOSYS; -} - static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, u8 *ecode) { @@ -1004,7 +1150,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, struct c4iw_cq *schp) { int count; - int flushed; + int rq_flushed, sq_flushed; unsigned long flag; CTR4(KTR_IW_CXGBE, "%s qhp %p rchp %p schp %p", __func__, qhp, rchp, @@ -1013,29 +1159,50 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&rchp->lock, flag); spin_lock(&qhp->lock); - c4iw_flush_hw_cq(&rchp->cq); + + if (qhp->wq.flushed) { + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); + return; + } + qhp->wq.flushed = 1; + + c4iw_flush_hw_cq(rchp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, flag); - if (flushed && rchp->ibcq.comp_handler) { - spin_lock_irqsave(&rchp->comp_handler_lock, flag); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); - } /* locking hierarchy: cq lock first, then qp lock. 
*/ spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); - c4iw_flush_hw_cq(&schp->cq); - c4iw_count_scqes(&schp->cq, &qhp->wq, &count); - flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); + if (schp != rchp) + c4iw_flush_hw_cq(schp); + sq_flushed = c4iw_flush_sq(qhp); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); - if (flushed && schp->ibcq.comp_handler) { - spin_lock_irqsave(&schp->comp_handler_lock, flag); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + + if (schp == rchp) { + if (t4_clear_cq_armed(&rchp->cq) && + (rq_flushed || sq_flushed)) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + } else { + if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + if (t4_clear_cq_armed(&schp->cq) && sq_flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } } } @@ -1044,11 +1211,11 @@ static void flush_qp(struct c4iw_qp *qhp) struct c4iw_cq *rchp, *schp; unsigned long flag; - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); + rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + schp = to_c4iw_cq(qhp->ibqp.send_cq); + t4_set_wq_in_error(&qhp->wq); if (qhp->ibqp.uobject) { - t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); @@ -1080,8 +1247,8 @@ rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, struct c4iw_ep *ep) KASSERT(rhp == qhp->rhp && ep == qhp->ep, ("%s: EDOOFUS", __func__)); - CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp, - qhp->wq.sq.qid, ep->hwtid); + CTR5(KTR_IW_CXGBE, "%s qhp %p qid 0x%x ep %p tid %u", __func__, qhp, + qhp->wq.sq.qid, ep, ep->hwtid); wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq); if (wr == NULL) @@ -1106,7 +1273,7 @@ rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, struct c4iw_ep *ep) t4_wrq_tx(sc, wr); ret = c4iw_wait_for_reply(rdev, &ep->com.wr_wait, ep->hwtid, - qhp->wq.sq.qid, __func__); + qhp->wq.sq.qid, ep->com.so, __func__); return ret; } @@ -1174,13 +1341,19 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; - CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp, - qhp->wq.sq.qid, ep->hwtid); + CTR5(KTR_IW_CXGBE, "%s qhp %p qid 0x%x ep %p tid %u", __func__, qhp, + qhp->wq.sq.qid, ep, ep->hwtid); wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq); if (wr == NULL) return (0); wqe = wrtod(wr); + ret = alloc_ird(rhp, qhp->attr.max_ird); + if (ret) { + qhp->attr.max_ird = 0; + free_wrqe(wr); + return ret; + } memset(wqe, 0, sizeof *wqe); @@ -1232,14 +1405,16 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) ret = creds(toep, inp, sizeof(*wqe)); if (ret) { free_wrqe(wr); + free_ird(rhp, qhp->attr.max_ird); return ret; } t4_wrq_tx(sc, wr); ret = c4iw_wait_for_reply(rdev, &ep->com.wr_wait, ep->hwtid, - qhp->wq.sq.qid, __func__); + qhp->wq.sq.qid, ep->com.so, __func__); toep->ulp_mode = ULP_MODE_RDMA; + free_ird(rhp, qhp->attr.max_ird); return 
ret; } @@ -1284,7 +1459,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, newattr.max_ord = attrs->max_ord; } if (mask & C4IW_QP_ATTR_MAX_IRD) { - if (attrs->max_ird > c4iw_max_read_depth) { + if (attrs->max_ird > cur_max_read_depth(rhp)) { ret = -EINVAL; goto out; } @@ -1339,6 +1514,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { @@ -1346,28 +1522,30 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, disconnect = 1; c4iw_get_ep(&qhp->ep->com); } - if (qhp->ibqp.uobject) - t4_set_wq_in_error(&qhp->wq); ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; break; case C4IW_QP_STATE_TERMINATE: + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; - if (qhp->ibqp.uobject) - t4_set_wq_in_error(&qhp->wq); ep = qhp->ep; - if (!internal) + if (!internal) { + c4iw_get_ep(&qhp->ep->com); terminate = 1; - disconnect = 1; - c4iw_get_ep(&qhp->ep->com); + disconnect = 1; + } else { + terminate = qhp->attr.send_term; + ret = rdma_fini(rhp, qhp, ep); + if (ret) + goto err; + } break; case C4IW_QP_STATE_ERROR: + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_ERROR); - if (qhp->ibqp.uobject) - t4_set_wq_in_error(&qhp->wq); if (!internal) { abort = 1; disconnect = 1; @@ -1460,7 +1638,7 @@ out: * an abnormal close (RTS/CLOSING->ERROR). */ if (disconnect) { - c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC : + __c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC : GFP_KERNEL); c4iw_put_ep(&ep->com); } @@ -1480,7 +1658,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) struct c4iw_dev *rhp; struct c4iw_qp *qhp; struct c4iw_qp_attributes attrs; - struct c4iw_ucontext *ucontext; CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, ib_qp); qhp = to_c4iw_qp(ib_qp); @@ -1493,20 +1670,13 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - spin_lock_irq(&rhp->lock); - remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid); - spin_unlock_irq(&rhp->lock); - atomic_dec(&qhp->refcnt); - wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); + remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); - ucontext = ib_qp->uobject ? - to_c4iw_ucontext(ib_qp->uobject->context) : NULL; - destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); + free_ird(rhp, qhp->attr.max_ird); + c4iw_qp_rem_ref(ib_qp); CTR3(KTR_IW_CXGBE, "%s ib_qp %p qpid 0x%0x", __func__, ib_qp, qhp->wq.sq.qid); - kfree(qhp); return 0; } @@ -1520,10 +1690,11 @@ c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - int sqsize, rqsize; + unsigned int sqsize, rqsize; struct c4iw_ucontext *ucontext; - int ret, spg_ndesc; - struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4; + int ret; + struct c4iw_mm_entry *sq_key_mm = NULL, *rq_key_mm = NULL; + struct c4iw_mm_entry *sq_db_key_mm = NULL, *rq_db_key_mm = NULL; CTR2(KTR_IW_CXGBE, "%s ib_pd %p", __func__, pd); @@ -1540,26 +1711,32 @@ c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) return ERR_PTR(-EINVAL); - spg_ndesc = rhp->rdev.adap->params.sge.spg_len / EQ_ESIZE; - rqsize = roundup(attrs->cap.max_recv_wr + 1, 16); - if (rqsize > T4_MAX_RQ_SIZE(spg_ndesc)) + if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) return ERR_PTR(-E2BIG); + rqsize = attrs->cap.max_recv_wr + 1; + if (rqsize < 8) + rqsize = 8; - sqsize = roundup(attrs->cap.max_send_wr + 1, 16); - if (sqsize > T4_MAX_SQ_SIZE(spg_ndesc)) + if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size) return ERR_PTR(-E2BIG); + sqsize = attrs->cap.max_send_wr + 1; + if (sqsize < 8) + sqsize = 8; ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; - qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) return ERR_PTR(-ENOMEM); qhp->wq.sq.size = sqsize; - qhp->wq.sq.memsize = (sqsize + spg_ndesc) * sizeof *qhp->wq.sq.queue + - 16 * sizeof(__be64); + qhp->wq.sq.memsize = + (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64); + qhp->wq.sq.flush_cidx = -1; qhp->wq.rq.size = rqsize; - qhp->wq.rq.memsize = (rqsize + spg_ndesc) * sizeof *qhp->wq.rq.queue; + qhp->wq.rq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.rq.queue); if (ucontext) { qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); @@ -1592,38 +1769,37 @@ c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.enable_rdma_read = 1; qhp->attr.enable_rdma_write = 1; qhp->attr.enable_bind = 1; - qhp->attr.max_ord = 1; - qhp->attr.max_ird = 1; + qhp->attr.max_ord = 0; + qhp->attr.max_ird = 0; qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); - atomic_set(&qhp->refcnt, 1); + kref_init(&qhp->kref); + INIT_WORK(&qhp->free_work, free_qp_work); - spin_lock_irq(&rhp->lock); - ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); - spin_unlock_irq(&rhp->lock); + ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) goto err2; if (udata) { - mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); - if (!mm1) { + sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL); + if (!sq_key_mm) { ret = -ENOMEM; goto err3; } - mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); - if (!mm2) { + rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); + if (!rq_key_mm) { ret = -ENOMEM; goto err4; } - mm3 = kmalloc(sizeof *mm3, GFP_KERNEL); - if (!mm3) { + sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); + if (!sq_db_key_mm) { ret = -ENOMEM; goto err5; } - mm4 = kmalloc(sizeof *mm4, GFP_KERNEL); - if (!mm4) { + rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); + if (!rq_db_key_mm) { ret = -ENOMEM; goto 
err6; } @@ -1636,6 +1812,7 @@ c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, uresp.rq_size = qhp->wq.rq.size; uresp.rq_memsize = qhp->wq.rq.memsize; spin_lock(&ucontext->mmap_lock); + uresp.ma_sync_key = 0; uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; uresp.rq_key = ucontext->key; @@ -1648,46 +1825,56 @@ c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) goto err7; - mm1->key = uresp.sq_key; - mm1->addr = qhp->wq.sq.phys_addr; - mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); - CTR4(KTR_IW_CXGBE, "%s mm1 %x, %x, %d", __func__, mm1->key, - mm1->addr, mm1->len); - insert_mmap(ucontext, mm1); - mm2->key = uresp.rq_key; - mm2->addr = vtophys(qhp->wq.rq.queue); - mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); - CTR4(KTR_IW_CXGBE, "%s mm2 %x, %x, %d", __func__, mm2->key, - mm2->addr, mm2->len); - insert_mmap(ucontext, mm2); - mm3->key = uresp.sq_db_gts_key; - mm3->addr = qhp->wq.sq.udb; - mm3->len = PAGE_SIZE; - CTR4(KTR_IW_CXGBE, "%s mm3 %x, %x, %d", __func__, mm3->key, - mm3->addr, mm3->len); - insert_mmap(ucontext, mm3); - mm4->key = uresp.rq_db_gts_key; - mm4->addr = qhp->wq.rq.udb; - mm4->len = PAGE_SIZE; - CTR4(KTR_IW_CXGBE, "%s mm4 %x, %x, %d", __func__, mm4->key, - mm4->addr, mm4->len); - insert_mmap(ucontext, mm4); + sq_key_mm->key = uresp.sq_key; + sq_key_mm->addr = qhp->wq.sq.phys_addr; + sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); + CTR4(KTR_IW_CXGBE, "%s sq_key_mm %x, %x, %d", __func__, + sq_key_mm->key, sq_key_mm->addr, + sq_key_mm->len); + insert_mmap(ucontext, sq_key_mm); + rq_key_mm->key = uresp.rq_key; + rq_key_mm->addr = qhp->wq.rq.phys_addr; + rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); + CTR4(KTR_IW_CXGBE, "%s rq_key_mm %x, %x, %d", __func__, + rq_key_mm->key, rq_key_mm->addr, + rq_key_mm->len); + insert_mmap(ucontext, rq_key_mm); + sq_db_key_mm->key = uresp.sq_db_gts_key; + sq_db_key_mm->addr = (u64)qhp->wq.sq.bar2_pa; + sq_db_key_mm->len = PAGE_SIZE; + CTR4(KTR_IW_CXGBE, "%s sq_db_key_mm %x, %x, %d", __func__, + sq_db_key_mm->key, sq_db_key_mm->addr, + sq_db_key_mm->len); + insert_mmap(ucontext, sq_db_key_mm); + rq_db_key_mm->key = uresp.rq_db_gts_key; + rq_db_key_mm->addr = (u64)qhp->wq.rq.bar2_pa; + rq_db_key_mm->len = PAGE_SIZE; + CTR4(KTR_IW_CXGBE, "%s rq_db_key_mm %x, %x, %d", __func__, + rq_db_key_mm->key, rq_db_key_mm->addr, + rq_db_key_mm->len); + insert_mmap(ucontext, rq_db_key_mm); + + c4iw_get_ucontext(ucontext); + qhp->ucontext = ucontext; } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); - CTR5(KTR_IW_CXGBE, - "%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x", - __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, - qhp->wq.sq.qid); + + CTR5(KTR_IW_CXGBE, "%s sq id %u size %u memsize %zu num_entries %u\n", + __func__, qhp->wq.sq.qid, + qhp->wq.sq.size, qhp->wq.sq.memsize, attrs->cap.max_send_wr); + CTR5(KTR_IW_CXGBE, "%s rq id %u size %u memsize %zu num_entries %u\n", + __func__, qhp->wq.rq.qid, + qhp->wq.rq.size, qhp->wq.rq.memsize, attrs->cap.max_recv_wr); return &qhp->ibqp; err7: - kfree(mm4); + kfree(rq_db_key_mm); err6: - kfree(mm3); + kfree(sq_db_key_mm); err5: - kfree(mm2); + kfree(rq_key_mm); err4: - kfree(mm1); + kfree(sq_key_mm); err3: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); err2: diff --git a/sys/dev/cxgbe/iw_cxgbe/t4.h b/sys/dev/cxgbe/iw_cxgbe/t4.h index 7cae2241d0ac..375688c67684 100644 --- a/sys/dev/cxgbe/iw_cxgbe/t4.h +++ b/sys/dev/cxgbe/iw_cxgbe/t4.h @@ -33,6 +33,8 @@ #ifndef __T4_H__ #define 
__T4_H__ +#include "common/t4_regs_values.h" +#include "common/t4_regs.h" /* * Fixme: Adding missing defines */ @@ -60,14 +62,8 @@ #define CIDXINC(x) ((x) << CIDXINC_SHIFT) #define T4_MAX_NUM_PD 65536 -#define T4_MAX_EQ_SIZE 65520 -#define T4_MAX_IQ_SIZE 65520 -#define T4_MAX_RQ_SIZE(n) (8192 - (n) - 1) -#define T4_MAX_SQ_SIZE(n) (T4_MAX_EQ_SIZE - (n) - 1) -#define T4_MAX_QP_DEPTH(n) (T4_MAX_RQ_SIZE(n)) -#define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 2) -#define T4_MAX_MR_SIZE (~0ULL - 1) -#define T4_PAGESIZE_MASK 0xffffffff000 /* 4KB-8TB */ +#define T4_MAX_MR_SIZE (~0ULL) +#define T4_PAGESIZE_MASK 0xffffffff000 /* 4KB-8TB */ #define T4_STAG_UNSET 0xffffffff #define T4_FW_MAJ 0 #define A_PCIE_MA_SYNC 0x30b4 @@ -102,7 +98,14 @@ struct t4_status_page { sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ sizeof(struct fw_ri_immd)) & ~31UL) -#define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) +#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) +#define T4_MAX_FR_DSGL 1024 +#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64)) + +static inline int t4_max_fr_depth(int use_dsgl) +{ + return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH; +} #define T4_RQ_NUM_SLOTS 2 #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) @@ -116,6 +119,7 @@ union t4_wr { struct fw_ri_rdma_read_wr read; struct fw_ri_bind_mw_wr bind; struct fw_ri_fr_nsmr_wr fr; + struct fw_ri_fr_nsmr_tpte_wr fr_tpte; struct fw_ri_inv_lstag_wr inv; struct t4_status_page status; __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; @@ -191,7 +195,7 @@ struct t4_cqe { __be32 msn; } rcqe; struct { - u32 nada1; + u32 stag; u16 nada2; u16 cidx; } scqe; @@ -254,6 +258,7 @@ struct t4_cqe { /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) +#define CQE_WRID_FR_STAG(x) (be32_to_cpu((x)->u.scqe.stag)) /* generic accessor macros */ #define CQE_WRID_HI(x) ((x)->u.gen.wrid_hi) @@ -289,24 +294,44 @@ struct t4_swsqe { int complete; int signaled; u16 idx; + int flushed; + struct timespec host_ts; + u64 sge_ts; +}; + +static inline pgprot_t t4_pgprot_wc(pgprot_t prot) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return pgprot_writecombine(prot); +#else + return pgprot_noncached(prot); +#endif +} + +enum { + T4_SQ_ONCHIP = (1<<0), }; struct t4_sq { union t4_wr *queue; bus_addr_t dma_addr; - DECLARE_PCI_UNMAP_ADDR(mapping); + DEFINE_DMA_UNMAP_ADDR(mapping); unsigned long phys_addr; struct t4_swsqe *sw_sq; struct t4_swsqe *oldest_read; - u64 udb; + void __iomem *bar2_va; + u64 bar2_pa; size_t memsize; + u32 bar2_qid; u32 qid; u16 in_use; u16 size; u16 cidx; u16 pidx; u16 wq_pidx; + u16 wq_pidx_inc; u16 flags; + short flush_cidx; }; struct t4_swrqe { @@ -316,10 +341,13 @@ struct t4_swrqe { struct t4_rq { union t4_recv_wr *queue; bus_addr_t dma_addr; - DECLARE_PCI_UNMAP_ADDR(mapping); + DEFINE_DMA_UNMAP_ADDR(mapping); + unsigned long phys_addr; struct t4_swrqe *sw_rq; - u64 udb; + void __iomem *bar2_va; + u64 bar2_pa; size_t memsize; + u32 bar2_qid; u32 qid; u32 msn; u32 rqt_hwaddr; @@ -329,14 +357,14 @@ struct t4_rq { u16 cidx; u16 pidx; u16 wq_pidx; + u16 wq_pidx_inc; }; struct t4_wq { struct t4_sq sq; struct t4_rq rq; - void __iomem *db; - void __iomem *gts; struct c4iw_rdev *rdev; + int flushed; }; static inline int t4_rqes_posted(struct t4_wq *wq) @@ -384,7 +412,12 @@ static inline u16 t4_rq_host_wq_pidx(struct t4_wq *wq) static inline u16 t4_rq_wq_size(struct 
t4_wq *wq) { - return wq->rq.size * T4_RQ_NUM_SLOTS; + return wq->rq.size * T4_RQ_NUM_SLOTS; +} + +static inline int t4_sq_onchip(struct t4_sq *sq) +{ + return sq->flags & T4_SQ_ONCHIP; } static inline int t4_sq_empty(struct t4_wq *wq) @@ -414,6 +447,9 @@ static inline void t4_sq_produce(struct t4_wq *wq, u8 len16) static inline void t4_sq_consume(struct t4_wq *wq) { + BUG_ON(wq->sq.in_use < 1); + if (wq->sq.cidx == wq->sq.flush_cidx) + wq->sq.flush_cidx = -1; wq->sq.in_use--; if (++wq->sq.cidx == wq->sq.size) wq->sq.cidx = 0; @@ -429,16 +465,69 @@ static inline u16 t4_sq_wq_size(struct t4_wq *wq) return wq->sq.size * T4_SQ_NUM_SLOTS; } -static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) +/* This function copies 64 byte coalesced work request to memory + * mapped BAR2 space. For coalesced WRs, the SGE fetches data + * from the FIFO instead of from Host. + */ +static inline void pio_copy(u64 __iomem *dst, u64 *src) { - wmb(); - writel(QID(wq->sq.qid) | PIDX(inc), wq->db); + int count = 8; + + while (count) { + writeq(*src, dst); + src++; + dst++; + count--; + } } -static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc) +static inline void +t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe, u8 wc) { + + /* Flush host queue memory writes. */ wmb(); - writel(QID(wq->rq.qid) | PIDX(inc), wq->db); + if (wc && inc == 1 && wq->sq.bar2_qid == 0 && wqe) { + CTR2(KTR_IW_CXGBE, "%s: WC wq->sq.pidx = %d\n", + __func__, wq->sq.pidx); + pio_copy((u64 __iomem *) + ((u64)wq->sq.bar2_va + SGE_UDB_WCDOORBELL), + (u64 *)wqe); + } else { + CTR2(KTR_IW_CXGBE, "%s: DB wq->sq.pidx = %d\n", + __func__, wq->sq.pidx); + writel(V_PIDX_T5(inc) | V_QID(wq->sq.bar2_qid), + (void __iomem *)((u64)wq->sq.bar2_va + + SGE_UDB_KDOORBELL)); + } + + /* Flush user doorbell area writes. */ + wmb(); + return; +} + +static inline void +t4_ring_rq_db(struct t4_wq *wq, u16 inc, union t4_recv_wr *wqe, u8 wc) +{ + + /* Flush host queue memory writes. */ + wmb(); + if (wc && inc == 1 && wq->rq.bar2_qid == 0 && wqe) { + CTR2(KTR_IW_CXGBE, "%s: WC wq->rq.pidx = %d\n", + __func__, wq->rq.pidx); + pio_copy((u64 __iomem *)((u64)wq->rq.bar2_va + + SGE_UDB_WCDOORBELL), (u64 *)wqe); + } else { + CTR2(KTR_IW_CXGBE, "%s: DB wq->rq.pidx = %d\n", + __func__, wq->rq.pidx); + writel(V_PIDX_T5(inc) | V_QID(wq->rq.bar2_qid), + (void __iomem *)((u64)wq->rq.bar2_va + + SGE_UDB_KDOORBELL)); + } + + /* Flush user doorbell area writes. 
*/ + wmb(); + return; } static inline int t4_wq_in_error(struct t4_wq *wq) @@ -451,17 +540,24 @@ static inline void t4_set_wq_in_error(struct t4_wq *wq) wq->rq.queue[wq->rq.size].status.qp_err = 1; } +enum t4_cq_flags { + CQ_ARMED = 1, +}; + struct t4_cq { struct t4_cqe *queue; bus_addr_t dma_addr; - DECLARE_PCI_UNMAP_ADDR(mapping); + DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_cqe *sw_queue; - void __iomem *gts; + void __iomem *bar2_va; + u64 bar2_pa; + u32 bar2_qid; struct c4iw_rdev *rdev; - u64 ugts; size_t memsize; __be64 bits_type_ts; u32 cqid; + u32 qid_mask; + int vector; u16 size; /* including status page */ u16 cidx; u16 sw_pidx; @@ -470,21 +566,34 @@ struct t4_cq { u16 cidx_inc; u8 gen; u8 error; + unsigned long flags; }; +static inline void write_gts(struct t4_cq *cq, u32 val) +{ + writel(val | V_INGRESSQID(cq->bar2_qid), + (void __iomem *)((u64)cq->bar2_va + SGE_UDB_GTS)); +} + +static inline int t4_clear_cq_armed(struct t4_cq *cq) +{ + return test_and_clear_bit(CQ_ARMED, &cq->flags); +} + static inline int t4_arm_cq(struct t4_cq *cq, int se) { u32 val; + set_bit(CQ_ARMED, &cq->flags); while (cq->cidx_inc > CIDXINC_MASK) { - val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) | - INGRESSQID(cq->cqid); - writel(val, cq->gts); + val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7); + writel(val | V_INGRESSQID(cq->bar2_qid), + (void __iomem *)((u64)cq->bar2_va + SGE_UDB_GTS)); cq->cidx_inc -= CIDXINC_MASK; } - val = SEINTARM(se) | CIDXINC(cq->cidx_inc) | TIMERREG(6) | - INGRESSQID(cq->cqid); - writel(val, cq->gts); + val = SEINTARM(se) | CIDXINC(cq->cidx_inc) | TIMERREG(6); + writel(val | V_INGRESSQID(cq->bar2_qid), + (void __iomem *)((u64)cq->bar2_va + SGE_UDB_GTS)); cq->cidx_inc = 0; return 0; } @@ -492,12 +601,19 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se) static inline void t4_swcq_produce(struct t4_cq *cq) { cq->sw_in_use++; + if (cq->sw_in_use == cq->size) { + CTR2(KTR_IW_CXGBE, "%s cxgb4 sw cq overflow cqid %u\n", + __func__, cq->cqid); + cq->error = 1; + BUG_ON(1); + } if (++cq->sw_pidx == cq->size) cq->sw_pidx = 0; } static inline void t4_swcq_consume(struct t4_cq *cq) { + BUG_ON(cq->sw_in_use < 1); cq->sw_in_use--; if (++cq->sw_cidx == cq->size) cq->sw_cidx = 0; @@ -509,9 +625,8 @@ static inline void t4_hwcq_consume(struct t4_cq *cq) if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == M_CIDXINC) { u32 val; - val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) | - INGRESSQID(cq->cqid); - writel(val, cq->gts); + val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7); + write_gts(cq, val); cq->cidx_inc = 0; } if (++cq->cidx == cq->size) { @@ -525,6 +640,11 @@ static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe) return (CQE_GENBIT(cqe) == cq->gen); } +static inline int t4_cq_notempty(struct t4_cq *cq) +{ + return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]); +} + static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) { int ret; @@ -539,7 +659,11 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) ret = -EOVERFLOW; cq->error = 1; printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid); + BUG_ON(1); } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) { + + /* Ensure CQE is flushed to memory */ + rmb(); *cqe = &cq->queue[cq->cidx]; ret = 0; } else @@ -549,6 +673,13 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq) { + if (cq->sw_in_use == cq->size) { + CTR2(KTR_IW_CXGBE, "%s cxgb4 sw cq overflow cqid 
%u\n", + __func__, cq->cqid); + cq->error = 1; + BUG_ON(1); + return NULL; + } if (cq->sw_in_use) return &cq->sw_queue[cq->sw_cidx]; return NULL; @@ -576,4 +707,14 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq) { ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; } +struct t4_dev_status_page { + u8 db_off; + u8 wc_supported; + u16 pad2; + u32 pad3; + u64 qp_start; + u64 qp_size; + u64 cq_start; + u64 cq_size; +}; #endif diff --git a/sys/dev/cxgbe/iw_cxgbe/user.h b/sys/dev/cxgbe/iw_cxgbe/user.h index d42f659771ae..a98a30056191 100644 --- a/sys/dev/cxgbe/iw_cxgbe/user.h +++ b/sys/dev/cxgbe/iw_cxgbe/user.h @@ -34,7 +34,7 @@ #ifndef __C4IW_USER_H__ #define __C4IW_USER_H__ -#define C4IW_UVERBS_ABI_VERSION 2 +#define C4IW_UVERBS_ABI_VERSION 3 /* * Make sure that all structs defined in this file remain laid out so @@ -68,4 +68,10 @@ struct c4iw_create_qp_resp { __u32 qid_mask; __u32 flags; }; + +struct c4iw_alloc_ucontext_resp { + __u64 status_page_key; + __u32 status_page_size; + __u32 reserved; /* explicit padding (optional for i386) */ +}; #endif diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h index bef5f388447e..58b35e2c3349 100644 --- a/sys/dev/cxgbe/offload.h +++ b/sys/dev/cxgbe/offload.h @@ -151,7 +151,10 @@ struct tom_tunables { int tx_align; int tx_zcopy; }; - +/* iWARP driver tunables */ +struct iw_tunables { + int wc_en; +}; #ifdef TCP_OFFLOAD int t4_register_uld(struct uld_info *); int t4_unregister_uld(struct uld_info *); diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 9471be400348..600d4fdf2ced 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -469,7 +469,7 @@ TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); static int t4_fcoecaps_allowed = 0; TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); -static int t5_write_combine = 0; +static int t5_write_combine = 1; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); static int t4_num_vis = 1; @@ -2331,6 +2331,7 @@ t4_map_bar_2(struct adapter *sc) setbit(&sc->doorbells, DOORBELL_WCWR); setbit(&sc->doorbells, DOORBELL_UDBWC); } else { + t5_write_combine = 0; device_printf(sc->dev, "couldn't enable write combining: %d\n", rc); @@ -2340,7 +2341,10 @@ t4_map_bar_2(struct adapter *sc) t4_write_reg(sc, A_SGE_STAT_CFG, V_STATSOURCE_T5(7) | mode); } +#else + t5_write_combine = 0; #endif + sc->iwt.wc_en = t5_write_combine; } return (0); diff --git a/sys/modules/cxgbe/iw_cxgbe/Makefile b/sys/modules/cxgbe/iw_cxgbe/Makefile index b07fe1b7027b..e21281e478fc 100644 --- a/sys/modules/cxgbe/iw_cxgbe/Makefile +++ b/sys/modules/cxgbe/iw_cxgbe/Makefile @@ -24,6 +24,9 @@ SRCS+= resource.c SRCS+= vnode_if.h CFLAGS+= -I${CXGBE} -I${SRCTOP}/sys/ofed/include -DLINUX_TYPES_DEFINED +CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include +CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM +CFLAGS+= -DINET6 -DINET .include
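Taken together, the doorbell pieces line up as follows: t4_map_bar_2() clears t5_write_combine whenever the BAR2 write-combining mapping cannot be enabled and latches the result into sc->iwt.wc_en, the new wc_supported field in t4_dev_status_page is there to surface the same answer to user space, and the BAR2-based t4_ring_sq_db()/t4_ring_rq_db() then choose between a 64-byte PIO copy into the WC doorbell window and a plain PIDX write. A condensed sketch of that final decision, using only helpers and register offsets that appear in the t4.h hunks above (the function name is invented; this is an illustration, not a drop-in replacement):

static inline void
ring_sq_db_sketch(struct t4_sq *sq, u16 inc, union t4_wr *wqe, u8 wc)
{
	wmb();	/* make queue memory writes visible before the doorbell */
	if (wc && inc == 1 && sq->bar2_qid == 0 && wqe != NULL) {
		/* One coalesced WR: PIO-copy all 64 bytes into the
		 * write-combining window; the SGE pulls the WR from its
		 * FIFO instead of fetching it from host memory. */
		pio_copy((u64 __iomem *)((u64)sq->bar2_va +
		    SGE_UDB_WCDOORBELL), (u64 *)wqe);
	} else {
		/* Plain doorbell: just advance the producer index. */
		writel(V_PIDX_T5(inc) | V_QID(sq->bar2_qid),
		    (void __iomem *)((u64)sq->bar2_va + SGE_UDB_KDOORBELL));
	}
	wmb();	/* push the doorbell write out as well */
}

Only a single coalesced WR (inc == 1) on a queue whose bar2_qid is 0 goes out through the WC window; everything else falls back to the regular doorbell write with the qid encoded explicitly.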