Fix for iWARP servers that listen on INADDR_ANY.

The iWARP Connection Manager (CM) on FreeBSD creates a TCP socket to
represent an iWARP endpoint when the connection is over TCP. For
servers the current approach is to invoke create_listen callback for
each iWARP RNIC registered with the CM. This doesn't work too well for
INADDR_ANY because a listen on any TCP socket already notifies all
hardware TOEs/RNICs of the new listener. This patch fixes the server
side of things for FreeBSD. We've tried to keep all these modifications
in the iWARP/TCP-specific parts of the OFED infrastructure as much as
possible.

Submitted by:	Krishnamraju Eraparaju @ Chelsio (with design inputs from Steve Wise)
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D4801
This commit is contained in:
Navdeep Parhar 2016-01-22 23:33:34 +00:00
parent 5abb4cd79f
commit 097f289f25
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=294610
11 changed files with 446 additions and 145 deletions

View File

@ -174,4 +174,5 @@ static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
}
void iwch_ev_dispatch(struct iwch_dev *, struct mbuf *);
void process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so);
#endif

View File

@ -260,7 +260,6 @@ alloc_ep(int size, int flags)
void __free_ep(struct iwch_ep_common *epc)
{
CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so));
KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
free(epc, M_DEVBUF);
}
@ -1361,7 +1360,7 @@ iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
int
iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
{
int err = 0;
struct iwch_listen_ep *ep;
@ -1381,35 +1380,22 @@ iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
state_set(&ep->com, LISTEN);
ep->com.so = cm_id->so;
err = init_sock(&ep->com);
if (err)
goto fail;
err = solisten(ep->com.so, ep->backlog, ep->com.thread);
if (!err) {
cm_id->provider_data = ep;
goto out;
}
close_socket(&ep->com, 0);
fail:
cm_id->rem_ref(cm_id);
put_ep(&ep->com);
cm_id->provider_data = ep;
out:
return err;
}
int
iwch_destroy_listen(struct iw_cm_id *cm_id)
void
iwch_destroy_listen_ep(struct iw_cm_id *cm_id)
{
struct iwch_listen_ep *ep = to_listen_ep(cm_id);
CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
state_set(&ep->com, DEAD);
close_socket(&ep->com, 0);
cm_id->rem_ref(cm_id);
put_ep(&ep->com);
return 0;
return;
}
int
@ -1526,54 +1512,32 @@ process_connected(struct iwch_ep *ep)
}
}
static struct socket *
dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
void
process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
{
struct socket *so;
ACCEPT_LOCK();
so = TAILQ_FIRST(&head->so_comp);
if (!so) {
ACCEPT_UNLOCK();
return NULL;
}
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
SOCK_LOCK(so);
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
soref(so);
soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep);
so->so_state |= SS_NBIO;
PANIC_IF(!(so->so_state & SS_ISCONNECTED));
PANIC_IF(so->so_error);
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
soaccept(so, (struct sockaddr **)remote);
return so;
}
static void
process_newconn(struct iwch_ep *parent_ep)
{
struct socket *child_so;
struct iwch_ep *child_ep;
struct sockaddr_in *local;
struct sockaddr_in *remote;
struct iwch_ep *parent_ep = parent_cm_id->provider_data;
CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
if (!child_so) {
log(LOG_ERR, "%s - invalid child socket!\n", __func__);
return;
}
child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
if (!child_ep) {
log(LOG_ERR, "%s - failed to allocate ep entry!\n",
__FUNCTION__);
return;
}
child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
if (!child_so) {
log(LOG_ERR, "%s - failed to dequeue child socket!\n",
__FUNCTION__);
__free_ep(&child_ep->com);
return;
}
SOCKBUF_LOCK(&child_so->so_rcv);
soupcall_set(child_so, SO_RCV, iwch_so_upcall, child_ep);
SOCKBUF_UNLOCK(&child_so->so_rcv);
in_getsockaddr(child_so, (struct sockaddr **)&local);
in_getpeeraddr(child_so, (struct sockaddr **)&remote);
CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
child_ep->com.tdev = parent_ep->com.tdev;
@ -1590,9 +1554,9 @@ process_newconn(struct iwch_ep *parent_ep)
child_ep->com.thread = parent_ep->com.thread;
child_ep->parent_ep = parent_ep;
free(local, M_SONAME);
free(remote, M_SONAME);
get_ep(&parent_ep->com);
child_ep->parent_ep = parent_ep;
callout_init(&child_ep->timer, 1);
state_set(&child_ep->com, MPA_REQ_WAIT);
start_ep_timer(child_ep);
@ -1630,7 +1594,10 @@ process_socket_event(struct iwch_ep *ep)
}
if (state == LISTEN) {
process_newconn(ep);
/* socket listening events are handled at IWCM */
CTR3(KTR_IW_CXGB, "%s Invalid ep state:%u, ep:%p", __func__,
ep->com.state, ep);
BUG();
return;
}

View File

@ -231,8 +231,8 @@ iwch_wakeup(struct cv *cv, struct mtx *lock, int *rpl_done)
/* CM prototypes */
int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
int iwch_destroy_listen(struct iw_cm_id *cm_id);
int iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog);
void iwch_destroy_listen_ep(struct iw_cm_id *cm_id);
int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags);

View File

@ -1140,8 +1140,9 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.iwcm->connect = iwch_connect;
dev->ibdev.iwcm->accept = iwch_accept_cr;
dev->ibdev.iwcm->reject = iwch_reject_cr;
dev->ibdev.iwcm->create_listen = iwch_create_listen;
dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen;
dev->ibdev.iwcm->create_listen_ep = iwch_create_listen_ep;
dev->ibdev.iwcm->destroy_listen_ep = iwch_destroy_listen_ep;
dev->ibdev.iwcm->newconn = process_newconn;
dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
dev->ibdev.iwcm->get_qp = iwch_get_qp;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
* Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -111,8 +111,6 @@ static void ep_timeout(unsigned long arg);
static void init_sock(struct c4iw_ep_common *epc);
static void process_data(struct c4iw_ep *ep);
static void process_connected(struct c4iw_ep *ep);
static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep);
static void process_newconn(struct c4iw_ep *parent_ep);
static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
static void process_socket_event(struct c4iw_ep *ep);
static void release_ep_resources(struct c4iw_ep *ep);
@ -623,40 +621,21 @@ process_connected(struct c4iw_ep *ep)
}
}
static struct socket *
dequeue_socket(struct socket *head, struct sockaddr_in **remote,
struct c4iw_ep *child_ep)
void
process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
{
struct socket *so;
ACCEPT_LOCK();
so = TAILQ_FIRST(&head->so_comp);
if (!so) {
ACCEPT_UNLOCK();
return (NULL);
}
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
SOCK_LOCK(so);
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
soref(so);
soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep);
so->so_state |= SS_NBIO;
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
soaccept(so, (struct sockaddr **)remote);
return (so);
}
static void
process_newconn(struct c4iw_ep *parent_ep)
{
struct socket *child_so;
struct c4iw_ep *child_ep;
struct sockaddr_in *local;
struct sockaddr_in *remote;
struct c4iw_ep *parent_ep = parent_cm_id->provider_data;
if (!child_so) {
CTR4(KTR_IW_CXGBE,
"%s: parent so %p, parent ep %p, child so %p, invalid so",
__func__, parent_ep->com.so, parent_ep, child_so);
log(LOG_ERR, "%s: invalid child socket\n", __func__);
return;
}
child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
if (!child_ep) {
CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM",
@ -664,23 +643,18 @@ process_newconn(struct c4iw_ep *parent_ep)
log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__);
return;
}
child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
if (!child_so) {
CTR4(KTR_IW_CXGBE,
"%s: parent so %p, parent ep %p, child ep %p, dequeue err",
__func__, parent_ep->com.so, parent_ep, child_ep);
log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__);
__free_ep(&child_ep->com);
return;
}
SOCKBUF_LOCK(&child_so->so_rcv);
soupcall_set(child_so, SO_RCV, c4iw_so_upcall, child_ep);
SOCKBUF_UNLOCK(&child_so->so_rcv);
CTR5(KTR_IW_CXGBE,
"%s: parent so %p, parent ep %p, child so %p, child ep %p",
__func__, parent_ep->com.so, parent_ep, child_so, child_ep);
child_ep->com.local_addr = parent_ep->com.local_addr;
in_getsockaddr(child_so, (struct sockaddr **)&local);
in_getpeeraddr(child_so, (struct sockaddr **)&remote);
child_ep->com.local_addr = *local;
child_ep->com.remote_addr = *remote;
child_ep->com.dev = parent_ep->com.dev;
child_ep->com.so = child_so;
@ -688,15 +662,17 @@ process_newconn(struct c4iw_ep *parent_ep)
child_ep->com.thread = parent_ep->com.thread;
child_ep->parent_ep = parent_ep;
free(local, M_SONAME);
free(remote, M_SONAME);
c4iw_get_ep(&parent_ep->com);
child_ep->parent_ep = parent_ep;
init_timer(&child_ep->timer);
state_set(&child_ep->com, MPA_REQ_WAIT);
START_EP_TIMER(child_ep);
/* maybe the request has already been queued up on the socket... */
process_mpa_request(child_ep);
return;
}
static int
@ -738,7 +714,10 @@ process_socket_event(struct c4iw_ep *ep)
}
if (state == LISTEN) {
process_newconn(ep);
/* socket listening events are handled at IWCM */
CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__,
ep->com.state, ep);
BUG();
return;
}
@ -919,7 +898,6 @@ void _c4iw_free_ep(struct kref *kref)
ep = container_of(kref, struct c4iw_ep, com.kref);
epc = &ep->com;
KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so));
KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
__func__, epc));
kfree(ep);
@ -2126,10 +2104,10 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
/*
* iwcm->create_listen. Returns -errno on failure.
* iwcm->create_listen_ep. Returns -errno on failure.
*/
int
c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
{
int rc;
struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
@ -2154,17 +2132,6 @@ c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
ep->com.thread = curthread;
state_set(&ep->com, LISTEN);
ep->com.so = so;
init_sock(&ep->com);
rc = solisten(so, ep->backlog, ep->com.thread);
if (rc != 0) {
log(LOG_ERR, "%s: failed to start listener: %d\n", __func__,
rc);
close_socket(&ep->com, 0);
cm_id->rem_ref(cm_id);
c4iw_put_ep(&ep->com);
goto failed;
}
cm_id->provider_data = ep;
return (0);
@ -2174,21 +2141,19 @@ c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
return (-rc);
}
int
c4iw_destroy_listen(struct iw_cm_id *cm_id)
void
c4iw_destroy_listen_ep(struct iw_cm_id *cm_id)
{
int rc;
struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, inp %p", __func__, cm_id,
cm_id->so, cm_id->so->so_pcb);
CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, state %s", __func__, cm_id,
cm_id->so, states[ep->com.state]);
state_set(&ep->com, DEAD);
rc = close_socket(&ep->com, 0);
cm_id->rem_ref(cm_id);
c4iw_put_ep(&ep->com);
return (rc);
return;
}
int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
* Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -850,8 +850,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
int c4iw_destroy_listen(struct iw_cm_id *cm_id);
int c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog);
void c4iw_destroy_listen_ep(struct iw_cm_id *cm_id);
int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
void c4iw_qp_add_ref(struct ib_qp *qp);
@ -914,6 +914,8 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
struct c4iw_dev_ucontext *uctx);
void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe);
void process_newconn(struct iw_cm_id *parent_cm_id,
struct socket *child_so);
extern struct cxgb4_client t4c_client;
extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
* Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -474,8 +474,9 @@ c4iw_register_device(struct c4iw_dev *dev)
iwcm->connect = c4iw_connect;
iwcm->accept = c4iw_accept_cr;
iwcm->reject = c4iw_reject_cr;
iwcm->create_listen = c4iw_create_listen;
iwcm->destroy_listen = c4iw_destroy_listen;
iwcm->create_listen_ep = c4iw_create_listen_ep;
iwcm->destroy_listen_ep = c4iw_destroy_listen_ep;
iwcm->newconn = process_newconn;
iwcm->add_ref = c4iw_qp_add_ref;
iwcm->rem_ref = c4iw_qp_rem_ref;
iwcm->get_qp = c4iw_get_qp;

View File

@ -3,6 +3,7 @@
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
* Copyright (c) 2016 Chelsio Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -407,6 +408,75 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu
return -EAGAIN;
}
/*
 * Locate the iWARP listening cm_id that matches a connection's local
 * address when the application listened on INADDR_ANY.
 *
 * Step 1: translate local_addr to a GID and scan every registered RDMA
 * device for an iWARP port carrying that GID (under the global 'lock').
 * Step 2: walk listen_any_list and each wildcard listener's per-device
 * children to find the cm_id bound to the matching device and TCP port.
 *
 * On success *cm_id is set to the matching struct iw_cm_id and 0 is
 * returned; -ENODEV is returned when no device or cm_id matches.
 * NOTE(review): the listen_any_list walk in step 2 is done without
 * holding 'lock' — presumably safe at this call site, but verify.
 */
int
rdma_find_cmid_laddr(struct sockaddr_in *local_addr, unsigned short dev_type,
void **cm_id)
{
int ret;
u8 port;
int found_dev = 0, found_cmid = 0;
struct rdma_id_private *id_priv;
struct rdma_id_private *dev_id_priv;
struct cma_device *cma_dev;
struct rdma_dev_addr dev_addr;
union ib_gid gid;
enum rdma_link_layer dev_ll = dev_type == ARPHRD_INFINIBAND ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
memset(&dev_addr, 0, sizeof(dev_addr));
/* Resolve the local IP to a link-layer address/GID. */
ret = rdma_translate_ip((struct sockaddr *)local_addr,
&dev_addr, NULL);
if (ret)
goto err;
/* find rdma device based on MAC address/gid */
mutex_lock(&lock);
memcpy(&gid, dev_addr.src_dev_addr +
rdma_addr_gid_offset(&dev_addr), sizeof(gid));
list_for_each_entry(cma_dev, &dev_list, list)
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port)
if ((rdma_port_get_link_layer(cma_dev->device, port) ==
dev_ll) &&
(rdma_node_get_transport(cma_dev->device->node_type) ==
RDMA_TRANSPORT_IWARP)) {
/* find_gid_port(): 0 = match, 1 = hard error, else keep looking. */
ret = find_gid_port(cma_dev->device,
&gid, port);
if (!ret) {
found_dev = 1;
goto out;
} else if (ret == 1) {
mutex_unlock(&lock);
goto err;
}
}
out:
mutex_unlock(&lock);
if (!found_dev)
goto err;
/* Traverse through the list of listening cm_id's to find the
* desired cm_id based on rdma device & port number.
*/
list_for_each_entry(id_priv, &listen_any_list, list)
list_for_each_entry(dev_id_priv, &id_priv->listen_list,
listen_list)
if (dev_id_priv->cma_dev == cma_dev)
if (dev_id_priv->cm_id.iw->local_addr.sin_port
== local_addr->sin_port) {
*cm_id = (void *)dev_id_priv->cm_id.iw;
found_cmid = 1;
}
return found_cmid ? 0 : -ENODEV;
err:
return -ENODEV;
}
EXPORT_SYMBOL(rdma_find_cmid_laddr);
static int cma_acquire_dev(struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
@ -780,6 +850,12 @@ static inline int cma_any_addr(struct sockaddr *addr)
{
return cma_zero_addr(addr) || cma_loopback_addr(addr);
}
/*
 * Exported wrapper around the file-local cma_any_addr(): returns nonzero
 * when 'addr' is a wildcard (zero) or loopback address. Lets the iWARP CM
 * (outside this file) test for INADDR_ANY listeners without duplicating
 * the address-classification logic.
 */
int
rdma_cma_any_addr(struct sockaddr *addr)
{
return cma_any_addr(addr);
}
EXPORT_SYMBOL(rdma_cma_any_addr);
static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
{
@ -1707,6 +1783,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
dev_id_priv = container_of(id, struct rdma_id_private, id);
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
dev_id_priv->sock = id_priv->sock;
memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));

View File

@ -5,6 +5,7 @@
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
* Copyright (c) 2016 Chelsio Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -35,6 +36,8 @@
* SOFTWARE.
*
*/
#include "opt_inet.h"
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/idr.h>
@ -47,7 +50,10 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/string.h>
#include <netinet/tcp.h>
#include <sys/mutex.h>
#include <rdma/rdma_cm.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
@ -65,6 +71,85 @@ struct iwcm_work {
struct iw_cm_event event;
struct list_head free_list;
};
/* Deferred-work item queued by the listen-socket upcall; processed by
 * iw_so_event_handler() on iwcm_wq. */
struct iwcm_listen_work {
struct work_struct work;
struct iw_cm_id *cm_id;
};
/* Global registry of TCP ports with active INADDR_ANY iWARP listeners,
 * so only one software listener is created per port. */
static LIST_HEAD(listen_port_list);
/* Protects listen_port_list and each entry's refcnt. */
static DEFINE_MUTEX(listen_port_mutex);
/* Serializes dequeue of sockets between the upcall and the work handler. */
static DEFINE_MUTEX(dequeue_mutex);
/* One entry per listened-on port; refcnt counts wildcard listeners. */
struct listen_port_info {
struct list_head list;
uint16_t port_num;
uint32_t refcnt;
};
/*
 * Register (or find) 'port' in listen_port_list and bump its listener
 * reference count.
 *
 * Returns the new refcount (>= 1) on success so the caller can tell
 * whether it is the first wildcard listener on this port (refcnt == 1,
 * meaning a real socket listener must be created), or -ENOMEM on
 * allocation failure.
 *
 * Fix: the original returned port_info->refcnt AFTER dropping
 * listen_port_mutex; rem_port_from_listenlist() may free port_info the
 * moment the mutex is released, making that a use-after-free. Snapshot
 * the count while the lock is still held (matching how
 * rem_port_from_listenlist() reads it).
 */
static int32_t
add_port_to_listenlist(uint16_t port)
{
	struct listen_port_info *port_info;
	int32_t refcnt;
	int err = 0;

	mutex_lock(&listen_port_mutex);

	list_for_each_entry(port_info, &listen_port_list, list)
		if (port_info->port_num == port)
			goto found_port;

	port_info = kmalloc(sizeof(*port_info), GFP_KERNEL);
	if (!port_info) {
		err = -ENOMEM;
		mutex_unlock(&listen_port_mutex);
		goto out;
	}

	port_info->port_num = port;
	port_info->refcnt = 0;

	list_add(&port_info->list, &listen_port_list);

found_port:
	++(port_info->refcnt);
	/* Read the count under the lock; port_info may be freed by a
	 * concurrent rem_port_from_listenlist() once we unlock. */
	refcnt = port_info->refcnt;
	mutex_unlock(&listen_port_mutex);
	return refcnt;
out:
	return err;
}
/*
 * Drop one listener reference on 'port' in listen_port_list.
 *
 * Returns the remaining refcount (0 means the caller was the last
 * wildcard listener and should tear down the real socket listener),
 * or -EINVAL if the port was never registered. The entry is freed when
 * the count reaches zero. The returned count is snapshotted while
 * listen_port_mutex is held.
 */
static int32_t
rem_port_from_listenlist(uint16_t port)
{
struct listen_port_info *port_info;
int ret, found_port = 0;
mutex_lock(&listen_port_mutex);
list_for_each_entry(port_info, &listen_port_list, list)
if (port_info->port_num == port) {
found_port = 1;
break;
}
if (found_port) {
--(port_info->refcnt);
/* Capture the count before a possible kfree() below. */
ret = port_info->refcnt;
if (port_info->refcnt == 0) {
/* Remove this entry from the list as there are no
* more listeners for this port_num.
*/
list_del(&port_info->list);
kfree(port_info);
}
} else {
ret = -EINVAL;
}
mutex_unlock(&listen_port_mutex);
return ret;
}
/*
* The following services provide a mechanism for pre-allocating iwcm_work
@ -320,6 +405,167 @@ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
}
EXPORT_SYMBOL(iw_cm_disconnect);
/*
 * Pop one fully-established connection off a listening socket's
 * completed-connection queue (so_comp) and accept it.
 *
 * Returns the child socket with an extra reference (soref) and SS_NBIO
 * set, or NULL if the queue is empty. The peer address returned by
 * soaccept() is discarded here; callers use in_getpeeraddr() later.
 * Locking order (ACCEPT_LOCK then SOCK_LOCK) follows the FreeBSD
 * accept path in sys/kern/uipc_socket.c.
 */
static struct socket *
dequeue_socket(struct socket *head)
{
struct socket *so;
struct sockaddr_in *remote;
ACCEPT_LOCK();
so = TAILQ_FIRST(&head->so_comp);
if (!so) {
ACCEPT_UNLOCK();
return NULL;
}
SOCK_LOCK(so);
/*
* Before changing the flags on the socket, we have to bump the
* reference count. Otherwise, if the protocol calls sofree(),
* the socket will be released due to a zero refcount.
*/
soref(so);
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
so->so_state |= SS_NBIO;
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
soaccept(so, (struct sockaddr **)&remote);
free(remote, M_SONAME);
return so;
}
/*
 * Workqueue handler for listen-socket upcalls: drains every pending
 * child connection from the listener's accept queue and hands each one
 * to the owning driver via iwcm->newconn().
 *
 * For an INADDR_ANY listener the generic cm_id cannot identify the RNIC;
 * the child's actual local address is resolved to the device-specific
 * cm_id with rdma_find_cmid_laddr() before dispatching. The 'work' item
 * is always freed here.
 * NOTE(review): on the rdma_find_cmid_laddr() failure path the dequeued
 * socket 'so' (which holds a soref from dequeue_socket()) appears to be
 * leaked — confirm whether a release is needed before the goto.
 */
static void
iw_so_event_handler(struct work_struct *_work)
{
#ifdef INET
struct iwcm_listen_work *work = container_of(_work,
struct iwcm_listen_work, work);
struct iw_cm_id *listen_cm_id = work->cm_id;
struct iwcm_id_private *cm_id_priv;
struct iw_cm_id *real_cm_id;
struct sockaddr_in *local;
struct socket *so;
cm_id_priv = container_of(listen_cm_id, struct iwcm_id_private, id);
/* Listener may have been destroyed after this work was queued. */
if (cm_id_priv->state != IW_CM_STATE_LISTEN) {
kfree(work);
return;
}
mutex_lock(&dequeue_mutex);
/* Dequeue & process all new 'so' connection requests for this cmid */
while ((so = dequeue_socket(work->cm_id->so)) != NULL) {
if (rdma_cma_any_addr((struct sockaddr *)
&listen_cm_id->local_addr)) {
/* Wildcard listener: map the child's local address
 * back to the per-device cm_id. */
in_getsockaddr(so, (struct sockaddr **)&local);
if (rdma_find_cmid_laddr(local, ARPHRD_ETHER,
(void **) &real_cm_id)) {
free(local, M_SONAME);
goto err;
}
free(local, M_SONAME);
real_cm_id->device->iwcm->newconn(real_cm_id, so);
} else {
listen_cm_id->device->iwcm->newconn(listen_cm_id, so);
}
}
err:
mutex_unlock(&dequeue_mutex);
kfree(work);
#endif
return;
}
/*
 * Receive upcall installed on the listening socket: fires when a new
 * connection completes. Queues an iwcm_listen_work so the (sleepable)
 * workqueue context can dequeue and dispatch the child socket; the
 * upcall itself runs in a context where we must not block on accept.
 *
 * Returns SU_OK normally, -ENOMEM if the work item cannot be allocated.
 *
 * Fix: the original returned on the empty-queue and allocation-failure
 * paths without releasing dequeue_mutex, deadlocking every later upcall
 * and iw_so_event_handler(). Both early exits now unlock first.
 */
static int
iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
{
	struct iwcm_listen_work *work;
	struct socket *so;
	struct iw_cm_id *cm_id = arg;

	mutex_lock(&dequeue_mutex);

	/* check whether iw_so_event_handler() already dequeued this 'so' */
	so = TAILQ_FIRST(&parent_so->so_comp);
	if (!so) {
		mutex_unlock(&dequeue_mutex);
		return SU_OK;
	}

	work = kzalloc(sizeof(*work), M_NOWAIT);
	if (!work) {
		mutex_unlock(&dequeue_mutex);
		return -ENOMEM;
	}
	work->cm_id = cm_id;

	INIT_WORK(&work->work, iw_so_event_handler);
	queue_work(iwcm_wq, &work->work);

	mutex_unlock(&dequeue_mutex);
	return SU_OK;
}
/*
 * Prepare the CM's listening socket: install the iw_so_upcall receive
 * upcall (with the cm_id as its argument), mark the socket non-blocking,
 * and set TCP_NODELAY.
 * NOTE(review): the upcall is set under SOCK_LOCK here, while the
 * driver-side code in this change uses SOCKBUF_LOCK(&so->so_rcv) for
 * soupcall_set() — confirm which lock soupcall_set(SO_RCV) requires.
 * The sosetopt() return value is intentionally best-effort/ignored.
 */
static void
iw_init_sock(struct iw_cm_id *cm_id)
{
struct sockopt sopt;
struct socket *so = cm_id->so;
int on = 1;
SOCK_LOCK(so);
soupcall_set(so, SO_RCV, iw_so_upcall, cm_id);
so->so_state |= SS_NBIO;
SOCK_UNLOCK(so);
sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_NODELAY;
sopt.sopt_val = (caddr_t)&on;
sopt.sopt_valsize = sizeof(on);
sopt.sopt_td = NULL;
sosetopt(so, &sopt);
}
/*
 * Undo iw_init_sock(): remove the receive upcall, then either fully
 * close the socket (close != 0) or just shut down both directions
 * (close == 0, used for listeners whose socket the owner will free).
 * Clears cm_id->so so the cm_id no longer references the socket.
 * Returns the soclose()/soshutdown() error code (0 on success).
 */
static int
iw_close_socket(struct iw_cm_id *cm_id, int close)
{
struct socket *so = cm_id->so;
int rc;
SOCK_LOCK(so);
soupcall_clear(so, SO_RCV);
SOCK_UNLOCK(so);
if (close)
rc = soclose(so);
else
rc = soshutdown(so, SHUT_WR | SHUT_RD);
cm_id->so = NULL;
return rc;
}
static int
iw_create_listen(struct iw_cm_id *cm_id, int backlog)
{
int rc;
iw_init_sock(cm_id);
rc = solisten(cm_id->so, backlog, curthread);
if (rc != 0)
iw_close_socket(cm_id, 0);
return rc;
}
/*
 * Stop the software listener: clear the upcall and shut the socket down
 * without fully closing it (close == 0); the socket's owner disposes of
 * it. Returns the iw_close_socket() result.
 */
static int
iw_destroy_listen(struct iw_cm_id *cm_id)
{
	return (iw_close_socket(cm_id, 0));
}
/*
* CM_ID <-- DESTROYING
*
@ -330,7 +576,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
unsigned long flags;
int ret;
int ret = 0, refcnt;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
/*
@ -345,8 +591,18 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
case IW_CM_STATE_LISTEN:
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
/* destroy the listening endpoint */
ret = cm_id->device->iwcm->destroy_listen(cm_id);
if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) {
refcnt =
rem_port_from_listenlist(cm_id->local_addr.sin_port);
if (refcnt == 0)
ret = iw_destroy_listen(cm_id);
cm_id->device->iwcm->destroy_listen_ep(cm_id);
} else {
ret = iw_destroy_listen(cm_id);
cm_id->device->iwcm->destroy_listen_ep(cm_id);
}
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_ESTABLISHED:
@ -418,7 +674,7 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
{
struct iwcm_id_private *cm_id_priv;
unsigned long flags;
int ret;
int ret, refcnt;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@ -431,9 +687,33 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
case IW_CM_STATE_IDLE:
cm_id_priv->state = IW_CM_STATE_LISTEN;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
if (ret)
if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) {
refcnt =
add_port_to_listenlist(cm_id->local_addr.sin_port);
if (refcnt == 1) {
ret = iw_create_listen(cm_id, backlog);
} else if (refcnt <= 0) {
ret = -EINVAL;
} else {
/* if refcnt > 1, a socket listener created
* already. And we need not create socket
* listener on other rdma devices/listen cm_id's
* due to TOE. That is when a socket listener is
* created with INADDR_ANY all registered TOE
* devices will get a call to start
* hardware listeners.
*/
}
} else {
ret = iw_create_listen(cm_id, backlog);
}
if (!ret)
cm_id->device->iwcm->create_listen_ep(cm_id, backlog);
else
cm_id_priv->state = IW_CM_STATE_IDLE;
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
default:

View File

@ -1,6 +1,7 @@
/*
* Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2016 Chelsio Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -120,10 +121,13 @@ struct iw_cm_verbs {
int (*reject)(struct iw_cm_id *cm_id,
const void *pdata, u8 pdata_len);
int (*create_listen)(struct iw_cm_id *cm_id,
int (*create_listen_ep)(struct iw_cm_id *cm_id,
int backlog);
int (*destroy_listen)(struct iw_cm_id *cm_id);
void (*destroy_listen_ep)(struct iw_cm_id *cm_id);
void (*newconn)(struct iw_cm_id *parent_cm_id,
struct socket *so);
};
/**

View File

@ -1,6 +1,7 @@
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2016 Chelsio Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -400,5 +401,7 @@ int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
* @timeout: QP timeout
*/
void rdma_set_timeout(struct rdma_cm_id *id, int timeout);
int rdma_cma_any_addr(struct sockaddr *addr);
int rdma_find_cmid_laddr(struct sockaddr_in *local_addr,
unsigned short dev_type, void **cm_id);
#endif /* RDMA_CM_H */